unicode-org · srl295 · Apr 24, 2025 · Apr 23, 2025 · Apr 23, 2025 · Apr 23, 2025
diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
@@ -45,6 +45,21 @@ jobs:
             ${{ runner.os }}-mavenspotless-
       - name: Check Java style
         run: mvn --file=tools/pom.xml spotless:check || (echo "Style checker failed. Formatting changes can be applied by 'mvn spotless:apply'" && exit 1)
+      # Keep the .lycheecache file between runs; the lychee step below is invoked with --cache.
+      - name: Restore lychee cache
+        uses: actions/cache@v3
+        with:
+          path: .lycheecache
+          # NOTE(review): the key is unique per commit, so an exact hit is not expected;
+          # restore-keys presumably falls back to the newest cache-lychee-* entry - confirm.
+          key: cache-lychee-${{ github.sha }}
+          restore-keys: cache-lychee-
+      # Link-check PathDescriptions.md; the markdown report is written to linkcheck.md.
+      - name: Run lychee on PathDescriptions.md
+        uses: lycheeverse/lychee-action@v1
+        with:
+          # Folded scalar: the two lines below are joined with a single space.
+          args: >-
+            -n --cache --max-cache-age 10d
+            tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/PathDescriptions.md
+          fail: true
+          format: markdown
+          output: linkcheck.md
+      - name: Link Checker Summary PathDescriptions.md
+        # The lychee step uses fail: true, so without this condition the summary would be
+        # skipped exactly when broken links were found.
+        if: ${{ !cancelled() }}
+        # Skip quietly when no report exists (e.g. an earlier step failed before lychee ran).
+        run: if [ -f linkcheck.md ]; then cat linkcheck.md >> "$GITHUB_STEP_SUMMARY"; fi
   build:
     runs-on: ubuntu-latest
     steps:

diff --git a/docs/site/development/updating-dtds.md b/docs/site/development/updating-dtds.md
@@ -76,7 +76,7 @@ Add the annotations.
 2. **Survey Tool Data.** Add information so that the Survey Tool can display these properly to translators
     1. PathHeader.txt (tools/java/org/unicode/cldr/util/data/) - provides the information for what section of the Survey Tool this item shows up in, and how it sorts.
         1. Edit as described in [PathHeader](/development/updating-dtds).
-    2.  PathDescription.txt (tools/java/org/unicode/cldr/util/data/) - provides a description of what the field is, for translators.
+    2.  PathDescriptions.md (tools/java/org/unicode/cldr/util/data/) - provides a description of what the field is, for translators.
         1. If it needs more explanation, add a section (or perhaps a whole page) to the translation guide, eg http://cldr.org/translation/plurals.
         2. For an example, see [8479](/index/bug-reports#TOC-Filing-a-Ticket)
     3. Placeholders.txt - provides information about the placeholders, if there can be any.
@@ -257,9 +257,9 @@ If a value has placeholders, edit Placeholders.txt:
 5. Check that the ConsoleCheckCLDR **CheckForExamplars** fails if there are no placeholders in the value
 6. Note: we should switch methods so that we don't need to quote \\\[, etc, but we haven't yet.
 
-## PathDescription
+## PathDescriptions.md
 
-This file provides a description of each kind of path, and a link to a section of https://cldr.unicode.org/translation. Easiest is to take an existing description and modify.
+This file provides a description of each kind of path, and a link to a section of https://cldr.unicode.org/translation. Its documentation is at the top of the file. The easiest approach is to copy an existing description and modify it.
 
 ## Coverage
 

diff --git a/tools/cldr-apps/js/src/esm/cldrDeferHelp.mjs b/tools/cldr-apps/js/src/esm/cldrDeferHelp.mjs
@@ -2,6 +2,7 @@
  * cldrDeferHelp: encapsulate code related to showing language descriptions in the Info Panel
  */
 import { marked } from "./cldrMarked.mjs";
+import * as cldrDom from "./cldrDom.mjs";
 const defaultEndpoint = "https://dbpedia.org/sparql/";
 const format = "JSON";
 const abstractLang = "en";
@@ -45,7 +46,7 @@ function addDeferredHelpTo(fragment, helpHtml, resource, translationHint) {
     const absDiv = subloadAbstract(resource);
     theHelp.append(absDiv);
   }
-
+  cldrDom.setDocTargets(theHelp[0]); // apply to DOM object, not jQuery object
   $(fragment).append(theHelp);
 }
 

diff --git a/tools/cldr-apps/js/src/esm/cldrDom.mjs b/tools/cldr-apps/js/src/esm/cldrDom.mjs
@@ -237,6 +237,19 @@ function parentOfType(tag, obj) {
   return parentOfType(tag, obj.parentElement);
 }
 
+/** CLDR conventional target= for documentation */
+const TARGET_DOCS = "CLDR-ST-DOCS";
+
+/** set target=TARGET_DOCS on all <a< nodes */
+function setDocTargets(n) {
+  if (n.tagName == "A") {
+    n.setAttribute("target", TARGET_DOCS);
+  }
+  for (const c of n?.children) {
+    setDocTargets(c);
+  }
+}
+
 export {
   addClass,
   appendIcon,
@@ -249,5 +262,7 @@ export {
   removeAllChildNodes,
   removeClass,
   setDisplayed,
+  setDocTargets,
+  TARGET_DOCS,
   updateIf,
 };
diff --git a/tools/cldr-apps/src/test/java/org/unicode/cldr/util/TestCLDRLinks.java b/tools/cldr-apps/src/test/java/org/unicode/cldr/util/TestCLDRLinks.java
@@ -1,6 +1,7 @@
 package org.unicode.cldr.util;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.junit.jupiter.api.Assumptions.assumeTrue;
 
@@ -65,6 +66,11 @@ public void TestPathDescriptionLinks(final String url, final String xpath)
         assertURLOK(url, xpath);
     }
 
+    /** Sanity check: the provider feeding the link test must yield at least one argument set. */
+    @Test
+    void testPathDescriptionProvider() {
+        // 0L keeps the comparison long-to-long. If overload resolution ever fell back to
+        // assertNotEquals(Object, Object), a boxed Integer 0 would never equal a Long count
+        // and this check would pass vacuously.
+        assertNotEquals(
+                0L, pathDescriptionProvider().count(), "pathDescriptionProvider() gave no URLs");
+    }
+
     /**
      * Provider for above test
      *
@@ -83,13 +89,23 @@ public static Stream<Arguments> pathDescriptionProvider() {
 
         final Map<String, String> urls = new TreeMap<String, String>();
 
+        // this gets any URLs that are not in the footer
         for (final String xpath : english.fullIterable()) {
             final String description = pathDescriptionFactory.getRawDescription(xpath, null);
             // System.out.println(description);
             for (final String url : urlsFromString(description)) {
                 urls.putIfAbsent(url, xpath);
             }
         }
+
+        // this gets all URLs in references
+        PathDescriptionParser pathDescriptionParser = new PathDescriptionParser();
+        pathDescriptionParser.parse(PathDescription.getPathDescriptionString());
+        for (final String url : urlsFromString(pathDescriptionParser.getReferences())) {
+            urls.putIfAbsent(url, "PathDescriptions.md");
+        }
+
+        assertNotEquals(0, urls.size(), "PathDescription had no urls");
         return urls.entrySet().stream().map(e -> Arguments.of(e.getKey(), e.getValue()));
     }
 

diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/test/ExampleGenerator.java b/tools/cldr-code/src/main/java/org/unicode/cldr/test/ExampleGenerator.java
@@ -3823,22 +3823,7 @@ public synchronized String getHelpHtml(String xpath, String value) {
             return null;
         }
         int start = 0;
-        StringBuilder buffer = new StringBuilder();
-
-        Matcher URLMatcher = URL_PATTERN.matcher("");
-        while (URLMatcher.reset(description).find(start)) {
-            final String url = URLMatcher.group();
-            buffer.append(
-                            TransliteratorUtilities.toHTML.transliterate(
-                                    description.substring(start, URLMatcher.start())))
-                    .append("<a target='CLDR-ST-DOCS' href='")
-                    .append(url)
-                    .append("'>")
-                    .append(url)
-                    .append("</a>");
-            start = URLMatcher.end();
-        }
-        buffer.append(TransliteratorUtilities.toHTML.transliterate(description.substring(start)));
+        StringBuilder buffer = new StringBuilder(description);
         if (AnnotationUtil.pathIsAnnotation(xpath)) {
             XPathParts emoji = XPathParts.getFrozenInstance(xpath);
             String cp = emoji.getAttributeValue(-1, "cp");

diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/PathDescription.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/PathDescription.java
@@ -19,7 +19,7 @@
 public class PathDescription {
     /** Remember to quote any [ character! */
     private static final String pathDescriptionString =
-            CldrUtility.getUTF8Data("PathDescriptions.txt")
+            CldrUtility.getUTF8Data("PathDescriptions.md")
                     .lines()
                     .collect(Collectors.joining("\n"));
 
@@ -44,11 +44,24 @@ public enum ErrorHandling {
     private static final Map<String, String> ZONE2COUNTRY =
             STANDARD_CODES.zoneParser.getZoneToCountry();
 
-    static RegexLookup<String> parseLookupString() {
-        return new RegexLookup<String>().loadFromString(pathDescriptionString);
+    /** Parses PathDescriptions.md into <Description, Markdown> pairs; also holds the references */
+    private static final PathDescriptionParser parser = new PathDescriptionParser();
+
+    private static final RegexLookup<Pair<String, String>> pathHandling =
+            parser.parse(pathDescriptionString);
+
+    /** markdown to append */
+    private static final String references = parser.getReferences();
+
+    /**
+     * For tests: returns the full text loaded from PathDescriptions.md, with its lines joined by
+     * newline characters.
+     *
+     * @return the raw path description text that is handed to the parser
+     */
+    static final String getPathDescriptionString() {
+        return pathDescriptionString;
+    }
 
-    private static final RegexLookup<String> pathHandling = parseLookupString();
+    /**
+     * For tests: returns the lookup built by parsing the path description text.
+     *
+     * @return the lookup whose values are pairs; the first element is used as the description
+     *     and the second as the markdown
+     */
+    static final RegexLookup<Pair<String, String>> getPathHandling() {
+        return pathHandling;
+    }
 
     // set in construction
 
@@ -93,22 +106,41 @@ public enum Status {
 
     public String getRawDescription(String path, Object context) {
         status.clear();
-        return pathHandling.get(path, context, pathArguments);
+        final Pair<String, String> entry = pathHandling.get(path, context, pathArguments);
+        if (entry == null) {
+            return null;
+        }
+        return entry.getSecond();
     }
 
     public String getRawDescription(
             String path, Object context, Output<Finder> matcherFound, Set<String> failures) {
         status.clear();
-        return pathHandling.get(path, context, pathArguments, matcherFound, failures);
+        final Pair<String, String> entry =
+                pathHandling.get(path, context, pathArguments, matcherFound, failures);
+        if (entry == null) {
+            return null;
+        }
+        return entry.getSecond();
     }
 
     public String getDescription(String path, String value, Object context) {
         status.clear();
 
-        String description = pathHandling.get(path, context, pathArguments);
-        if (description == null) {
+        final Pair<String, String> entry = pathHandling.get(path, context, pathArguments);
+        String description;
+        String markdown;
+        if (entry == null) {
+            markdown = MISSING_DESCRIPTION;
+            description = null;
+        } else {
+            description = entry.getFirst();
+            markdown = entry.getSecond();
+        }
+
+        if (description == null || description.isEmpty()) {
             description = MISSING_DESCRIPTION;
-        } else if ("SKIP".equals(description)) {
+        } else if (description.startsWith("SKIP")) {
             status.add(Status.SKIP);
             if (errorHandling == ErrorHandling.SKIP) {
                 return null;
@@ -148,9 +180,7 @@ public String getDescription(String path, String value, Object context) {
 
         // In special cases, only use if there is a root value (languageNames, ...
         if (description.startsWith("ROOT")) {
-            int typeEnd = description.indexOf(';');
-            String type = description.substring(4, typeEnd).trim();
-            description = description.substring(typeEnd + 1).trim();
+            String type = description.substring(4).trim();
 
             boolean isMetazone = type.equals("metazone");
             String code = attributes.get(0);
@@ -198,23 +228,23 @@ public String getDescription(String path, String value, Object context) {
                     logger.warning("Missing country for timezone " + code);
                 }
             }
-            description =
-                    MessageFormat.format(MessageFormat.autoQuoteApostrophe(description), code);
+            markdown = MessageFormat.format(MessageFormat.autoQuoteApostrophe(markdown), code);
         } else if (path.contains("exemplarCity")) {
             String regionCode = ZONE2COUNTRY.get(attributes.get(0));
             String englishRegionName =
                     english.nameGetter().getNameFromTypeEnumCode(NameType.TERRITORY, regionCode);
-            description =
+            markdown =
                     MessageFormat.format(
-                            MessageFormat.autoQuoteApostrophe(description), englishRegionName);
-        } else if (!MISSING_DESCRIPTION.equals(description)) {
-            description =
+                            MessageFormat.autoQuoteApostrophe(markdown), englishRegionName);
+        } else if (entry != null) {
+            markdown =
                     MessageFormat.format(
-                            MessageFormat.autoQuoteApostrophe(description),
+                            MessageFormat.autoQuoteApostrophe(markdown),
                             (Object[]) pathArguments.value);
         }
 
-        return description;
+        // we always append the "References" blob
+        return markdown + "\n" + references;
     }
 
     private static boolean isRootCode(