[ES|QL] Automate the retrieval of grouping functions (elastic#210513)

## Summary Closes elastic#210313 Automates the retrieval of grouping functions (categorize and bucket) for both the function definitions and the docs. The bucket signatures are tricky, so I overwrite them with our own implementation. Everything else is retrieved from Elasticsearch.
xcrzx · Feb 12, 2025 · 7683f01 · 7683f01
1 parent bcfdd13
commit 7683f01
Show file tree

Hide file tree

Showing 17 changed files with 1,077 additions and 351 deletions.
diff --git a/src/platform/packages/private/kbn-language-documentation/scripts/generate_esql_docs.ts b/src/platform/packages/private/kbn-language-documentation/scripts/generate_esql_docs.ts
@@ -20,7 +20,8 @@ interface DocsSectionContent {
 
 (function () {
   const pathToElasticsearch = process.argv[2];
-  const { scalarFunctions, aggregationFunctions } = loadFunctionDocs(pathToElasticsearch);
+  const { scalarFunctions, aggregationFunctions, groupingFunctions } =
+    loadFunctionDocs(pathToElasticsearch);
   writeFunctionDocs(
     scalarFunctions,
     path.join(__dirname, '../src/sections/generated/scalar_functions.tsx')
@@ -29,6 +30,10 @@ interface DocsSectionContent {
     aggregationFunctions,
     path.join(__dirname, '../src/sections/generated/aggregation_functions.tsx')
   );
+  writeFunctionDocs(
+    groupingFunctions,
+    path.join(__dirname, '../src/sections/generated/grouping_functions.tsx')
+  );
 })();
 
 function loadFunctionDocs(pathToElasticsearch: string) {
@@ -48,6 +53,7 @@ function loadFunctionDocs(pathToElasticsearch: string) {
 
   const scalarFunctions = new Map<string, DocsSectionContent>();
   const aggregationFunctions = new Map<string, DocsSectionContent>();
+  const groupingFunctions = new Map<string, DocsSectionContent>();
 
   // Iterate over each file in the directory
   for (const file of docsFiles) {
@@ -80,10 +86,16 @@ function loadFunctionDocs(pathToElasticsearch: string) {
           preview: functionDefinition.preview,
         });
       }
+      if (functionDefinition.type === 'grouping') {
+        groupingFunctions.set(functionName, {
+          description: content,
+          preview: functionDefinition.preview,
+        });
+      }
     }
   }
 
-  return { scalarFunctions, aggregationFunctions };
+  return { scalarFunctions, aggregationFunctions, groupingFunctions };
 }
 
 function writeFunctionDocs(functionDocs: Map<string, DocsSectionContent>, pathToDocsFile: string) {

diff --git a/.../packages/private/kbn-language-documentation/src/sections/esql_documentation_sections.tsx b/.../packages/private/kbn-language-documentation/src/sections/esql_documentation_sections.tsx
@@ -682,139 +682,6 @@ Refer to **Operators** for an overview of the supported operators.
   ],
 };
 
-export const groupingFunctions = {
-  label: i18n.translate('languageDocumentation.documentationESQL.groupingFunctions', {
-    defaultMessage: 'Grouping functions',
-  }),
-  description: i18n.translate(
-    'languageDocumentation.documentationESQL.groupingFunctionsDocumentationESQLDescription',
-    {
-      defaultMessage: `These grouping functions can be used with \`STATS...BY\`:`,
-    }
-  ),
-  items: [
-    {
-      label: i18n.translate('languageDocumentation.documentationESQL.autoBucketFunction', {
-        defaultMessage: 'BUCKET',
-      }),
-      description: (
-        <Markdown
-          markdownContent={i18n.translate(
-            'languageDocumentation.documentationESQL.autoBucketFunction.markdown',
-            {
-              defaultMessage: `### BUCKET
-Creates groups of values - buckets - out of a datetime or numeric input. The size of the buckets can either be provided directly, or chosen based on a recommended count and values range.
-
-\`BUCKET\` works in two modes:
-
-1. Where the size of the bucket is computed based on a buckets count recommendation (four parameters) and a range.
-2. Where the bucket size is provided directly (two parameters).
-
-Using a target number of buckets, a start of a range, and an end of a range, \`BUCKET\` picks an appropriate bucket size to generate the target number of buckets or fewer.
-
-For example, requesting up to 20 buckets for a year will organize data into monthly intervals:
-
-\`\`\`
-FROM employees
-| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
-| STATS hire_date = MV_SORT(VALUES(hire_date)) BY month = BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
-| SORT hire_date
-\`\`\`
-
-**NOTE**: The goal isn’t to provide the exact target number of buckets, it’s to pick a range that provides _at most_ the target number of buckets.
-
-You can combine \`BUCKET\` with an aggregation to create a histogram:
-
-\`\`\`
-FROM employees
-| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
-| STATS hires_per_month = COUNT(*) BY month = BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
-| SORT month
-\`\`\`
-
-**NOTE**: \`BUCKET\` does not create buckets that match zero documents. That’s why the previous example is missing \`1985-03-01\` and other dates.
-
-Asking for more buckets can result in a smaller range. For example, requesting at most 100 buckets in a year results in weekly buckets:
-
-\`\`\`
-FROM employees
-| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
-| STATS hires_per_week = COUNT(*) BY week = BUCKET(hire_date, 100, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
-| SORT week
-\`\`\`
-
-**NOTE**: \`BUCKET\` does not filter any rows. It only uses the provided range to pick a good bucket size. For rows with a value outside of the range, it returns a bucket value that corresponds to a bucket outside the range. Combine \`BUCKET\` with \`WHERE\` to filter rows.
-
-If the desired bucket size is known in advance, simply provide it as the second argument, leaving the range out:
-
-\`\`\`
-FROM employees
-| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
-| STATS hires_per_week = COUNT(*) BY week = BUCKET(hire_date, 1 week)
-| SORT week
-\`\`\`
-
-**NOTE**: When providing the bucket size as the second parameter, it must be a time duration or date period.
-
-\`BUCKET\` can also operate on numeric fields. For example, to create a salary histogram:
-
-\`\`\`
-FROM employees
-| STATS COUNT(*) by bs = BUCKET(salary, 20, 25324, 74999)
-| SORT bs
-\`\`\`
-
-Unlike the earlier example that intentionally filters on a date range, you rarely want to filter on a numeric range. You have to find the min and max separately. ES|QL doesn’t yet have an easy way to do that automatically.
-
-The range can be omitted if the desired bucket size is known in advance. Simply provide it as the second argument:
-
-\`\`\`
-FROM employees
-| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
-| STATS c = COUNT(1) BY b = BUCKET(salary, 5000.)
-| SORT b
-\`\`\`
-
-**NOTE**: When providing the bucket size as the second parameter, it must be of a **floating point type**.
-
-Here's an example to create hourly buckets for the last 24 hours, and calculate the number of events per hour:
-
-\`\`\`
-FROM sample_data
-| WHERE @timestamp >= NOW() - 1 day and @timestamp < NOW()
-| STATS COUNT(*) BY bucket = BUCKET(@timestamp, 25, NOW() - 1 day, NOW())
-\`\`\`
-
-Here's an example  to create monthly buckets for the year 1985, and calculate the average salary by hiring month:
-
-\`\`\`
-FROM employees
-| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
-| STATS AVG(salary) BY bucket = BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
-| SORT bucket
-\`\`\`
-
-\`BUCKET\` may be used in both the aggregating and grouping part of the \`STATS … BY …\` command, provided that in the aggregating part the function is **referenced by an alias defined in the grouping part**, or that it is invoked with the exact same expression.
-
-For example:
-
-\`\`\`
-FROM employees
-| STATS s1 = b1 + 1, s2 = BUCKET(salary / 1000 + 999, 50.) + 2 BY b1 = BUCKET(salary / 100 + 99, 50.), b2 = BUCKET(salary / 1000 + 999, 50.)
-| SORT b1, b2
-| KEEP s1, b1, s2, b2
-\`\`\`
-              `,
-              description:
-                'Text is in markdown. Do not translate function names, special characters, or field names like sum(bytes)',
-            }
-          )}
-        />
-      ),
-    },
-  ],
-};
-
 export const operators = {
   label: i18n.translate('languageDocumentation.documentationESQL.operators', {
     defaultMessage: 'Operators',
@@ -1005,3 +872,4 @@ FROM employees
 
 export { functions as scalarFunctions } from './generated/scalar_functions';
 export { functions as aggregationFunctions } from './generated/aggregation_functions';
+export { functions as groupingFunctions } from './generated/grouping_functions';
diff --git a/...packages/private/kbn-language-documentation/src/sections/generated/grouping_functions.tsx b/...packages/private/kbn-language-documentation/src/sections/generated/grouping_functions.tsx
@@ -0,0 +1,99 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+import React from 'react';
+import { i18n } from '@kbn/i18n';
+import { Markdown } from '@kbn/shared-ux-markdown';
+
+// DO NOT RENAME!
+export const functions = {
+  label: i18n.translate('languageDocumentation.documentationESQL.groupingFunctions', {
+    defaultMessage: 'Grouping functions',
+  }),
+  description: i18n.translate(
+    'languageDocumentation.documentationESQL.groupingFunctionsDocumentationESQLDescription',
+    {
+      defaultMessage: `These grouping functions can be used with \`STATS...BY\`:`,
+    }
+  ),
+  // items are managed by scripts/generate_esql_docs.ts
+  items: [
+    // Do not edit manually... automatically generated by scripts/generate_esql_docs.ts
+    {
+      label: i18n.translate('languageDocumentation.documentationESQL.bucket', {
+        defaultMessage: 'BUCKET',
+      }),
+      preview: false,
+      description: (
+        <Markdown
+          openLinksInNewTab
+          readOnly
+          enableSoftLineBreaks
+          markdownContent={i18n.translate(
+            'languageDocumentation.documentationESQL.bucket.markdown',
+            {
+              defaultMessage: `<!--
+  This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.
+  -->
+
+  ### BUCKET
+  Creates groups of values - buckets - out of a datetime or numeric input.
+  The size of the buckets can either be provided directly, or chosen based on a recommended count and values range.
+
+  \`\`\`
+  FROM employees
+  | WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
+  | STATS hire_date = MV_SORT(VALUES(hire_date)) BY month = BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
+  | SORT hire_date
+  \`\`\`
+  `,
+              description:
+                'Text is in markdown. Do not translate function names, special characters, or field names like sum(bytes)',
+              ignoreTag: true,
+            }
+          )}
+        />
+      ),
+    },
+    // Do not edit manually... automatically generated by scripts/generate_esql_docs.ts
+    {
+      label: i18n.translate('languageDocumentation.documentationESQL.categorize', {
+        defaultMessage: 'CATEGORIZE',
+      }),
+      preview: true,
+      description: (
+        <Markdown
+          openLinksInNewTab
+          readOnly
+          enableSoftLineBreaks
+          markdownContent={i18n.translate(
+            'languageDocumentation.documentationESQL.categorize.markdown',
+            {
+              defaultMessage: `<!--
+  This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.
+  -->
+
+  ### CATEGORIZE
+  Groups text messages into categories of similarly formatted text values.
+
+  \`\`\`
+  FROM sample_data
+  | STATS count=COUNT() BY category=CATEGORIZE(message)
+  \`\`\`
+  `,
+              description:
+                'Text is in markdown. Do not translate function names, special characters, or field names like sum(bytes)',
+              ignoreTag: true,
+            }
+          )}
+        />
+      ),
+    },
+  ],
+};
diff --git a/...m/packages/private/kbn-language-documentation/src/sections/generated/scalar_functions.tsx b/...m/packages/private/kbn-language-documentation/src/sections/generated/scalar_functions.tsx
@@ -212,42 +212,6 @@ export const functions = {
   | EVAL fn_length = LENGTH(city), fn_bit_length = BIT_LENGTH(city)
   \`\`\`
   Note: All strings are in UTF-8, so a single character can use multiple bytes.
-  `,
-              description:
-                'Text is in markdown. Do not translate function names, special characters, or field names like sum(bytes)',
-              ignoreTag: true,
-            }
-          )}
-        />
-      ),
-    },
-    {
-      label: i18n.translate('languageDocumentation.documentationESQL.bucket', {
-        defaultMessage: 'BUCKET',
-      }),
-      preview: false,
-      description: (
-        <Markdown
-          openLinksInNewTab
-          readOnly
-          enableSoftLineBreaks
-          markdownContent={i18n.translate(
-            'languageDocumentation.documentationESQL.bucket.markdown',
-            {
-              defaultMessage: `<!--
-  This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.
-  -->
-
-  ### BUCKET
-  Creates groups of values - buckets - out of a datetime or numeric input.
-  The size of the buckets can either be provided directly, or chosen based on a recommended count and values range.
-
-  \`\`\`
-  FROM employees
-  | WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
-  | STATS hire_date = MV_SORT(VALUES(hire_date)) BY month = BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
-  | SORT hire_date
-  \`\`\`
   `,
               description:
                 'Text is in markdown. Do not translate function names, special characters, or field names like sum(bytes)',
@@ -334,37 +298,6 @@ export const functions = {
         />
       ),
     },
-    {
-      label: i18n.translate('languageDocumentation.documentationESQL.categorize', {
-        defaultMessage: 'CATEGORIZE',
-      }),
-      preview: true,
-      description: (
-        <Markdown
-          openLinksInNewTab
-          readOnly
-          enableSoftLineBreaks
-          markdownContent={i18n.translate(
-            'languageDocumentation.documentationESQL.categorize.markdown',
-            {
-              defaultMessage: `<!--
-  This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.
-  -->
-  ### CATEGORIZE
-  Groups text messages into categories of similarly formatted text values.
-  \`\`\`
-  FROM sample_data
-  | STATS count=COUNT() BY category=CATEGORIZE(message)
-  \`\`\`
-  `,
-              description:
-                'Text is in markdown. Do not translate function names, special characters, or field names like sum(bytes)',
-              ignoreTag: true,
-            }
-          )}
-        />
-      ),
-    },
     // Do not edit manually... automatically generated by scripts/generate_esql_docs.ts
     {
       label: i18n.translate('languageDocumentation.documentationESQL.cbrt', {