Skip to content

Commit e26fc45

Browse files
authored
ref(trace): output a flattened list of tokens with injected AND and proper parentheses as tokens (#69591)
This will enable us to construct an ast based on the precedence order. From here, all we need to do is convert to infix/postfix and evaluate the expression
1 parent f8d3a1e commit e26fc45

File tree

2 files changed

+292
-0
lines changed

2 files changed

+292
-0
lines changed
Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
import {
2+
insertImplicitAND,
3+
type ProcessedTokenResult,
4+
toFlattened,
5+
} from 'sentry/components/searchSyntax/evaluator';
6+
import {
7+
parseSearch,
8+
Token,
9+
type TokenResult,
10+
} from 'sentry/components/searchSyntax/parser';
11+
12+
const tokensToString = (tokens: ProcessedTokenResult[]): string => {
13+
let str = '';
14+
15+
for (const token of tokens) {
16+
let concatstr;
17+
switch (token.type) {
18+
case Token.FREE_TEXT:
19+
concatstr = token.text;
20+
break;
21+
case Token.SPACES:
22+
concatstr = 'space';
23+
break;
24+
case Token.VALUE_DURATION:
25+
case Token.VALUE_BOOLEAN:
26+
case Token.VALUE_NUMBER:
27+
case Token.VALUE_SIZE:
28+
case Token.VALUE_PERCENTAGE:
29+
case Token.VALUE_TEXT:
30+
case Token.VALUE_ISO_8601_DATE:
31+
case Token.VALUE_RELATIVE_DATE:
32+
concatstr = token.value;
33+
break;
34+
case Token.LOGIC_GROUP:
35+
case Token.LOGIC_BOOLEAN:
36+
concatstr = token.text;
37+
break;
38+
case Token.KEY_SIMPLE:
39+
concatstr = token.text + ':';
40+
break;
41+
case Token.VALUE_NUMBER_LIST:
42+
case Token.VALUE_TEXT_LIST:
43+
concatstr = token.text;
44+
break;
45+
case Token.KEY_EXPLICIT_TAG:
46+
concatstr = token.key;
47+
break;
48+
case 'L_PAREN': {
49+
concatstr = '(';
50+
break;
51+
}
52+
case 'R_PAREN': {
53+
concatstr = ')';
54+
break;
55+
}
56+
default: {
57+
concatstr = token.text;
58+
break;
59+
}
60+
}
61+
62+
// The parsing logic text() captures leading/trailing spaces in some cases.
63+
// We'll just trim them so the tests are easier to read.
64+
str += concatstr.trim();
65+
str += concatstr && tokens.indexOf(token) !== tokens.length - 1 ? ' ' : '';
66+
}
67+
68+
return str;
69+
};
70+
71+
function assertTokens(
72+
tokens: TokenResult<Token>[] | null
73+
): asserts tokens is TokenResult<Token>[] {
74+
if (tokens === null) {
75+
throw new Error('Expected tokens to be an array');
76+
}
77+
}
78+
79+
// Integration-style tests for the evaluator helpers: parseSearch builds the
// AST, toFlattened linearizes it into a token list, and insertImplicitAND
// injects the boolean operators the search syntax lets users omit.
describe('Search Syntax Evaluator', () => {
  describe('flatten tree', () => {
    it('flattens simple expressions', () => {
      const tokens = parseSearch('is:unresolved duration:>1h');
      assertTokens(tokens);
      const flattened = toFlattened(tokens);
      // Each filter collapses to one token; spaces, keys and values are dropped.
      expect(flattened).toHaveLength(2);
      expect(tokensToString(flattened)).toBe('is:unresolved duration:>1h');
    });
    it('handles filters', () => {
      const tokens = parseSearch('has:unresolved duration:[1,2,3]');
      assertTokens(tokens);
      const flattened = toFlattened(tokens);
      expect(flattened).toHaveLength(2);
      expect(tokensToString(flattened)).toBe('has:unresolved duration:[1,2,3]');
    });
    it('handles free text', () => {
      const tokens = parseSearch('hello world');
      assertTokens(tokens);
      const flattened = toFlattened(tokens);
      // Free text is a single FREE_TEXT token, not one token per word.
      expect(flattened).toHaveLength(1);
      expect(tokensToString(flattened)).toBe('hello world');
    });
    it('handles logical booleans', () => {
      const tokens = parseSearch('hello AND world');
      assertTokens(tokens);
      const flattened = toFlattened(tokens);
      expect(flattened).toHaveLength(3);
      expect(tokensToString(flattened)).toBe('hello AND world');
    });
    it('handles logical groups', () => {
      const tokens = parseSearch('is:unresolved AND (is:dead OR is:alive)');
      assertTokens(tokens);
      const flattened = toFlattened(tokens);
      // The group contributes synthetic "(" and ")" tokens:
      // 3 filters + AND + OR + 2 parens = 7.
      expect(flattened).toHaveLength(7);
      expect(tokensToString(flattened)).toBe('is:unresolved AND ( is:dead OR is:alive )');
    });
  });

  describe('injects implicit AND', () => {
    describe('boolean operators', () => {
      it('implicit AND', () => {
        const tokens = toFlattened(parseSearch('is:unresolved duration:>1h')!);
        const withImplicitAND = insertImplicitAND(tokens);
        expect(tokensToString(withImplicitAND)).toBe('is:unresolved AND duration:>1h');
      });

      it('explicit AND', () => {
        // An explicit AND must not be duplicated.
        const tokens = toFlattened(parseSearch('is:unresolved AND duration:>1h')!);
        const withImplicitAND = insertImplicitAND(tokens);
        expect(tokensToString(withImplicitAND)).toBe('is:unresolved AND duration:>1h');
      });

      it('multiple implicit AND', () => {
        const tokens = toFlattened(
          parseSearch('is:unresolved duration:>1h duration:<1m')!
        );
        const withImplicitAND = insertImplicitAND(tokens);
        expect(tokensToString(withImplicitAND)).toBe(
          'is:unresolved AND duration:>1h AND duration:<1m'
        );
      });

      it('explicit OR', () => {
        // OR already separates the operands; no AND should be inserted.
        const tokens = toFlattened(parseSearch('is:unresolved OR duration:>1h')!);
        const withImplicitAND = insertImplicitAND(tokens);
        expect(tokensToString(withImplicitAND)).toBe('is:unresolved OR duration:>1h');
      });

      it('multiple explicit OR', () => {
        const tokens = toFlattened(
          parseSearch('is:unresolved OR duration:>1h OR duration:<1h')!
        );
        const withImplicitAND = insertImplicitAND(tokens);
        expect(tokensToString(withImplicitAND)).toBe(
          'is:unresolved OR duration:>1h OR duration:<1h'
        );
      });

      it('with logical groups', () => {
        // AND is inserted before a group but never directly inside its parens.
        const tokens = toFlattened(parseSearch('is:unresolved (duration:>1h)')!);
        const withImplicitAND = insertImplicitAND(tokens);
        expect(tokensToString(withImplicitAND)).toBe(
          'is:unresolved AND ( duration:>1h )'
        );
      });
    });

    describe('logical groups', () => {
      it('explicit OR', () => {
        const tokens = toFlattened(parseSearch('is:unresolved OR ( duration:>1h )')!);
        const withImplicitAND = insertImplicitAND(tokens);
        expect(tokensToString(withImplicitAND)).toBe('is:unresolved OR ( duration:>1h )');
      });
      it('explicit AND', () => {
        const tokens = toFlattened(parseSearch('is:unresolved AND ( duration:>1h )')!);
        expect(tokensToString(tokens)).toBe('is:unresolved AND ( duration:>1h )');
      });
    });

    describe('complex expressions', () => {
      it('handles complex expressions', () => {
        const tokens = toFlattened(
          parseSearch('is:unresolved AND ( duration:>1h OR duration:<1h )')!
        );
        expect(tokensToString(tokens)).toBe(
          'is:unresolved AND ( duration:>1h OR duration:<1h )'
        );
      });

      it('handles complex expressions with implicit AND', () => {
        // ANDs are injected at every nesting level, never across paren boundaries.
        const tokens = toFlattened(
          parseSearch('is:unresolved ( duration:>1h OR ( duration:<1h duration:1m ) )')!
        );
        const withImplicitAND = insertImplicitAND(tokens);
        expect(tokensToString(withImplicitAND)).toBe(
          'is:unresolved AND ( duration:>1h OR ( duration:<1h AND duration:1m ) )'
        );
      });
    });
  });
});
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
// To evaluate a result of the search syntax, we flatten the AST, transform it
// to postfix notation — which removes parentheses and tokens that do not hold
// any value (they cannot be evaluated on their own) — and then evaluate the
// postfix notation.
5+
6+
import {
7+
BooleanOperator,
8+
Token,
9+
type TokenResult,
10+
} from 'sentry/components/searchSyntax/parser';
11+
12+
// A parser token, or one of the synthetic parenthesis markers that toFlattened
// emits when it unwraps a LOGIC_GROUP (the parser itself does not produce
// paren tokens).
export type ProcessedTokenResult =
  | TokenResult<Token>
  | {type: 'L_PAREN'}
  | {type: 'R_PAREN'};
16+
17+
export function toFlattened(tokens: TokenResult<Token>[]): ProcessedTokenResult[] {
18+
const flattened_result: ProcessedTokenResult[] = [];
19+
20+
function flatten(token: TokenResult<Token>): void {
21+
switch (token.type) {
22+
case Token.SPACES:
23+
case Token.VALUE_BOOLEAN:
24+
case Token.VALUE_DURATION:
25+
case Token.VALUE_ISO_8601_DATE:
26+
case Token.VALUE_SIZE:
27+
case Token.VALUE_NUMBER_LIST:
28+
case Token.VALUE_NUMBER:
29+
case Token.VALUE_TEXT:
30+
case Token.VALUE_TEXT_LIST:
31+
case Token.VALUE_RELATIVE_DATE:
32+
case Token.VALUE_PERCENTAGE:
33+
case Token.KEY_SIMPLE:
34+
return;
35+
case Token.LOGIC_GROUP:
36+
flattened_result.push({type: 'L_PAREN'});
37+
for (const child of token.inner) {
38+
// Logic groups are wrapped in parenthesis,
39+
// but those parenthesis are not actual tokens returned by the parser
40+
flatten(child);
41+
}
42+
flattened_result.push({type: 'R_PAREN'});
43+
break;
44+
case Token.LOGIC_BOOLEAN:
45+
flattened_result.push(token);
46+
break;
47+
default:
48+
flattened_result.push(token);
49+
break;
50+
}
51+
}
52+
53+
for (let i = 0; i < tokens.length; i++) {
54+
flatten(tokens[i]);
55+
}
56+
57+
return flattened_result;
58+
}
59+
60+
// At this point we have a flat list of groups that we can evaluate, however since the syntax allows
61+
// implicit ANDs, we should still insert those as it will make constructing a valid AST easier
62+
export function insertImplicitAND(
63+
tokens: ProcessedTokenResult[]
64+
): ProcessedTokenResult[] {
65+
const with_implicit_and: ProcessedTokenResult[] = [];
66+
67+
const AND = {
68+
type: Token.LOGIC_BOOLEAN,
69+
value: BooleanOperator.AND,
70+
text: 'AND',
71+
location: null as unknown as PEG.LocationRange,
72+
invalid: null,
73+
} as TokenResult<Token>;
74+
75+
for (let i = 0; i < tokens.length; i++) {
76+
const next = tokens[i + 1];
77+
with_implicit_and.push(tokens[i]);
78+
79+
// If current is not a logic boolean and next is not a logic boolean, insert an implicit AND.
80+
if (
81+
next &&
82+
next.type !== Token.LOGIC_BOOLEAN &&
83+
tokens[i].type !== Token.LOGIC_BOOLEAN &&
84+
tokens[i].type !== 'L_PAREN' &&
85+
next.type !== 'R_PAREN'
86+
) {
87+
with_implicit_and.push(AND);
88+
}
89+
}
90+
91+
return with_implicit_and;
92+
}

0 commit comments

Comments
 (0)