From 3bab06d67dbad31dcb967e8d9b5d136fc74443dc Mon Sep 17 00:00:00 2001 From: Simeon Widdis Date: Tue, 25 Feb 2025 13:40:10 -0800 Subject: [PATCH] Clean up syntax error reporting (#3278) * Clean up syntax error reporting Signed-off-by: Simeon Widdis * Reuse contextStartIndex variable Signed-off-by: Simeon Widdis * Update tests for new error message Signed-off-by: Simeon Widdis * Update a comment Signed-off-by: Simeon Widdis * Simplify details construction Signed-off-by: Simeon Widdis * Apply feedback: clean up suggestion extraction Signed-off-by: Simeon Widdis * Apply feedback: clean up suggestion extraction Signed-off-by: Simeon Widdis * Apply feedback: move exception fragment to const Signed-off-by: Simeon Widdis * Apply spotless Signed-off-by: Simeon Widdis * Remove weird extra reports Signed-off-by: Simeon Widdis * Add some exact error output tests + fix bug Signed-off-by: Simeon Widdis * Fix the flaky tests I just introduced Signed-off-by: Simeon Widdis --------- Signed-off-by: Simeon Widdis --- .../antlr/SyntaxAnalysisErrorListener.java | 60 ++++++++++++++----- docs/user/dql/troubleshooting.rst | 16 ++--- .../opensearch/sql/legacy/TestsConstants.java | 3 + .../opensearch/sql/ppl/DescribeCommandIT.java | 3 +- .../opensearch/sql/ppl/QueryAnalysisIT.java | 10 ++-- .../opensearch/sql/ppl/SearchCommandIT.java | 5 +- .../sql/ppl/antlr/PPLSyntaxParserTest.java | 22 ++++++- 7 files changed, 86 insertions(+), 33 deletions(-) diff --git a/common/src/main/java/org/opensearch/sql/common/antlr/SyntaxAnalysisErrorListener.java b/common/src/main/java/org/opensearch/sql/common/antlr/SyntaxAnalysisErrorListener.java index 76cbf52d58..9cf118f64a 100644 --- a/common/src/main/java/org/opensearch/sql/common/antlr/SyntaxAnalysisErrorListener.java +++ b/common/src/main/java/org/opensearch/sql/common/antlr/SyntaxAnalysisErrorListener.java @@ -5,12 +5,15 @@ package org.opensearch.sql.common.antlr; +import java.util.ArrayList; +import java.util.List; import java.util.Locale; import org.antlr.v4.runtime.BaseErrorListener; import org.antlr.v4.runtime.CommonTokenStream; import org.antlr.v4.runtime.RecognitionException; import org.antlr.v4.runtime.Recognizer; import org.antlr.v4.runtime.Token; +import org.antlr.v4.runtime.Vocabulary; import org.antlr.v4.runtime.misc.IntervalSet; /** @@ -18,6 +21,10 @@ * information. */ public class SyntaxAnalysisErrorListener extends BaseErrorListener { + // Show up to this many characters before the offending token in the query. + private static final int CONTEXT_TRUNCATION_THRESHOLD = 20; + // Avoid presenting too many alternatives when many are available. + private static final int SUGGESTION_TRUNCATION_THRESHOLD = 5; @Override public void syntaxError( @@ -35,8 +42,7 @@ public void syntaxError( throw new SyntaxCheckException( String.format( Locale.ROOT, - "Failed to parse query due to offending symbol [%s] " - + "at: '%s' <--- HERE... More details: %s", + "[%s] is not a valid term at this part of the query: '%s' <-- HERE. %s", getOffendingText(offendingToken), truncateQueryAtOffendingToken(query, offendingToken), getDetails(recognizer, msg, e))); @@ -47,21 +53,47 @@ private String getOffendingText(Token offendingToken) { } private String truncateQueryAtOffendingToken(String query, Token offendingToken) { - return query.substring(0, offendingToken.getStopIndex() + 1); + int contextStartIndex = offendingToken.getStartIndex() - CONTEXT_TRUNCATION_THRESHOLD; + if (contextStartIndex < 3) { // The ellipses won't save us anything below the first 4 characters + return query.substring(0, offendingToken.getStopIndex() + 1); + } + return "..." + query.substring(contextStartIndex, offendingToken.getStopIndex() + 1); + } + + private List topSuggestions(Recognizer recognizer, IntervalSet continuations) { + Vocabulary vocab = recognizer.getVocabulary(); + List tokenNames = new ArrayList<>(SUGGESTION_TRUNCATION_THRESHOLD); + for (int tokenType : + continuations + .toList() + .subList(0, Math.min(continuations.size(), SUGGESTION_TRUNCATION_THRESHOLD))) { + tokenNames.add(vocab.getDisplayName(tokenType)); + } + return tokenNames; } - /** - * As official JavaDoc says, e=null means parser was able to recover from the error. In other - * words, "msg" argument includes the information we want. - */ - private String getDetails(Recognizer recognizer, String msg, RecognitionException e) { - String details; - if (e == null) { - details = msg; + private String getDetails(Recognizer recognizer, String msg, RecognitionException ex) { + if (ex == null) { + // According to the ANTLR docs, ex == null means the parser was able to recover from the + // error. + // In such cases, `msg` includes the raw error information we care about. + return msg; + } + + IntervalSet possibleContinuations = ex.getExpectedTokens(); + List suggestions = topSuggestions(recognizer, possibleContinuations); + + StringBuilder details = new StringBuilder("Expecting "); + if (possibleContinuations.size() > SUGGESTION_TRUNCATION_THRESHOLD) { + details + .append("one of ") + .append(possibleContinuations.size()) + .append(" possible tokens. Some examples: ") + .append(String.join(", ", suggestions)) + .append(", ..."); } else { - IntervalSet followSet = e.getExpectedTokens(); - details = "Expecting tokens in " + followSet.toString(recognizer.getVocabulary()); + details.append("tokens: ").append(String.join(", ", suggestions)); } - return details; + return details.toString(); } } diff --git a/docs/user/dql/troubleshooting.rst b/docs/user/dql/troubleshooting.rst index b57479c374..20a9f0e531 100644 --- a/docs/user/dql/troubleshooting.rst +++ b/docs/user/dql/troubleshooting.rst @@ -25,9 +25,9 @@ Query: .. code-block:: JSON - POST /_plugins/_sql + POST /_plugins/_ppl { - "query" : "SELECT * FROM sample:data" + "query" : "SOURCE = test_index | where a > 0)" } Result: @@ -35,12 +35,12 @@ Result: .. code-block:: JSON { - "reason": "Invalid SQL query", - "details": "Failed to parse query due to offending symbol [:] at: 'SELECT * FROM xxx WHERE xxx:' <--- HERE... - More details: Expecting tokens in {, 'AND', 'BETWEEN', 'GROUP', 'HAVING', 'IN', 'IS', 'LIKE', 'LIMIT', - 'NOT', 'OR', 'ORDER', 'REGEXP', '*', '/', '%', '+', '-', 'DIV', 'MOD', '=', '>', '<', '!', - '|', '&', '^', '.', DOT_ID}", - "type": "SyntaxAnalysisException" + "error": { + "reason": "Invalid Query", + "details": "[)] is not a valid term at this part of the query: '..._index | where a > 0)' <-- HERE. extraneous input ')' expecting ", + "type": "SyntaxCheckException" + }, + "status": 400 } **Workaround** diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java index 4601aadf7f..bbbeb49b11 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java @@ -66,4 +66,7 @@ public class TestsConstants { public static final String DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"; public static final String TS_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss.SSS"; public static final String SIMPLE_DATE_FORMAT = "yyyy-MM-dd"; + + public static final String SYNTAX_EX_MSG_FRAGMENT = + "is not a valid term at this part of the query"; } diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/DescribeCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/DescribeCommandIT.java index aee32e08d1..11d6487ec4 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/DescribeCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/DescribeCommandIT.java @@ -5,6 +5,7 @@ package org.opensearch.sql.ppl; +import static org.opensearch.sql.legacy.TestsConstants.SYNTAX_EX_MSG_FRAGMENT; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DOG; import static org.opensearch.sql.util.MatcherUtils.columnName; import static org.opensearch.sql.util.MatcherUtils.verifyColumn; @@ -80,7 +81,7 @@ public void describeCommandWithoutIndexShouldFailToParse() throws IOException { fail(); } catch (ResponseException e) { assertTrue(e.getMessage().contains("RuntimeException")); - assertTrue(e.getMessage().contains("Failed to parse query due to offending symbol")); + assertTrue(e.getMessage().contains(SYNTAX_EX_MSG_FRAGMENT)); } } } diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/QueryAnalysisIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/QueryAnalysisIT.java index 80a89ed9c3..0e48e72eb6 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/QueryAnalysisIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/QueryAnalysisIT.java @@ -5,6 +5,7 @@ package org.opensearch.sql.ppl; +import static org.opensearch.sql.legacy.TestsConstants.SYNTAX_EX_MSG_FRAGMENT; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT; import java.io.IOException; @@ -14,7 +15,6 @@ import org.opensearch.sql.exception.SemanticCheckException; public class QueryAnalysisIT extends PPLIntegTestCase { - @Override public void init() throws IOException { loadIndex(Index.ACCOUNT); @@ -82,25 +82,25 @@ public void queryShouldBeCaseInsensitiveInKeywords() { @Test public void queryNotStartingWithSearchCommandShouldFailSyntaxCheck() { String query = "fields firstname"; - queryShouldThrowSyntaxException(query, "Failed to parse query due to offending symbol"); + queryShouldThrowSyntaxException(query, SYNTAX_EX_MSG_FRAGMENT); } @Test public void queryWithIncorrectCommandShouldFailSyntaxCheck() { String query = String.format("search source=%s | field firstname", TEST_INDEX_ACCOUNT); - queryShouldThrowSyntaxException(query, "Failed to parse query due to offending symbol"); + queryShouldThrowSyntaxException(query, SYNTAX_EX_MSG_FRAGMENT); } @Test public void queryWithIncorrectKeywordsShouldFailSyntaxCheck() { String query = String.format("search sources=%s", TEST_INDEX_ACCOUNT); - queryShouldThrowSyntaxException(query, "Failed to parse query due to offending symbol"); + queryShouldThrowSyntaxException(query, SYNTAX_EX_MSG_FRAGMENT); } @Test public void unsupportedAggregationFunctionShouldFailSyntaxCheck() { String query = String.format("search source=%s | stats range(age)", TEST_INDEX_ACCOUNT); - queryShouldThrowSyntaxException(query, "Failed to parse query due to offending symbol"); + queryShouldThrowSyntaxException(query, SYNTAX_EX_MSG_FRAGMENT); } /** Commands that fail semantic analysis should throw {@link SemanticCheckException}. */ diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/SearchCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/SearchCommandIT.java index 5d1b0203d7..5582cb961a 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/SearchCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/SearchCommandIT.java @@ -5,8 +5,7 @@ package org.opensearch.sql.ppl; -import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; -import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DOG; +import static org.opensearch.sql.legacy.TestsConstants.*; import static org.opensearch.sql.util.MatcherUtils.columnName; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.verifyColumn; @@ -64,7 +63,7 @@ public void searchCommandWithoutSourceShouldFailToParse() throws IOException { fail(); } catch (ResponseException e) { assertTrue(e.getMessage().contains("RuntimeException")); - assertTrue(e.getMessage().contains("Failed to parse query due to offending symbol")); + assertTrue(e.getMessage().contains(SYNTAX_EX_MSG_FRAGMENT)); } } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java index 2645be3aca..b7789ae170 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java @@ -11,6 +11,7 @@ import java.util.List; import org.antlr.v4.runtime.tree.ParseTree; +import org.hamcrest.text.StringContainsInOrder; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; @@ -95,7 +96,7 @@ public void testSearchFieldsCommandCrossClusterShouldPass() { @Test public void testSearchCommandWithoutSourceShouldFail() { exceptionRule.expect(RuntimeException.class); - exceptionRule.expectMessage("Failed to parse query due to offending symbol"); + exceptionRule.expectMessage("is not a valid term at this part of the query"); new PPLSyntaxParser().parse("search a=1"); } @@ -337,11 +338,28 @@ public void testDescribeFieldsCommandShouldPass() { @Test public void testDescribeCommandWithSourceShouldFail() { exceptionRule.expect(RuntimeException.class); - exceptionRule.expectMessage("Failed to parse query due to offending symbol"); + exceptionRule.expectMessage( + StringContainsInOrder.stringContainsInOrder( + "[=] is not a valid term at this part of the query: 'describe source=' <-- HERE.", + "Expecting tokens:")); new PPLSyntaxParser().parse("describe source=t"); } + @Test + public void testInvalidOperatorCombinationShouldFail() { + exceptionRule.expect(RuntimeException.class); + exceptionRule.expectMessage( + StringContainsInOrder.stringContainsInOrder( + "[] is not a valid term at this part of the query: '...= t | where x > y OR' <--" + + " HERE.", + "Expecting one of ", + " possible tokens. Some examples: ", + "...")); + + new PPLSyntaxParser().parse("source = t | where x > y OR"); + } + @Test public void testCanParseExtractFunction() { String[] parts =