Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: complete after error syntax #334

Open
wants to merge 4 commits into
base: next
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
224 changes: 173 additions & 51 deletions src/parser/common/basicSQL.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import {
ParseTreeListener,
PredictionMode,
ANTLRErrorListener,
Parser,
} from 'antlr4ng';
import { CandidatesCollection, CodeCompletionCore } from 'antlr4-c3';
import { SQLParserBase } from '../../lib/SQLParserBase';
Expand All @@ -28,6 +29,8 @@ import type { EntityCollector } from './entityCollector';
import { EntityContext } from './entityCollector';
import SemanticContextCollector from './semanticContextCollector';

export const SQL_SPLIT_SYMBOL_TEXT = ';';

/**
* Basic SQL class, every sql needs extends it.
*/
Expand Down Expand Up @@ -264,7 +267,6 @@ export abstract class BasicSQL<
return null;
}
const splitListener = this.splitListener;

this.listen(splitListener, this._parseTree);

const res = splitListener.statementsContext
Expand All @@ -277,35 +279,102 @@ export abstract class BasicSQL<
}

/**
* Get a minimum boundary parser near tokenIndex.
* @param input source string.
* @param tokenIndex start from which index to minimize the boundary.
* @param originParseTree the parse tree need to be minimized, default value is the result of parsing `input`.
* @returns minimum parser info
* Get the smaller range of input
* @param input string
* @param allTokens all tokens from input
* @param tokenIndexOffset offset of the tokenIndex in the range of input
* @param caretTokenIndex tokenIndex of caretPosition
* @returns inputSlice: string, caretTokenIndex: number
*/
public getMinimumParserInfo(
private splitInputBySymbolText(
input: string,
tokenIndex: number,
originParseTree?: ParserRuleContext | null
) {
if (arguments.length <= 2) {
this.parseWithCache(input);
originParseTree = this._parseTree;
allTokens: Token[],
tokenIndexOffset: number,
caretTokenIndex: number
): { inputSlice: string; allTokens: Token[]; caretTokenIndex: number } {
const tokens = allTokens.slice(tokenIndexOffset);
/**
* Set startToken
*/
let startToken: Token | null = null;
for (let tokenIndex = caretTokenIndex - tokenIndexOffset; tokenIndex >= 0; tokenIndex--) {
const token = tokens[tokenIndex];
if (token?.text === SQL_SPLIT_SYMBOL_TEXT) {
startToken = tokens[tokenIndex + 1];
break;
}
}
if (startToken === null) {
startToken = tokens[0];
}

/**
* Set stopToken
*/
let stopToken: Token | null = null;
for (
let tokenIndex = caretTokenIndex - tokenIndexOffset;
tokenIndex < tokens.length;
tokenIndex++
) {
const token = tokens[tokenIndex];
if (token?.text === SQL_SPLIT_SYMBOL_TEXT) {
stopToken = token;
break;
}
}
if (stopToken === null) {
stopToken = tokens[tokens.length - 1];
}

const indexOffset = tokens[0].start;
let startIndex = startToken.start - indexOffset;
let stopIndex = stopToken.stop + 1 - indexOffset;

/**
* Save offset of the tokenIndex in the range of input
* compared to the tokenIndex in the whole input
*/
const _tokenIndexOffset = startToken.tokenIndex;
const _caretTokenIndex = caretTokenIndex - _tokenIndexOffset;

/**
* Get the smaller range of _input
*/
const _input = input.slice(startIndex, stopIndex);

return {
inputSlice: _input,
allTokens: allTokens.slice(_tokenIndexOffset),
caretTokenIndex: _caretTokenIndex,
};
}

/**
* Get the minimum input string that can be parsed successfully by c3.
* @param input source string
* @param caretTokenIndex tokenIndex of caretPosition
* @param originParseTree origin parseTree
* @returns MinimumInputInfo
*/
public getMinimumInputInfo(
input: string,
caretTokenIndex: number,
originParseTree: ParserRuleContext | undefined
): { input: string; tokenIndexOffset: number; statementCount: number } | null {
if (!originParseTree || !input?.length) return null;
let inputSlice = input;

const splitListener = this.splitListener;
/**
* Split sql by statement.
* Try to collect candidates in as small a range as possible.
*/
const splitListener = this.splitListener;
this.listen(splitListener, originParseTree);

const statementCount = splitListener.statementsContext?.length;
const statementsContext = splitListener.statementsContext;
let tokenIndexOffset = 0;
let sqlParserIns = this._parser;
let parseTree = originParseTree;

// If there are multiple statements.
if (statementCount > 1) {
Expand All @@ -330,14 +399,14 @@ export abstract class BasicSQL<
const isNextCtxValid =
index === statementCount - 1 || !statementsContext[index + 1]?.exception;

if (ctx.stop && ctx.stop.tokenIndex < tokenIndex && isPrevCtxValid) {
if (ctx.stop && ctx.stop.tokenIndex < caretTokenIndex && isPrevCtxValid) {
startStatement = ctx;
}

if (
ctx.start &&
!stopStatement &&
ctx.start.tokenIndex > tokenIndex &&
ctx.start.tokenIndex > caretTokenIndex &&
isNextCtxValid
) {
stopStatement = ctx;
Expand All @@ -347,41 +416,64 @@ export abstract class BasicSQL<

// A boundary consisting of the index of the input.
const startIndex = startStatement?.start?.start ?? 0;
const stopIndex = stopStatement?.stop?.stop ?? input.length - 1;
const stopIndex = stopStatement?.stop?.stop ?? inputSlice.length - 1;

/**
* Save offset of the tokenIndex in the range of input
* compared to the tokenIndex in the whole input
*/
tokenIndexOffset = startStatement?.start?.tokenIndex ?? 0;
tokenIndex = tokenIndex - tokenIndexOffset;
inputSlice = inputSlice.slice(startIndex, stopIndex);
}

/**
* Reparse the input fragment,
* and c3 will collect candidates in the newly generated parseTree.
*/
const inputSlice = input.slice(startIndex, stopIndex);
return {
input: inputSlice,
tokenIndexOffset,
statementCount,
};
}

const lexer = this.createLexer(inputSlice);
lexer.removeErrorListeners();
const tokenStream = new CommonTokenStream(lexer);
tokenStream.fill();
/**
* Get a minimum boundary parser near caretTokenIndex.
* @param input source string.
* @param caretTokenIndex start from which index to minimize the boundary.
* @param originParseTree the parse tree need to be minimized, default value is the result of parsing `input`.
* @returns minimum parser info
*/
public getMinimumParserInfo(
input: string,
caretTokenIndex: number,
originParseTree: ParserRuleContext | undefined
): {
parser: Parser;
parseTree: ParserRuleContext;
tokenIndexOffset: number;
newTokenIndex: number;
} | null {
if (!originParseTree || !input?.length) return null;

const parser = this.createParserFromTokenStream(tokenStream);
parser.interpreter.predictionMode = PredictionMode.SLL;
parser.removeErrorListeners();
parser.buildParseTrees = true;
parser.errorHandler = new ErrorStrategy();
const inputInfo = this.getMinimumInputInfo(input, caretTokenIndex, originParseTree);
if (!inputInfo) return null;
const { input: inputSlice, tokenIndexOffset } = inputInfo;
caretTokenIndex = caretTokenIndex - tokenIndexOffset;

sqlParserIns = parser;
parseTree = parser.program();
let sqlParserIns = this._parser;
let parseTree = originParseTree;

/**
* Reparse the input fragment,
* and c3 will collect candidates in the newly generated parseTree when input changed.
*/
if (inputSlice !== input) {
sqlParserIns = this.createParser(inputSlice);
parseTree = sqlParserIns.program();
}

return {
parser: sqlParserIns,
parseTree,
tokenIndexOffset,
newTokenIndex: tokenIndex,
newTokenIndex: caretTokenIndex,
};
}

Expand All @@ -396,33 +488,63 @@ export abstract class BasicSQL<
caretPosition: CaretPosition
): Suggestions | null {
this.parseWithCache(input);

if (!this._parseTree) return null;

const allTokens = this.getAllTokens(input);
let allTokens = this.getAllTokens(input);
let caretTokenIndex = findCaretTokenIndex(caretPosition, allTokens);

if (!caretTokenIndex && caretTokenIndex !== 0) return null;

const minimumParser = this.getMinimumParserInfo(input, caretTokenIndex);
const inputInfo = this.getMinimumInputInfo(input, caretTokenIndex, this._parseTree);
if (!inputInfo) return null;
const { input: _input, tokenIndexOffset, statementCount } = inputInfo;
let inputSlice = _input;

/**
* Split the inputSlice by separator to get the smaller range of inputSlice.
*/
if (inputSlice.includes(SQL_SPLIT_SYMBOL_TEXT)) {
const {
inputSlice: _inputSlice,
allTokens: _allTokens,
caretTokenIndex: _caretTokenIndex,
} = this.splitInputBySymbolText(
inputSlice,
allTokens,
tokenIndexOffset,
caretTokenIndex
);

allTokens = _allTokens;
caretTokenIndex = _caretTokenIndex;
inputSlice = _inputSlice;
} else {
if (statementCount > 1) {
caretTokenIndex = caretTokenIndex - tokenIndexOffset;
}
}

let sqlParserIns = this._parser;
let parseTree = this._parseTree;

if (!minimumParser) return null;
/**
* Reparse the input fragment,
* and c3 will collect candidates in the newly generated parseTree when input changed.
*/
if (inputSlice !== input) {
sqlParserIns = this.createParser(inputSlice);
parseTree = sqlParserIns.program();
}

const {
parser: sqlParserIns,
tokenIndexOffset,
newTokenIndex,
parseTree: c3Context,
} = minimumParser;
const core = new CodeCompletionCore(sqlParserIns);
core.preferredRules = this.preferredRules;

const candidates = core.collectCandidates(newTokenIndex, c3Context);
const candidates = core.collectCandidates(caretTokenIndex, parseTree);
const originalSuggestions = this.processCandidates(
candidates,
allTokens,
newTokenIndex,
tokenIndexOffset
caretTokenIndex,
0
// tokenIndexOffset
);

const syntaxSuggestions: SyntaxSuggestion<WordRange>[] = originalSuggestions.syntax.map(
Expand Down
3 changes: 1 addition & 2 deletions src/parser/common/semanticContextCollector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@ import {
SemanticContext,
SqlSplitStrategy,
} from '../common/types';

export const SQL_SPLIT_SYMBOL_TEXT = ';';
import { SQL_SPLIT_SYMBOL_TEXT } from './basicSQL';

abstract class SemanticContextCollector {
constructor(
Expand Down
65 changes: 65 additions & 0 deletions test/parser/flink/suggestion/completeAfterSyntaxError.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import { FlinkSQL } from 'src/parser/flink';
import { CaretPosition, EntityContextType } from 'src/parser/common/types';

describe('FlinkSQL Complete After Syntax Error', () => {
    const flink = new FlinkSQL();

    // Each fixture places a syntactically broken statement before the caret;
    // the caret itself sits inside the following statement.
    const sql1 = `SELECT FROM tb2;\nINSERT INTO `;
    const sql2 = `SELECT FROM tb3;\nCREATE TABLE `;
    const sql3 = `SELECT FROM t1;\nSL`;

    /** Collect completion candidates at the given caret location. */
    const suggestAt = (sql: string, lineNumber: number, column: number) => {
        const pos: CaretPosition = { lineNumber, column };
        return flink.getSuggestionAtCaretPosition(sql, pos);
    };

    test('Syntax error but end with semi, should suggest tableName', () => {
        const suggestion = suggestAt(sql1, 2, 13);
        expect(suggestion).not.toBeUndefined();

        // syntax
        const syntaxes = suggestion?.syntax;
        expect(syntaxes.length).toBe(1);
        expect(syntaxes[0].syntaxContextType).toBe(EntityContextType.TABLE);

        // keyword
        const keywords = suggestion?.keywords;
        expect(keywords.length).toBe(0);
    });

    test('Syntax error but end with semi, should suggest tableNameCreate', () => {
        const suggestion = suggestAt(sql2, 2, 14);
        expect(suggestion).not.toBeUndefined();

        // syntax
        const syntaxes = suggestion?.syntax;
        expect(syntaxes.length).toBe(1);
        expect(syntaxes[0].syntaxContextType).toBe(EntityContextType.TABLE_CREATE);

        // keyword
        const keywords = suggestion?.keywords;
        expect(keywords).toMatchUnorderedArray(['IF', 'IF NOT EXISTS']);
    });

    test('Syntax error but end with semi, should suggest filter token', () => {
        const suggestion = suggestAt(sql3, 2, 2);
        expect(suggestion).not.toBeUndefined();

        // syntax
        const syntaxes = suggestion?.syntax;
        expect(syntaxes.length).toBe(0);

        // keyword
        const filterKeywords = suggestion?.keywords?.filter(
            (item) => item.startsWith('S') && /S(?=.*L)/.test(item)
        );
        expect(filterKeywords).toMatchUnorderedArray(['SELECT']);
    });
});
Loading