Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: complete after error syntax #334

Open
wants to merge 4 commits into
base: next
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
224 changes: 173 additions & 51 deletions src/parser/common/basicSQL.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import {
ParseTreeListener,
PredictionMode,
ANTLRErrorListener,
Parser,
} from 'antlr4ng';
import { CandidatesCollection, CodeCompletionCore } from 'antlr4-c3';
import { SQLParserBase } from '../../lib/SQLParserBase';
Expand All @@ -28,6 +29,8 @@ import type { EntityCollector } from './entityCollector';
import { EntityContext } from './entityCollector';
import SemanticContextCollector from './semanticContextCollector';

export const SQL_SPLIT_SYMBOL_TEXT = ';';

/**
* Basic SQL class, every sql needs extends it.
*/
Expand Down Expand Up @@ -264,7 +267,6 @@ export abstract class BasicSQL<
return null;
}
const splitListener = this.splitListener;

this.listen(splitListener, this._parseTree);

const res = splitListener.statementsContext
Expand All @@ -277,35 +279,102 @@ export abstract class BasicSQL<
}

/**
* Get a minimum boundary parser near tokenIndex.
* @param input source string.
* @param tokenIndex start from which index to minimize the boundary.
* @param originParseTree the parse tree need to be minimized, default value is the result of parsing `input`.
* @returns minimum parser info
* Get the smaller range of input
* @param input string
* @param allTokens all tokens from input
* @param tokenIndexOffset offset of the tokenIndex in the range of input
* @param caretTokenIndex tokenIndex of caretPosition
* @returns inputSlice: string, caretTokenIndex: number
*/
public getMinimumParserInfo(
private splitInputBySymbolText(
input: string,
tokenIndex: number,
originParseTree?: ParserRuleContext | null
) {
if (arguments.length <= 2) {
this.parseWithCache(input);
originParseTree = this._parseTree;
allTokens: Token[],
tokenIndexOffset: number,
caretTokenIndex: number
): { inputSlice: string; allTokens: Token[]; caretTokenIndex: number } {
const tokens = allTokens.slice(tokenIndexOffset);
/**
* Set startToken
*/
let startToken: Token | null = null;
for (let tokenIndex = caretTokenIndex - tokenIndexOffset; tokenIndex >= 0; tokenIndex--) {
const token = tokens[tokenIndex];
if (token?.text === SQL_SPLIT_SYMBOL_TEXT) {
startToken = tokens[tokenIndex + 1];
break;
}
}
if (startToken === null) {
startToken = tokens[0];
}

/**
* Set stopToken
*/
let stopToken: Token | null = null;
for (
let tokenIndex = caretTokenIndex - tokenIndexOffset;
tokenIndex < tokens.length;
tokenIndex++
) {
const token = tokens[tokenIndex];
if (token?.text === SQL_SPLIT_SYMBOL_TEXT) {
stopToken = token;
break;
}
}
if (stopToken === null) {
stopToken = tokens[tokens.length - 1];
}

const indexOffset = tokens[0].start;
let startIndex = startToken.start - indexOffset;
let stopIndex = stopToken.stop + 1 - indexOffset;

/**
* Save offset of the tokenIndex in the range of input
* compared to the tokenIndex in the whole input
*/
const _tokenIndexOffset = startToken.tokenIndex;
const _caretTokenIndex = caretTokenIndex - _tokenIndexOffset;

/**
* Get the smaller range of _input
*/
const _input = input.slice(startIndex, stopIndex);

return {
inputSlice: _input,
allTokens: allTokens.slice(_tokenIndexOffset),
caretTokenIndex: _caretTokenIndex,
};
}

/**
* Get the minimum input string that can be parsed successfully by c3.
* @param input source string
* @param caretTokenIndex tokenIndex of caretPosition
* @param originParseTree origin parseTree
* @returns MinimumInputInfo
*/
public getMinimumInputInfo(
input: string,
caretTokenIndex: number,
originParseTree: ParserRuleContext | undefined
): { input: string; tokenIndexOffset: number; statementCount: number } | null {
if (!originParseTree || !input?.length) return null;
let inputSlice = input;

const splitListener = this.splitListener;
/**
* Split sql by statement.
* Try to collect candidates in as small a range as possible.
*/
const splitListener = this.splitListener;
this.listen(splitListener, originParseTree);

const statementCount = splitListener.statementsContext?.length;
const statementsContext = splitListener.statementsContext;
let tokenIndexOffset = 0;
let sqlParserIns = this._parser;
let parseTree = originParseTree;

// If there are multiple statements.
if (statementCount > 1) {
Expand All @@ -330,14 +399,14 @@ export abstract class BasicSQL<
const isNextCtxValid =
index === statementCount - 1 || !statementsContext[index + 1]?.exception;

if (ctx.stop && ctx.stop.tokenIndex < tokenIndex && isPrevCtxValid) {
if (ctx.stop && ctx.stop.tokenIndex < caretTokenIndex && isPrevCtxValid) {
startStatement = ctx;
}

if (
ctx.start &&
!stopStatement &&
ctx.start.tokenIndex > tokenIndex &&
ctx.start.tokenIndex > caretTokenIndex &&
isNextCtxValid
) {
stopStatement = ctx;
Expand All @@ -347,41 +416,64 @@ export abstract class BasicSQL<

// A boundary consisting of the index of the input.
const startIndex = startStatement?.start?.start ?? 0;
const stopIndex = stopStatement?.stop?.stop ?? input.length - 1;
const stopIndex = stopStatement?.stop?.stop ?? inputSlice.length - 1;

/**
* Save offset of the tokenIndex in the range of input
* compared to the tokenIndex in the whole input
*/
tokenIndexOffset = startStatement?.start?.tokenIndex ?? 0;
tokenIndex = tokenIndex - tokenIndexOffset;
inputSlice = inputSlice.slice(startIndex, stopIndex);
}

/**
* Reparse the input fragment,
* and c3 will collect candidates in the newly generated parseTree.
*/
const inputSlice = input.slice(startIndex, stopIndex);
return {
input: inputSlice,
tokenIndexOffset,
statementCount,
};
}

const lexer = this.createLexer(inputSlice);
lexer.removeErrorListeners();
const tokenStream = new CommonTokenStream(lexer);
tokenStream.fill();
/**
* Get a minimum boundary parser near caretTokenIndex.
* @param input source string.
* @param caretTokenIndex start from which index to minimize the boundary.
* @param originParseTree the parse tree need to be minimized, default value is the result of parsing `input`.
* @returns minimum parser info
*/
public getMinimumParserInfo(
input: string,
caretTokenIndex: number,
originParseTree: ParserRuleContext | undefined
): {
parser: Parser;
parseTree: ParserRuleContext;
tokenIndexOffset: number;
newTokenIndex: number;
} | null {
if (!originParseTree || !input?.length) return null;

const parser = this.createParserFromTokenStream(tokenStream);
parser.interpreter.predictionMode = PredictionMode.SLL;
parser.removeErrorListeners();
parser.buildParseTrees = true;
parser.errorHandler = new ErrorStrategy();
const inputInfo = this.getMinimumInputInfo(input, caretTokenIndex, originParseTree);
if (!inputInfo) return null;
const { input: inputSlice, tokenIndexOffset } = inputInfo;
caretTokenIndex = caretTokenIndex - tokenIndexOffset;

sqlParserIns = parser;
parseTree = parser.program();
let sqlParserIns = this._parser;
let parseTree = originParseTree;

/**
* Reparse the input fragment,
* and c3 will collect candidates in the newly generated parseTree when input changed.
*/
if (inputSlice !== input) {
sqlParserIns = this.createParser(inputSlice);
parseTree = sqlParserIns.program();
}

return {
parser: sqlParserIns,
parseTree,
tokenIndexOffset,
newTokenIndex: tokenIndex,
newTokenIndex: caretTokenIndex,
};
}

Expand All @@ -396,33 +488,63 @@ export abstract class BasicSQL<
caretPosition: CaretPosition
): Suggestions | null {
this.parseWithCache(input);

if (!this._parseTree) return null;

const allTokens = this.getAllTokens(input);
let allTokens = this.getAllTokens(input);
let caretTokenIndex = findCaretTokenIndex(caretPosition, allTokens);

if (!caretTokenIndex && caretTokenIndex !== 0) return null;

const minimumParser = this.getMinimumParserInfo(input, caretTokenIndex);
const inputInfo = this.getMinimumInputInfo(input, caretTokenIndex, this._parseTree);
if (!inputInfo) return null;
const { input: _input, tokenIndexOffset, statementCount } = inputInfo;
let inputSlice = _input;

/**
* Split the inputSlice by separator to get the smaller range of inputSlice.
*/
if (inputSlice.includes(SQL_SPLIT_SYMBOL_TEXT)) {
const {
inputSlice: _inputSlice,
allTokens: _allTokens,
caretTokenIndex: _caretTokenIndex,
} = this.splitInputBySymbolText(
inputSlice,
allTokens,
tokenIndexOffset,
caretTokenIndex
);

allTokens = _allTokens;
caretTokenIndex = _caretTokenIndex;
inputSlice = _inputSlice;
} else {
if (statementCount > 1) {
caretTokenIndex = caretTokenIndex - tokenIndexOffset;
}
}

let sqlParserIns = this._parser;
let parseTree = this._parseTree;

if (!minimumParser) return null;
/**
* Reparse the input fragment,
* and c3 will collect candidates in the newly generated parseTree when input changed.
*/
if (inputSlice !== input) {
sqlParserIns = this.createParser(inputSlice);
parseTree = sqlParserIns.program();
}

const {
parser: sqlParserIns,
tokenIndexOffset,
newTokenIndex,
parseTree: c3Context,
} = minimumParser;
const core = new CodeCompletionCore(sqlParserIns);
core.preferredRules = this.preferredRules;

const candidates = core.collectCandidates(newTokenIndex, c3Context);
const candidates = core.collectCandidates(caretTokenIndex, parseTree);
const originalSuggestions = this.processCandidates(
candidates,
allTokens,
newTokenIndex,
tokenIndexOffset
caretTokenIndex,
0
// tokenIndexOffset
);

const syntaxSuggestions: SyntaxSuggestion<WordRange>[] = originalSuggestions.syntax.map(
Expand Down
3 changes: 1 addition & 2 deletions src/parser/common/semanticContextCollector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@ import {
SemanticContext,
SqlSplitStrategy,
} from '../common/types';

export const SQL_SPLIT_SYMBOL_TEXT = ';';
import { SQL_SPLIT_SYMBOL_TEXT } from './basicSQL';

abstract class SemanticContextCollector {
constructor(
Expand Down
65 changes: 65 additions & 0 deletions test/parser/flink/suggestion/completeAfterSyntaxError.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import { FlinkSQL } from 'src/parser/flink';
import { CaretPosition, EntityContextType } from 'src/parser/common/types';

describe('FlinkSQL Complete After Syntax Error', () => {
    const flink = new FlinkSQL();

    // Each fixture places a syntactically broken statement before the caret;
    // the caret itself sits inside the following statement.
    const sql1 = `SELECT FROM tb2;\nINSERT INTO `;
    const sql2 = `SELECT FROM tb3;\nCREATE TABLE `;
    const sql3 = `SELECT FROM t1;\nSL`;

    /** Collect completion candidates at the given caret location. */
    const suggestAt = (sql: string, lineNumber: number, column: number) => {
        const pos: CaretPosition = { lineNumber, column };
        return flink.getSuggestionAtCaretPosition(sql, pos);
    };

    test('Syntax error but end with semi, should suggest tableName', () => {
        const suggestion = suggestAt(sql1, 2, 13);
        expect(suggestion).not.toBeUndefined();

        // syntax
        const syntaxes = suggestion?.syntax;
        expect(syntaxes.length).toBe(1);
        expect(syntaxes[0].syntaxContextType).toBe(EntityContextType.TABLE);

        // keyword
        const keywords = suggestion?.keywords;
        expect(keywords.length).toBe(0);
    });

    test('Syntax error but end with semi, should suggest tableNameCreate', () => {
        const suggestion = suggestAt(sql2, 2, 14);
        expect(suggestion).not.toBeUndefined();

        // syntax
        const syntaxes = suggestion?.syntax;
        expect(syntaxes.length).toBe(1);
        expect(syntaxes[0].syntaxContextType).toBe(EntityContextType.TABLE_CREATE);

        // keyword
        const keywords = suggestion?.keywords;
        expect(keywords).toMatchUnorderedArray(['IF', 'IF NOT EXISTS']);
    });

    test('Syntax error but end with semi, should suggest filter token', () => {
        const suggestion = suggestAt(sql3, 2, 2);
        expect(suggestion).not.toBeUndefined();

        // syntax
        const syntaxes = suggestion?.syntax;
        expect(syntaxes.length).toBe(0);

        // keyword
        const filterKeywords = suggestion?.keywords?.filter(
            (item) => item.startsWith('S') && /S(?=.*L)/.test(item)
        );
        expect(filterKeywords).toMatchUnorderedArray(['SELECT']);
    });
});
Loading