-
Notifications
You must be signed in to change notification settings - Fork 101
/
Copy pathbasicSQL.ts
494 lines (427 loc) · 16.2 KB
/
basicSQL.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
import {
Lexer,
Token,
CharStreams,
CommonTokenStream,
CharStream,
ParserRuleContext,
ParseTreeWalker,
ParseTreeListener,
PredictionMode,
ANTLRErrorListener,
} from 'antlr4ng';
import { CandidatesCollection, CodeCompletionCore } from 'antlr4-c3';
import { SQLParserBase } from '../../lib/SQLParserBase';
import { findCaretTokenIndex } from './findCaretTokenIndex';
import { ctxToText, tokenToWord, WordRange, TextSlice } from './textAndWord';
import {
CaretPosition,
LOCALE_TYPE,
SemanticCollectOptions,
Suggestions,
SyntaxSuggestion,
} from './types';
import { ParseError, ErrorListener } from './parseErrorListener';
import { ErrorStrategy } from './errorStrategy';
import type { SplitListener } from './splitListener';
import type { EntityCollector } from './entityCollector';
import { EntityContext } from './entityCollector';
import SemanticContextCollector from './semanticContextCollector';
/**
* Basic SQL class, every sql needs extends it.
*/
/**
 * Basic SQL class, every sql needs extends it.
 *
 * Provides parsing, validation, statement splitting, code-completion
 * (via antlr4-c3), entity collection and semantic-context collection on top
 * of an antlr4ng lexer/parser pair. Dialect-specific pieces are supplied by
 * subclasses through the abstract members below.
 *
 * @typeParam L   concrete antlr4ng Lexer for the dialect
 * @typeParam PRC root ParserRuleContext type produced by `parser.program()`
 * @typeParam P   concrete parser extending SQLParserBase
 */
export abstract class BasicSQL<
    L extends Lexer = Lexer,
    PRC extends ParserRuleContext = ParserRuleContext,
    P extends SQLParserBase<PRC> = SQLParserBase<PRC>,
> {
    /** members for cache start */
    // Artifacts of the most recent `parseWithCache` call. They are reused as
    // long as the input string is unchanged (see `parseWithCache`).
    protected _charStreams: CharStream;
    protected _lexer: L;
    protected _tokenStream: CommonTokenStream;
    protected _parser: P;
    protected _parseTree: PRC | null;
    protected _parsedInput: string;
    protected _parseErrors: ParseError[] = [];
    /** members for cache end */

    /**
     * Internal listener that accumulates every lexer/parser error into
     * `_parseErrors`. The array is reset at the start of each fresh parse.
     */
    private _errorListener: ErrorListener = (error) => {
        this._parseErrors.push(error);
    };

    /**
     * PreferredRules for antlr4-c3
     * (rule indexes the completion core should report as candidates).
     */
    protected abstract preferredRules: Set<number>;

    /**
     * Create a antlr4 Lexer instance.
     * @param input source string
     */
    protected abstract createLexerFromCharStream(charStreams: CharStream): L;

    /**
     * Create Parser by CommonTokenStream
     * @param tokenStream CommonTokenStream
     */
    protected abstract createParserFromTokenStream(tokenStream: CommonTokenStream): P;

    /**
     * Convert candidates to suggestions
     * @param candidates candidate list
     * @param allTokens slice all tokens from input by tokenIndexOffset
     * @param caretTokenIndex tokenIndex of caretPosition
     */
    protected abstract processCandidates(
        candidates: CandidatesCollection,
        allTokens: Token[],
        caretTokenIndex: number
    ): Suggestions<Token>;

    /**
     * Get a new splitListener instance.
     */
    protected abstract get splitListener(): SplitListener<ParserRuleContext>;

    /**
     * Get a new errorListener instance.
     */
    protected abstract createErrorListener(errorListener: ErrorListener): ANTLRErrorListener;

    /**
     * Get a new entityCollector instance.
     */
    protected abstract createEntityCollector(
        input: string,
        allTokens?: Token[],
        caretTokenIndex?: number
    ): EntityCollector;

    /** Locale used for localized messages; defaults to US English. */
    public locale: LOCALE_TYPE = 'en_US';

    /**
     * Get a new semanticContextCollector instance.
     */
    protected abstract createSemanticContextCollector(
        input: string,
        caretPosition: CaretPosition,
        allTokens: Token[],
        options?: SemanticCollectOptions
    ): SemanticContextCollector;

    /**
     * Create an antlr4 lexer from input.
     * @param input string
     * @param errorListener optional; when given, the lexer's default error
     * listeners are replaced with one wrapping this callback.
     */
    public createLexer(input: string, errorListener?: ErrorListener) {
        const charStreams = CharStreams.fromString(input);
        const lexer = this.createLexerFromCharStream(charStreams);
        if (errorListener) {
            lexer.removeErrorListeners();
            lexer.addErrorListener(this.createErrorListener(errorListener));
        }
        return lexer;
    }

    /**
     * Create an antlr4 parser from input.
     * @param input string
     * @param errorListener optional; when given, replaces the parser's
     * default error listeners.
     */
    public createParser(input: string, errorListener?: ErrorListener) {
        const lexer = this.createLexer(input, errorListener);
        const tokenStream = new CommonTokenStream(lexer);
        const parser = this.createParserFromTokenStream(tokenStream);
        // SLL prediction mode trades full-context accuracy for speed.
        parser.interpreter.predictionMode = PredictionMode.SLL;
        if (errorListener) {
            parser.removeErrorListeners();
            parser.addErrorListener(this.createErrorListener(errorListener));
        }
        return parser;
    }

    /**
     * Parse input string and return parseTree.
     * Uncached: every call re-lexes and re-parses the input.
     * @param input string
     * @param errorListener listen parse errors and lexer errors.
     * @returns parseTree
     */
    public parse(input: string, errorListener?: ErrorListener) {
        const parser = this.createParser(input, errorListener);
        parser.buildParseTrees = true;
        // Custom recovery strategy shared with the cached parse path.
        parser.errorHandler = new ErrorStrategy();
        return parser.program();
    }

    /**
     * Create an antlr4 parser from input.
     * And the instances will be cache.
     * Populates all `_`-prefixed cache members; `_parseTree` is cleared here
     * and only re-assigned by `parseWithCache` after a successful parse.
     * @param input string
     */
    private createParserWithCache(input: string): P {
        this._parseTree = null;
        this._charStreams = CharStreams.fromString(input);
        this._lexer = this.createLexerFromCharStream(this._charStreams);
        this._lexer.removeErrorListeners();
        this._lexer.addErrorListener(this.createErrorListener(this._errorListener));
        this._tokenStream = new CommonTokenStream(this._lexer);
        /**
         * All tokens are generated in advance.
         * This can cause performance degradation, but it seems necessary for now.
         * Because the tokens will be used multiple times.
         */
        this._tokenStream.fill();
        this._parser = this.createParserFromTokenStream(this._tokenStream);
        this._parser.interpreter.predictionMode = PredictionMode.SLL;
        this._parser.buildParseTrees = true;
        this._parser.errorHandler = new ErrorStrategy();
        return this._parser;
    }

    /**
     * If it is invoked multiple times in a row and the input parameters is the same,
     * this method returns the parsing result directly for the first time
     * unless the errorListener parameter is passed.
     * @param input source string
     * @param errorListener listen errors
     * @returns parseTree
     */
    private parseWithCache(input: string, errorListener?: ErrorListener): PRC {
        // Avoid parsing the same input repeatedly.
        if (this._parsedInput === input && !errorListener && this._parseTree) {
            return this._parseTree;
        }
        // Fresh parse: clear accumulated errors before the listeners can fire.
        this._parseErrors = [];
        const parser = this.createParserWithCache(input);
        this._parsedInput = input;
        parser.removeErrorListeners();
        parser.addErrorListener(this.createErrorListener(this._errorListener));
        this._parseTree = parser.program();
        return this._parseTree;
    }

    /**
     * Validate input string and return syntax errors if exists.
     * @param input source string
     * @returns syntax errors
     */
    public validate(input: string): ParseError[] {
        this.parseWithCache(input);
        return this._parseErrors;
    }

    /**
     * Get the input string that has been parsed.
     * NOTE(review): undefined until the first cached parse has run — confirm
     * callers tolerate that.
     */
    public getParsedInput(): string {
        return this._parsedInput;
    }

    /**
     * Get all Tokens of input string,'<EOF>' is not included.
     * @param input source string
     * @returns Token[]
     */
    public getAllTokens(input: string): Token[] {
        this.parseWithCache(input);
        let allTokens = this._tokenStream.getTokens();
        // `fill()` appends the EOF token; strip it so callers only see real tokens.
        if (allTokens[allTokens.length - 1].text === '<EOF>') {
            allTokens = allTokens.slice(0, -1);
        }
        return allTokens;
    }

    /**
     * Walk the given parse tree with the given listener.
     * @param listener Listener instance extends ParserListener
     * @param parseTree parser Tree
     */
    public listen<PTL extends ParseTreeListener = ParseTreeListener>(
        listener: PTL,
        parseTree: ParserRuleContext
    ) {
        ParseTreeWalker.DEFAULT.walk(listener, parseTree);
    }

    /**
     * Split input into statements.
     * If exist syntax error it will return null.
     * @param input source string
     */
    public splitSQLByStatement(input: string): TextSlice[] | null {
        const errors = this.validate(input);
        if (errors.length || !this._parseTree) {
            return null;
        }
        const splitListener = this.splitListener;
        this.listen(splitListener, this._parseTree);
        // ctxToText may return null for contexts without text; drop those.
        const res = splitListener.statementsContext
            .map((context) => {
                return ctxToText(context, this._parsedInput);
            })
            .filter(Boolean) as TextSlice[];
        return res;
    }

    /**
     * Get a minimum boundary parser near tokenIndex.
     * @param input source string.
     * @param tokenIndex start from which index to minimize the boundary.
     * @param originParseTree the parse tree need to be minimized, default value is the result of parsing `input`.
     * @returns minimum parser info (parser, its parse tree, the token-index
     * offset of the reparsed fragment, and the caret token index re-based
     * into that fragment), or null when there is nothing to parse.
     */
    public getMinimumParserInfo(
        input: string,
        tokenIndex: number,
        originParseTree?: ParserRuleContext | null
    ) {
        // `arguments.length` distinguishes "not passed" from an explicit
        // null/undefined third argument.
        if (arguments.length <= 2) {
            this.parseWithCache(input);
            originParseTree = this._parseTree;
        }

        if (!originParseTree || !input?.length) return null;

        const splitListener = this.splitListener;
        /**
         * Split sql by statement.
         * Try to collect candidates in as small a range as possible.
         */
        this.listen(splitListener, originParseTree);
        const statementCount = splitListener.statementsContext?.length;
        const statementsContext = splitListener.statementsContext;
        let tokenIndexOffset = 0;
        let sqlParserIns = this._parser;
        let parseTree = originParseTree;

        // If there are multiple statements.
        if (statementCount > 1) {
            /**
             * Find a minimum valid range, reparse the fragment, and provide a new parse tree to C3.
             * The boundaries of this range must be statements with no syntax errors.
             * This can ensure the stable performance of the C3.
             */
            let startStatement: ParserRuleContext | null = null;
            let stopStatement: ParserRuleContext | null = null;

            for (let index = 0; index < statementCount; index++) {
                const ctx = statementsContext[index];
                const isCurrentCtxValid = !ctx.exception;
                if (!isCurrentCtxValid) continue;

                /**
                 * Ensure that the statementContext before the left boundary
                 * and the last statementContext on the right boundary are qualified SQL statements.
                 */
                const isPrevCtxValid = index === 0 || !statementsContext[index - 1]?.exception;
                const isNextCtxValid =
                    index === statementCount - 1 || !statementsContext[index + 1]?.exception;

                // Last valid statement that ends strictly before the caret.
                if (ctx.stop && ctx.stop.tokenIndex < tokenIndex && isPrevCtxValid) {
                    startStatement = ctx;
                }

                // First valid statement that starts strictly after the caret.
                if (
                    ctx.start &&
                    !stopStatement &&
                    ctx.start.tokenIndex > tokenIndex &&
                    isNextCtxValid
                ) {
                    stopStatement = ctx;
                    break;
                }
            }

            // A boundary consisting of the index of the input.
            const startIndex = startStatement?.start?.start ?? 0;
            const stopIndex = stopStatement?.stop?.stop ?? input.length - 1;

            /**
             * Save offset of the tokenIndex in the range of input
             * compared to the tokenIndex in the whole input
             */
            tokenIndexOffset = startStatement?.start?.tokenIndex ?? 0;
            tokenIndex = tokenIndex - tokenIndexOffset;

            /**
             * Reparse the input fragment,
             * and c3 will collect candidates in the newly generated parseTree.
             */
            // NOTE(review): `slice`'s end index is exclusive, so the final
            // character at `stopIndex` is excluded from the fragment — confirm
            // this is intended (the caret always lies before `stopStatement`,
            // so completion may be unaffected).
            const inputSlice = input.slice(startIndex, stopIndex);

            const lexer = this.createLexer(inputSlice);
            lexer.removeErrorListeners();
            const tokenStream = new CommonTokenStream(lexer);
            tokenStream.fill();

            const parser = this.createParserFromTokenStream(tokenStream);
            parser.interpreter.predictionMode = PredictionMode.SLL;
            parser.removeErrorListeners();
            parser.buildParseTrees = true;
            parser.errorHandler = new ErrorStrategy();

            sqlParserIns = parser;
            parseTree = parser.program();
        }

        return {
            parser: sqlParserIns,
            parseTree,
            tokenIndexOffset,
            newTokenIndex: tokenIndex,
        };
    }

    /**
     * Get suggestions of syntax and token at caretPosition
     * @param input source string
     * @param caretPosition caret position, such as cursor position
     * @returns suggestion
     */
    public getSuggestionAtCaretPosition(
        input: string,
        caretPosition: CaretPosition
    ): Suggestions | null {
        this.parseWithCache(input);
        if (!this._parseTree) return null;

        const allTokens = this.getAllTokens(input);
        let caretTokenIndex = findCaretTokenIndex(caretPosition, allTokens);
        // Index 0 is valid; only bail out for undefined/NaN-like results.
        if (!caretTokenIndex && caretTokenIndex !== 0) return null;

        const minimumParser = this.getMinimumParserInfo(input, caretTokenIndex);

        if (!minimumParser) return null;

        const {
            parser: sqlParserIns,
            tokenIndexOffset,
            newTokenIndex,
            parseTree: c3Context,
        } = minimumParser;
        const core = new CodeCompletionCore(sqlParserIns);
        core.preferredRules = this.preferredRules;

        // Collect candidates relative to the (possibly reparsed) fragment.
        const candidates = core.collectCandidates(newTokenIndex, c3Context);
        const originalSuggestions = this.processCandidates(
            candidates,
            allTokens.slice(tokenIndexOffset),
            newTokenIndex
        );

        // Map token-based syntax suggestions back to word ranges in the input.
        const syntaxSuggestions: SyntaxSuggestion<WordRange>[] = originalSuggestions.syntax.map(
            (syntaxCtx) => {
                const wordRanges: WordRange[] = syntaxCtx.wordRanges.map((token) => {
                    return tokenToWord(token, this._parsedInput);
                });
                return {
                    syntaxContextType: syntaxCtx.syntaxContextType,
                    wordRanges,
                };
            }
        );
        return {
            syntax: syntaxSuggestions,
            keywords: originalSuggestions.keywords,
        };
    }

    /**
     * Collect all entities (tables, columns, …, per the dialect's collector)
     * from the input, optionally relative to a caret position.
     * @param input source string
     * @param caretPosition optional caret; when given, its token index is
     * passed to the entity collector.
     * @returns collected entities, or whatever the collector reports.
     */
    public getAllEntities(input: string, caretPosition?: CaretPosition): EntityContext[] | null {
        const allTokens = this.getAllTokens(input);
        const caretTokenIndex = caretPosition
            ? findCaretTokenIndex(caretPosition, allTokens)
            : void 0;

        const collectListener = this.createEntityCollector(input, allTokens, caretTokenIndex);
        // Earlier parser-driven collection approach, kept for reference:
        // const parser = this.createParserWithCache(input);

        // parser.entityCollecting = true;
        // if(caretPosition) {
        //     const allTokens = this.getAllTokens(input);
        //     const tokenIndex = findCaretTokenIndex(caretPosition, allTokens);
        //     parser.caretTokenIndex = tokenIndex;
        // }

        // const parseTree = parser.program();

        const parseTree = this.parseWithCache(input);

        this.listen(collectListener, parseTree);

        // parser.caretTokenIndex = -1;
        // parser.entityCollecting = false;

        return collectListener.getEntities();
    }

    /**
     * Get semantic context infos
     * @param input source string
     * @param caretPosition caret position, such as cursor position
     * @param options semantic context options
     * @returns analyzed semantic context
     */
    public getSemanticContextAtCaretPosition(
        input: string,
        caretPosition: CaretPosition,
        options?: SemanticCollectOptions
    ) {
        const allTokens = this.getAllTokens(input);
        const parseTree = this.parseWithCache(input);
        const statementContextListener = this.createSemanticContextCollector(
            input,
            caretPosition,
            allTokens,
            options
        );
        this.listen(statementContextListener, parseTree);
        return statementContextListener.semanticContext;
    }
}