Skip to content

Commit 46bc5ea

Browse files
samuelcolvin authored and
ayman-sigma committed
Support Dialect level precedence, update Postgres Dialect to match Postgres (apache#1360)
1 parent 32b6609 commit 46bc5ea

File tree

6 files changed

+440
-130
lines changed

6 files changed

+440
-130
lines changed

src/ast/operator.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ pub enum BinaryOperator {
151151
Arrow,
152152
/// The `->>` operator.
153153
///
154-
/// On PostgreSQL, this operator that extracts a JSON object field or JSON
154+
/// On PostgreSQL, this operator extracts a JSON object field or JSON
155155
/// array element and converts it to text, for example `'{"a":"b"}'::json
156156
/// ->> 'a'` or `'[1, 2, 3]'::json ->> 2`.
157157
///

src/dialect/mod.rs

+164-1
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,13 @@ mod redshift;
2424
mod snowflake;
2525
mod sqlite;
2626

27-
use crate::ast::{Expr, Statement};
2827
use core::any::{Any, TypeId};
2928
use core::fmt::Debug;
3029
use core::iter::Peekable;
3130
use core::str::Chars;
3231

32+
use log::debug;
33+
3334
pub use self::ansi::AnsiDialect;
3435
pub use self::bigquery::BigQueryDialect;
3536
pub use self::clickhouse::ClickHouseDialect;
@@ -43,8 +44,11 @@ pub use self::postgresql::PostgreSqlDialect;
4344
pub use self::redshift::RedshiftSqlDialect;
4445
pub use self::snowflake::SnowflakeDialect;
4546
pub use self::sqlite::SQLiteDialect;
47+
use crate::ast::{Expr, Statement};
4648
pub use crate::keywords;
49+
use crate::keywords::Keyword;
4750
use crate::parser::{Parser, ParserError};
51+
use crate::tokenizer::Token;
4852

4953
#[cfg(not(feature = "std"))]
5054
use alloc::boxed::Box;
@@ -300,13 +304,172 @@ pub trait Dialect: Debug + Any {
300304
// return None to fall back to the default behavior
301305
None
302306
}
307+
308+
/// Get the precedence of the next token. This "full" method means all precedence logic can remain
309+
/// in the dialect, while still allowing overriding the `get_next_precedence` method with the option to
310+
/// fall back to the default behavior.
311+
///
312+
/// Higher number => higher precedence
///
/// If `get_next_precedence` returns `Some`, that result is returned unchanged. Otherwise the
/// next token is peeked and mapped to one of this module's `*_PREC` constants; tokens that
/// match no arm get `UNKNOWN_PREC`.
313+
fn get_next_precedence_full(&self, parser: &Parser) -> Result<u8, ParserError> {
314+
// Give the dialect-specific override the first chance to answer.
if let Some(precedence) = self.get_next_precedence(parser) {
315+
return precedence;
316+
}
317+
318+
let token = parser.peek_token();
319+
debug!("get_next_precedence() {:?}", token);
320+
match token.token {
321+
Token::Word(w) if w.keyword == Keyword::OR => Ok(OR_PREC),
322+
Token::Word(w) if w.keyword == Keyword::AND => Ok(AND_PREC),
323+
Token::Word(w) if w.keyword == Keyword::XOR => Ok(XOR_PREC),
324+
325+
// `AT` is only treated as an operator when it starts `AT TIME ZONE`,
// so look ahead at the two tokens that follow it.
Token::Word(w) if w.keyword == Keyword::AT => {
326+
match (
327+
parser.peek_nth_token(1).token,
328+
parser.peek_nth_token(2).token,
329+
) {
330+
(Token::Word(w), Token::Word(w2))
331+
if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE =>
332+
{
333+
Ok(AT_TZ_PREC)
334+
}
335+
_ => Ok(UNKNOWN_PREC),
336+
}
337+
}
338+
339+
Token::Word(w) if w.keyword == Keyword::NOT => match parser.peek_nth_token(1).token {
340+
// The precedence of NOT varies depending on the keyword that
341+
// follows it. If it is followed by IN, BETWEEN, LIKE, ILIKE, RLIKE,
342+
// REGEXP, or SIMILAR, it takes on the precedence of those tokens.
343+
// Otherwise, it is not an infix operator, and therefore has zero
344+
// precedence.
345+
Token::Word(w) if w.keyword == Keyword::IN => Ok(BETWEEN_PREC),
346+
Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(BETWEEN_PREC),
347+
Token::Word(w) if w.keyword == Keyword::LIKE => Ok(LIKE_PREC),
348+
Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(LIKE_PREC),
349+
Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(LIKE_PREC),
350+
Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(LIKE_PREC),
351+
Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(LIKE_PREC),
352+
_ => Ok(UNKNOWN_PREC),
353+
},
354+
Token::Word(w) if w.keyword == Keyword::IS => Ok(IS_PREC),
355+
Token::Word(w) if w.keyword == Keyword::IN => Ok(BETWEEN_PREC),
356+
Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(BETWEEN_PREC),
357+
Token::Word(w) if w.keyword == Keyword::LIKE => Ok(LIKE_PREC),
358+
Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(LIKE_PREC),
359+
Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(LIKE_PREC),
360+
Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(LIKE_PREC),
361+
Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(LIKE_PREC),
362+
Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(BETWEEN_PREC),
363+
Token::Word(w) if w.keyword == Keyword::DIV => Ok(MUL_DIV_MOD_OP_PREC),
364+
Token::Eq
365+
| Token::Lt
366+
| Token::LtEq
367+
| Token::Neq
368+
| Token::Gt
369+
| Token::GtEq
370+
| Token::DoubleEq
371+
| Token::Tilde
372+
| Token::TildeAsterisk
373+
| Token::ExclamationMarkTilde
374+
| Token::ExclamationMarkTildeAsterisk
375+
| Token::DoubleTilde
376+
| Token::DoubleTildeAsterisk
377+
| Token::ExclamationMarkDoubleTilde
378+
| Token::ExclamationMarkDoubleTildeAsterisk
379+
| Token::Spaceship => Ok(EQ_PREC),
380+
Token::Pipe => Ok(PIPE_PREC),
381+
Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(CARET_PREC),
382+
Token::Ampersand => Ok(AMPERSAND_PREC),
383+
Token::Plus | Token::Minus => Ok(PLUS_MINUS_PREC),
384+
Token::Mul | Token::Div | Token::DuckIntDiv | Token::Mod | Token::StringConcat => {
385+
Ok(MUL_DIV_MOD_OP_PREC)
386+
}
387+
Token::DoubleColon
388+
| Token::ExclamationMark
389+
| Token::LBracket
390+
| Token::Overlap
391+
| Token::CaretAt => Ok(DOUBLE_COLON_PREC),
392+
// NOTE(review): leftover commented-out arm; `Token::Colon` is instead handled by
// `SnowflakeDialect::get_next_precedence`.
// Token::Colon if (self as dyn Dialect).is::<SnowflakeDialect>() => Ok(DOUBLE_COLON_PREC),
393+
Token::Arrow
394+
| Token::LongArrow
395+
| Token::HashArrow
396+
| Token::HashLongArrow
397+
| Token::AtArrow
398+
| Token::ArrowAt
399+
| Token::HashMinus
400+
| Token::AtQuestion
401+
| Token::AtAt
402+
| Token::Question
403+
| Token::QuestionAnd
404+
| Token::QuestionPipe
405+
| Token::CustomBinaryOperator(_) => Ok(PG_OTHER_PREC),
406+
_ => Ok(UNKNOWN_PREC),
407+
}
408+
}
409+
303410
/// Dialect-specific statement parser override.
///
/// The default implementation returns `None`, which signals a fall back to the
/// default behavior; a dialect may override this to return `Some(..)` carrying
/// its own parse result.
304411
fn parse_statement(&self, _parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
305412
// return None to fall back to the default behavior
306413
None
307414
}
415+
416+
/// The following precedence values are used directly by `Parser` or in dialects,
417+
/// so have to be made public by the dialect.
///
/// Returns `DOUBLE_COLON_PREC`, the default precedence of `Token::DoubleColon`
/// and the other tokens that share that level.
418+
fn prec_double_colon(&self) -> u8 {
419+
DOUBLE_COLON_PREC
420+
}
421+
422+
/// Returns `MUL_DIV_MOD_OP_PREC`, the default precedence shared by
/// `Token::Mul`, `Token::Div`, `Token::Mod` and the `DIV` keyword.
fn prec_mul_div_mod_op(&self) -> u8 {
423+
MUL_DIV_MOD_OP_PREC
424+
}
425+
426+
/// Returns `PLUS_MINUS_PREC`, the default precedence of `Token::Plus` and `Token::Minus`.
fn prec_plus_minus(&self) -> u8 {
427+
PLUS_MINUS_PREC
428+
}
429+
430+
/// Returns `BETWEEN_PREC`, the default precedence of `IN`, `BETWEEN` and `OPERATOR`
/// (and of `NOT` when it is followed by `IN` or `BETWEEN`).
fn prec_between(&self) -> u8 {
431+
BETWEEN_PREC
432+
}
433+
434+
/// Returns `LIKE_PREC`, the default precedence of `LIKE`, `ILIKE`, `RLIKE`,
/// `REGEXP` and `SIMILAR` (and of `NOT` when it is followed by one of them).
fn prec_like(&self) -> u8 {
435+
LIKE_PREC
436+
}
437+
438+
/// Returns `UNARY_NOT_PREC`.
// NOTE(review): presumably the precedence the parser uses for prefix `NOT` — confirm
// against the call sites in the parser.
fn prec_unary_not(&self) -> u8 {
439+
UNARY_NOT_PREC
440+
}
441+
442+
/// Returns `UNKNOWN_PREC` (zero), the precedence given to tokens that are not
/// recognized as infix operators.
fn prec_unknown(&self) -> u8 {
443+
UNKNOWN_PREC
444+
}
308445
}
309446

447+
// Defines the default precedence of operators, used by the default
// implementation of `Dialect::get_next_precedence_full`.
448+
//
449+
// Uses (APPROXIMATELY) <https://www.postgresql.org/docs/7.0/operators.htm#AEN2026> as a reference
450+
// higher number = higher precedence
451+
//
452+
// NOTE: The pg documentation is incomplete, e.g. the AT TIME ZONE operator
453+
// actually has higher precedence than addition.
454+
// See <https://postgrespro.com/list/thread-id/2673331>.
455+
const DOUBLE_COLON_PREC: u8 = 50;
456+
const AT_TZ_PREC: u8 = 41;
457+
const MUL_DIV_MOD_OP_PREC: u8 = 40;
458+
const PLUS_MINUS_PREC: u8 = 30;
459+
const XOR_PREC: u8 = 24;
460+
const AMPERSAND_PREC: u8 = 23;
461+
const CARET_PREC: u8 = 22;
462+
const PIPE_PREC: u8 = 21;
463+
// NOTE(review): BETWEEN_PREC and EQ_PREC share the value 20, so both levels bind equally tightly.
const BETWEEN_PREC: u8 = 20;
464+
const EQ_PREC: u8 = 20;
465+
const LIKE_PREC: u8 = 19;
466+
const IS_PREC: u8 = 17;
467+
const PG_OTHER_PREC: u8 = 16;
468+
const UNARY_NOT_PREC: u8 = 15;
469+
const AND_PREC: u8 = 10;
470+
const OR_PREC: u8 = 5;
471+
// Zero: assigned to tokens that are not infix operators.
const UNKNOWN_PREC: u8 = 0;
472+
310473
impl dyn Dialect {
311474
#[inline]
312475
pub fn is<T: Dialect>(&self) -> bool {

src/dialect/postgresql.rs

+134
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1010
// See the License for the specific language governing permissions and
1111
// limitations under the License.
12+
use log::debug;
1213

1314
use crate::ast::{CommentObject, Statement};
1415
use crate::dialect::Dialect;
@@ -20,6 +21,23 @@ use crate::tokenizer::Token;
2021
/// A [`Dialect`] for PostgreSQL.
#[derive(Debug)]
2122
pub struct PostgreSqlDialect {}
2223

24+
// Operator precedence levels used by `PostgreSqlDialect::get_next_precedence`,
// listed from tightest to loosest binding (higher number = higher precedence).
// NOTE(review): presumably ordered after the PostgreSQL operator precedence table — confirm.
const DOUBLE_COLON_PREC: u8 = 140;
25+
const BRACKET_PREC: u8 = 130;
26+
const COLLATE_PREC: u8 = 120;
27+
const AT_TZ_PREC: u8 = 110;
28+
const CARET_PREC: u8 = 100;
29+
const MUL_DIV_MOD_OP_PREC: u8 = 90;
30+
const PLUS_MINUS_PREC: u8 = 80;
31+
// there's no XOR operator in PostgreSQL, but support it here to avoid breaking tests
32+
const XOR_PREC: u8 = 75;
33+
const PG_OTHER_PREC: u8 = 70;
34+
const BETWEEN_LIKE_PREC: u8 = 60;
35+
const EQ_PREC: u8 = 50;
36+
const IS_PREC: u8 = 40;
37+
const NOT_PREC: u8 = 30;
38+
const AND_PREC: u8 = 20;
39+
const OR_PREC: u8 = 10;
40+
2341
impl Dialect for PostgreSqlDialect {
2442
fn identifier_quote_style(&self, _identifier: &str) -> Option<char> {
2543
Some('"')
@@ -67,6 +85,102 @@ impl Dialect for PostgreSqlDialect {
6785
)
6886
}
6987

88+
/// Returns the precedence of the next token using this file's PostgreSQL-specific levels.
///
/// This always returns `Some(Ok(..))`, so the trait's default table in
/// `get_next_precedence_full` is never consulted for this dialect. Tokens that
/// match no arm get `self.prec_unknown()`.
fn get_next_precedence(&self, parser: &Parser) -> Option<Result<u8, ParserError>> {
89+
let token = parser.peek_token();
90+
debug!("get_next_precedence() {:?}", token);
91+
92+
let precedence = match token.token {
93+
Token::Word(w) if w.keyword == Keyword::OR => OR_PREC,
94+
Token::Word(w) if w.keyword == Keyword::XOR => XOR_PREC,
95+
Token::Word(w) if w.keyword == Keyword::AND => AND_PREC,
96+
// `AT` is only treated as an operator when it starts `AT TIME ZONE`,
// so look ahead at the two tokens that follow it.
Token::Word(w) if w.keyword == Keyword::AT => {
97+
match (
98+
parser.peek_nth_token(1).token,
99+
parser.peek_nth_token(2).token,
100+
) {
101+
(Token::Word(w), Token::Word(w2))
102+
if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE =>
103+
{
104+
AT_TZ_PREC
105+
}
106+
_ => self.prec_unknown(),
107+
}
108+
}
109+
110+
Token::Word(w) if w.keyword == Keyword::NOT => match parser.peek_nth_token(1).token {
111+
// The precedence of NOT varies depending on the keyword that
112+
// follows it. If it is followed by IN, BETWEEN, LIKE, ILIKE, RLIKE,
113+
// REGEXP, or SIMILAR, it takes on the precedence of those tokens.
114+
// Otherwise, it is not an infix operator, and therefore has zero
115+
// precedence.
116+
Token::Word(w) if w.keyword == Keyword::IN => BETWEEN_LIKE_PREC,
117+
Token::Word(w) if w.keyword == Keyword::BETWEEN => BETWEEN_LIKE_PREC,
118+
Token::Word(w) if w.keyword == Keyword::LIKE => BETWEEN_LIKE_PREC,
119+
Token::Word(w) if w.keyword == Keyword::ILIKE => BETWEEN_LIKE_PREC,
120+
Token::Word(w) if w.keyword == Keyword::RLIKE => BETWEEN_LIKE_PREC,
121+
Token::Word(w) if w.keyword == Keyword::REGEXP => BETWEEN_LIKE_PREC,
122+
Token::Word(w) if w.keyword == Keyword::SIMILAR => BETWEEN_LIKE_PREC,
123+
_ => self.prec_unknown(),
124+
},
125+
Token::Word(w) if w.keyword == Keyword::IS => IS_PREC,
126+
Token::Word(w) if w.keyword == Keyword::IN => BETWEEN_LIKE_PREC,
127+
Token::Word(w) if w.keyword == Keyword::BETWEEN => BETWEEN_LIKE_PREC,
128+
Token::Word(w) if w.keyword == Keyword::LIKE => BETWEEN_LIKE_PREC,
129+
Token::Word(w) if w.keyword == Keyword::ILIKE => BETWEEN_LIKE_PREC,
130+
Token::Word(w) if w.keyword == Keyword::RLIKE => BETWEEN_LIKE_PREC,
131+
Token::Word(w) if w.keyword == Keyword::REGEXP => BETWEEN_LIKE_PREC,
132+
Token::Word(w) if w.keyword == Keyword::SIMILAR => BETWEEN_LIKE_PREC,
133+
Token::Word(w) if w.keyword == Keyword::OPERATOR => BETWEEN_LIKE_PREC,
134+
Token::Word(w) if w.keyword == Keyword::DIV => MUL_DIV_MOD_OP_PREC,
135+
Token::Word(w) if w.keyword == Keyword::COLLATE => COLLATE_PREC,
136+
Token::Eq
137+
| Token::Lt
138+
| Token::LtEq
139+
| Token::Neq
140+
| Token::Gt
141+
| Token::GtEq
142+
| Token::DoubleEq
143+
| Token::Tilde
144+
| Token::TildeAsterisk
145+
| Token::ExclamationMarkTilde
146+
| Token::ExclamationMarkTildeAsterisk
147+
| Token::DoubleTilde
148+
| Token::DoubleTildeAsterisk
149+
| Token::ExclamationMarkDoubleTilde
150+
| Token::ExclamationMarkDoubleTildeAsterisk
151+
| Token::Spaceship => EQ_PREC,
152+
Token::Caret => CARET_PREC,
153+
Token::Plus | Token::Minus => PLUS_MINUS_PREC,
154+
Token::Mul | Token::Div | Token::Mod => MUL_DIV_MOD_OP_PREC,
155+
Token::DoubleColon => DOUBLE_COLON_PREC,
156+
Token::LBracket => BRACKET_PREC,
157+
// Every remaining operator token recognized here shares the single `PG_OTHER_PREC` level.
Token::Arrow
158+
| Token::LongArrow
159+
| Token::HashArrow
160+
| Token::HashLongArrow
161+
| Token::AtArrow
162+
| Token::ArrowAt
163+
| Token::HashMinus
164+
| Token::AtQuestion
165+
| Token::AtAt
166+
| Token::Question
167+
| Token::QuestionAnd
168+
| Token::QuestionPipe
169+
| Token::ExclamationMark
170+
| Token::Overlap
171+
| Token::CaretAt
172+
| Token::StringConcat
173+
| Token::Sharp
174+
| Token::ShiftRight
175+
| Token::ShiftLeft
176+
| Token::Pipe
177+
| Token::Ampersand
178+
| Token::CustomBinaryOperator(_) => PG_OTHER_PREC,
179+
_ => self.prec_unknown(),
180+
};
181+
Some(Ok(precedence))
182+
}
183+
70184
fn parse_statement(&self, parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
71185
if parser.parse_keyword(Keyword::COMMENT) {
72186
Some(parse_comment(parser))
@@ -82,6 +196,26 @@ impl Dialect for PostgreSqlDialect {
82196
/// Always returns `true` for this dialect.
// NOTE(review): presumably tells the parser to accept GROUP BY expression
// extensions — confirm against the `Dialect` trait documentation.
fn supports_group_by_expr(&self) -> bool {
83197
true
84198
}
199+
200+
/// Returns this file's PostgreSQL-specific `MUL_DIV_MOD_OP_PREC` instead of the trait default.
fn prec_mul_div_mod_op(&self) -> u8 {
201+
MUL_DIV_MOD_OP_PREC
202+
}
203+
204+
/// Returns this file's PostgreSQL-specific `PLUS_MINUS_PREC` instead of the trait default.
fn prec_plus_minus(&self) -> u8 {
205+
PLUS_MINUS_PREC
206+
}
207+
208+
/// Returns `BETWEEN_LIKE_PREC`; in this dialect `BETWEEN` and `LIKE` share one level.
fn prec_between(&self) -> u8 {
209+
BETWEEN_LIKE_PREC
210+
}
211+
212+
/// Returns `BETWEEN_LIKE_PREC`; in this dialect `LIKE` and `BETWEEN` share one level.
fn prec_like(&self) -> u8 {
213+
BETWEEN_LIKE_PREC
214+
}
215+
216+
/// Returns this file's PostgreSQL-specific `NOT_PREC` instead of the trait default.
fn prec_unary_not(&self) -> u8 {
217+
NOT_PREC
218+
}
85219
}
86220

87221
pub fn parse_comment(parser: &mut Parser) -> Result<Statement, ParserError> {

src/dialect/snowflake.rs

+9
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,15 @@ impl Dialect for SnowflakeDialect {
145145

146146
None
147147
}
148+
149+
/// Gives `Token::Colon` the precedence returned by `prec_double_colon()`.
///
/// Returns `None` for every other token so that the default precedence logic applies.
fn get_next_precedence(&self, parser: &Parser) -> Option<Result<u8, ParserError>> {
150+
let token = parser.peek_token();
151+
// Snowflake supports the `:` cast operator unlike other dialects
152+
match token.token {
153+
Token::Colon => Some(Ok(self.prec_double_colon())),
154+
_ => None,
155+
}
156+
}
148157
}
149158

150159
/// Parse snowflake create table statement.

0 commit comments

Comments
 (0)