diff --git a/README.md b/README.md
index 850331b..bd0d694 100644
--- a/README.md
+++ b/README.md
@@ -99,6 +99,9 @@ go test -v -count=1 -race ./...
 
 # Run benchmarks
 go test -bench=. -benchmem ./...
+
+# Run fuzz tests (requires Go >= 1.18)
+go test -fuzz=. ./parser/...
 ```
 
 ## Examples
diff --git a/parser/fuzz_test.go b/parser/fuzz_test.go
new file mode 100644
index 0000000..6b45b67
--- /dev/null
+++ b/parser/fuzz_test.go
@@ -0,0 +1,69 @@
+//go:build go1.18
+// +build go1.18
+
+package parser_test
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/laojianzi/kql-go/parser"
+	"github.com/stretchr/testify/assert"
+)
+
+func FuzzParser(f *testing.F) {
+	// Add initial corpus
+	seeds := []string{
+		"field:value",
+		"field: value",
+		"field : value",
+		`field: "value"`,
+		"field: *",
+		"field: value*",
+		"field: *value",
+		"field: *value*",
+		"field > 10",
+		"field >= 10",
+		"field < 10",
+		"field <= 10",
+		"field: true",
+		"field: false",
+		"field: null",
+		"field1: value1 AND field2: value2",
+		"field1: value1 OR field2: value2",
+		"NOT field: value",
+		"(field: value)",
+		"(field1: value1) AND (field2: value2)",
+		`field: "value with spaces"`,
+		`field: "value with \"escaped\" quotes"`,
+		`field: "value with \n newline"`,
+		"field1: value1 AND field2: value2 OR field3: value3",
+		"field1: (value1 OR value2) AND field2: value3",
+	}
+
+	for _, seed := range seeds {
+		f.Add(seed)
+	}
+
+	f.Fuzz(func(t *testing.T, query string) {
+		if strings.TrimSpace(query) == "" {
+			return
+		}
+
+		// Current fuzzing implementation has limitations in input/output validation.
+		// This test only covers basic safety checks:
+		// 1. No panics during parsing
+		// 2. String() output can be re-parsed
+		// 3. String() output remains stable
+		//
+		// Contributions welcome for better validation approaches :)
+		stmt, err := parser.New(query).Stmt()
+		if err != nil || stmt == nil {
+			return
+		}
+
+		stmt2, err := parser.New(stmt.String()).Stmt()
+		assert.NoError(t, err)
+		assert.Equal(t, stmt.String(), stmt2.String())
+	})
+}
diff --git a/parser/lexer.go b/parser/lexer.go
index d674cc6..4260dbb 100644
--- a/parser/lexer.go
+++ b/parser/lexer.go
@@ -102,16 +102,22 @@ func (l *defaultLexer) consumeFieldToken() error {
 
 // shouldBreak checks if token collection should stop
 func (l *defaultLexer) shouldBreak(i int, isString, withEscape bool, endChar rune) bool {
-	if isString && !withEscape && l.peek(i) == endChar {
+	ch := l.peek(i)
+	if isString && !withEscape && ch == endChar {
 		return true
 	}
 
 	if !isString && !withEscape {
-		if unicode.IsSpace(l.peek(i)) || l.peek(i) == ')' {
+		if unicode.IsSpace(ch) || ch == ')' || ch == ':' {
 			return true
 		}
 	}
 
+	// break on ':' unless it is escaped as '\:'
+	if !isString && withEscape && ch == ':' && (!l.peekOk(i-1) || l.peek(i-1) != '\\') {
+		return true
+	}
+
 	return false
 }
 
@@ -121,8 +127,8 @@ func (l *defaultLexer) collectNextToken(start int) string {
 	buf.WriteRune(l.peek(start))
 
 	for j := start; l.peekOk(j + 1); j++ {
-		nextRune := l.peek(j + 1)
-		if unicode.IsSpace(nextRune) || nextRune == ')' {
+		currentRune, nextRune := l.peek(j), l.peek(j+1)
+		if currentRune != '\\' && (unicode.IsSpace(nextRune) || nextRune == ')' || nextRune == ':') {
 			break
 		}
 
diff --git a/parser/parser.go b/parser/parser.go
index c63b06b..390b15b 100644
--- a/parser/parser.go
+++ b/parser/parser.go
@@ -107,7 +107,7 @@ func (p *defaultParser) parseBinary() (ast.Expr, error) {
 	}
 
 	op := p.lexer.Token.Kind
-	if !op.IsOperator() {
+	if !op.IsOperator() || !p.lexer.lastTokenKind.IsField() {
 		return ast.NewBinaryExpr(pos, "", 0, expr, hasNot), nil
 	}
 
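To illustrate the effect of the lexer and parser changes above, here is a minimal sketch (not part of the patch) that drives the public `parser.New(...).Stmt()` API exercised by the tests; the behavior noted in the comments is inferred from the new test cases and fuzz seeds, and the exact error text and `String()` output are not asserted here:

```go
package main

import (
	"fmt"

	"github.com/laojianzi/kql-go/parser"
)

func main() {
	queries := []string{
		"field:value",  // no space before ':' — the field and value should now lex as separate tokens
		"field: value", // spaced form, expected to parse to an equivalent AST
		`\AND: 0`,      // escaped keyword used as a field name (fuzz-found case)
		"0 :0",         // fuzz-found input that should fail with a parse error rather than a panic
	}

	for _, q := range queries {
		stmt, err := parser.New(q).Stmt()
		if err != nil {
			fmt.Printf("%q -> parse error: %v\n", q, err)
			continue
		}

		fmt.Printf("%q -> %s\n", q, stmt.String())
	}
}
```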
diff --git a/parser/parser_test.go b/parser/parser_test.go
index dbe8d86..d53d66e 100644
--- a/parser/parser_test.go
+++ b/parser/parser_test.go
@@ -740,3 +740,45 @@ func TestParser_ComplexQueries(t *testing.T) {
 		})
 	}
 }
+
+func TestParserFuzzBugs(t *testing.T) {
+	tests := []struct {
+		query   string
+		want    ast.Expr
+		wantErr error
+	}{
+		{
+			"0 :0",
+			nil,
+			errors.New("line 0:2 expected keyword OR|AND|NOT, but got \":\"\n0 :0\n  ^\n"),
+		},
+		{
+			"\\AND :0",
+			ast.NewBinaryExpr(0, "\\AND", token.TokenKindOperatorEql, ast.NewLiteral(
+				6, 7, token.TokenKindInt, "0", nil,
+			), false),
+			nil,
+		},
+		{
+			"\\AND: 0",
+			ast.NewBinaryExpr(0, "\\AND", token.TokenKindOperatorEql, ast.NewLiteral(
+				6, 7, token.TokenKindInt, "0", nil,
+			), false),
+			nil,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.query, func(t *testing.T) {
+			stmt, err := parser.New(tt.query).Stmt()
+
+			if tt.wantErr == nil {
+				assert.NoError(t, err)
+			} else {
+				assert.EqualError(t, err, tt.wantErr.Error())
+			}
+
+			assert.EqualValues(t, tt.want, stmt)
+		})
+	}
+}
diff --git a/parser/testdata/fuzz/FuzzParser/45eecbed51a417f5 b/parser/testdata/fuzz/FuzzParser/45eecbed51a417f5
new file mode 100644
index 0000000..6de653e
--- /dev/null
+++ b/parser/testdata/fuzz/FuzzParser/45eecbed51a417f5
@@ -0,0 +1,2 @@
+go test fuzz v1
+string("\\AND :0")
diff --git a/parser/testdata/fuzz/FuzzParser/74ba8e9a0ce951ee b/parser/testdata/fuzz/FuzzParser/74ba8e9a0ce951ee
new file mode 100644
index 0000000..bed3ef0
--- /dev/null
+++ b/parser/testdata/fuzz/FuzzParser/74ba8e9a0ce951ee
@@ -0,0 +1,2 @@
+go test fuzz v1
+string("0 :0")
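As a usage note (not part of the patch): Go replays files under `testdata/fuzz/FuzzParser/` as ordinary sub-tests on every `go test` run, so the two recorded inputs above act as regression tests even without `-fuzz`. A sketch of replaying them and resuming fuzzing, assuming a Go 1.18+ toolchain:

```shell
# Replay all recorded inputs (they run as regular sub-tests of FuzzParser)
go test -run=FuzzParser ./parser/

# Replay a single corpus entry by its file name
go test -run=FuzzParser/74ba8e9a0ce951ee ./parser/

# Resume fuzzing for a bounded amount of time
go test -fuzz=FuzzParser -fuzztime=30s ./parser/
```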