-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlex_test.go
50 lines (43 loc) · 1.69 KB
/
lex_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
package tinyml
import (
"bytes"
"io"
"testing"
"github.com/tdewolff/test"
)
// TTs is a named slice of TokenType, used as shorthand when spelling out the
// expected token types in the test table of TestTokens.
type TTs []TokenType
// TestTokens feeds each table input to a fresh Lexer, drains it with Next
// until ErrorToken is returned, and checks that the collected token types and
// token texts equal the expected sequences. Each input runs as its own
// subtest, named after the input string.
func TestTokens(t *testing.T) {
	tokenTests := []struct {
		input   string      // raw text handed to the lexer
		ttypes  []TokenType // expected token types, in order
		lexemes []string    // expected token text, in order
	}{
		// Plain text is a single TextToken.
		{"Hello world", TTs{TextToken}, []string{"Hello world"}},
		// A double newline between texts is one NewLineToken.
		{"Hello world\n\nThis new next line", TTs{TextToken, NewLineToken, TextToken}, []string{"Hello world", "\n\n", "This new next line"}},
		// Four newlines are expected as two consecutive NewLineTokens.
		{"Hello world\n\n\n\nThis new next line", TTs{TextToken, NewLineToken, NewLineToken, TextToken}, []string{"Hello world", "\n\n", "\n\n", "This new next line"}},
		// A single leading newline is expected as a BreakLineToken.
		{"\nHello world\n\nThis is new line", TTs{BreakLineToken, TextToken, NewLineToken, TextToken}, []string{"\n", "Hello world", "\n\n", "This is new line"}},
		// Tags embedded in multi-byte (CJK) text.
		{"传言[st]ST/US/BABA#阿里巴巴.US[/st]将在港股上市", TTs{TextToken, StartTagToken, TextToken, EndTagToken, TextToken}, []string{"传言", "[st]", "ST/US/BABA#阿里巴巴.US", "[/st]", "将在港股上市"}},
		// An upper-case start tag is expected back in lower case.
		{"[ST]ST/US/BABA#阿里巴巴.US[/st] 将在港股上市", TTs{StartTagToken, TextToken, EndTagToken, TextToken}, []string{"[st]", "ST/US/BABA#阿里巴巴.US", "[/st]", " 将在港股上市"}},
	}

	for _, tt := range tokenTests {
		t.Run(tt.input, func(t *testing.T) {
			l := NewLexer(bytes.NewBufferString(tt.input))

			// Kept as non-nil empty slices, exactly as before, so the
			// comparison against the expected slices is unchanged.
			tokens := []TokenType{}
			lexemes := []string{}

			for {
				token, data := l.Next()
				if token == ErrorToken {
					// Stop at ErrorToken; the test requires the lexer's
					// error at this point to be io.EOF.
					test.T(t, l.Err(), io.EOF)
					break
				}
				tokens = append(tokens, token)
				lexemes = append(lexemes, string(data))
			}

			test.T(t, tokens, tt.ttypes, "token types must match")
			test.T(t, lexemes, tt.lexemes, "token data must match")
		})
	}
}