// test_lexer.cpp
// Forked from pytorch/pytorch.
#include <gtest/gtest.h>
#include <torch/csrc/jit/frontend/lexer.h>
namespace torch::jit {
// Feeds the spelling of every fixed-text token to a fresh Lexer and checks
// that the first token has the expected kind and text and that the token
// after it is TK_EOF.
TEST(LexerTest, AllTokens) {
  using KindAndSpelling = std::pair<int /* TokenKind */, std::string>;
  std::vector<KindAndSpelling> expected;

  // Single-character tokens: the character value doubles as the kind.
  for (const char* cursor = valid_single_char_tokens; *cursor != '\0';
       ++cursor) {
    const char c = *cursor;
    expected.emplace_back(c, std::string(1, c));
  }

  // Named token kinds: entries with an empty spelling are skipped.
#define ADD_TOKEN(kind_id, _, spelling)       \
  if ((spelling)[0] != '\0') {                \
    expected.emplace_back(kind_id, spelling); \
  }
  TC_FORALL_TOKEN_KINDS(ADD_TOKEN);
#undef ADD_TOKEN

  for (const auto& entry : expected) {
    const int want_kind = entry.first;
    const std::string& want_text = entry.second;

    Lexer lexer(std::make_shared<Source>(want_text));
    const auto& first = lexer.cur();
    // Capture the text before advancing; `first` refers to lexer state.
    const std::string lexed_text = first.range.text().str();
    EXPECT_EQ(want_kind, first.kind) << lexed_text;
    EXPECT_EQ(want_text, lexed_text) << lexed_text;

    // Nothing but end-of-file may follow the single token.
    lexer.next();
    EXPECT_EQ(lexer.cur().kind, TK_EOF);
  }
}
// `is` followed by a word that is only a prefix-like near miss of `not`
// ("n", "no", "no3") must lex as TK_IS and then a TK_IDENT carrying that
// word, whether the input ends there, continues with a space, or with "**".
TEST(LexerTest, SlightlyOffIsNot) {
  const std::vector<std::string> trailers = {"", " ", "**"};
  const std::vector<std::string> near_misses = {"n", "no", "no3"};

  for (const auto& trailer : trailers) {
    for (const auto& word : near_misses) {
      const std::string code = "is " + word + trailer;
      Lexer lexer(std::make_shared<Source>(code));

      // next() hands back the current token and advances the lexer.
      const auto first = lexer.next();
      EXPECT_EQ(first.kind, TK_IS) << first.range.text().str();

      const auto& second = lexer.cur();
      EXPECT_EQ(second.kind, TK_IDENT) << second.range.text().str();
      EXPECT_EQ(second.range.text().str(), word) << second.range.text().str();
    }
  }
}
// `not` followed by a word that only resembles `in` ("i", "i3") must lex as
// TK_NOT and then a TK_IDENT carrying that word, whether the input ends
// there, continues with a space, or with "**".
TEST(LexerTest, SlightlyOffNotIn) {
  const std::vector<std::string> trailers = {"", " ", "**"};
  const std::vector<std::string> near_misses = {"i", "i3"};

  for (const auto& trailer : trailers) {
    for (const auto& word : near_misses) {
      const std::string code = "not " + word + trailer;
      Lexer lexer(std::make_shared<Source>(code));

      // next() hands back the current token and advances the lexer.
      const auto first = lexer.next();
      EXPECT_EQ(first.kind, TK_NOT) << first.range.text().str();

      const auto& second = lexer.cur();
      EXPECT_EQ(second.kind, TK_IDENT) << second.range.text().str();
      EXPECT_EQ(second.range.text().str(), word) << second.range.text().str();
    }
  }
}
TEST(LexerTest, IsNoteBug) {
  // Pins a known quirk: the input `is note` comes out as TK_ISNOT and then
  // a TK_IDENT whose text is just "e". Python does not tokenize it this
  // way, but presumably this behavior has to be maintained.
  Lexer lexer(std::make_shared<Source>("is note"));

  const auto merged_keyword = lexer.next();
  EXPECT_EQ(merged_keyword.kind, TK_ISNOT);

  const auto leftover = lexer.next();
  EXPECT_EQ(leftover.kind, TK_IDENT);
  EXPECT_EQ(leftover.range.text(), "e");

  const auto terminator = lexer.next();
  EXPECT_EQ(terminator.kind, TK_EOF);
}
TEST(LexerTest, NotInpBug) {
  // Same quirk as in IsNoteBug, for `not in`: the input `not inp` comes out
  // as TK_NOTIN and then a TK_IDENT whose text is just "p". Again, Python
  // does not tokenize it this way.
  Lexer lexer(std::make_shared<Source>("not inp"));

  const auto merged_keyword = lexer.next();
  EXPECT_EQ(merged_keyword.kind, TK_NOTIN);

  const auto leftover = lexer.next();
  EXPECT_EQ(leftover.kind, TK_IDENT);
  EXPECT_EQ(leftover.range.text(), "p");

  const auto terminator = lexer.next();
  EXPECT_EQ(terminator.kind, TK_EOF);
}
} // namespace torch::jit