-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrule.go
135 lines (117 loc) · 3.45 KB
/
rule.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
package syn
import (
"bytes"
"fmt"
"github.com/dlclark/regexp2"
)
// state represents a state of the lexer. A state consists of a group of rules that are attempted
// to be matched in order. A rule may push a new state onto a logical stack to change the set of rules
// to be used for matching. A rule may also pop states off the stack to return to a previous state.
type state struct {
// name identifies the state; it is the key under which rules.AddState stores it.
name string
// rules are the candidate rules, tried in slice order by match.
rules []rule
}
// match tries each rule of the state, in slice order, against text and returns
// the first successful match together with a pointer to the rule that produced
// it. It returns (nil, nil) when no rule matches.
//
// An error reported by a rule cannot be propagated (the signature has no error
// result), so the rule is treated as not matching and the error is passed to
// debugf instead of being dropped silently.
func (s state) match(text []rune) (*regexp2.Match, *rule) {
	for i := range s.rules {
		// Index into the slice instead of ranging by value so the loop
		// variable does not shadow the rule type.
		candidate := &s.rules[i]
		debugf("State.match: for state %s trying rule %d /%s/\n", s.name, i, candidate.pattern)
		res, err := candidate.match(text)
		if err != nil {
			debugf("State.match: rule %d failed: %v\n", i, err)
			continue
		}
		if res != nil {
			debugf("State.match: rule %d matched\n", i)
			return res, candidate
		}
	}
	return nil, nil
}
// String renders the state as a header line carrying its name, followed by
// one line per rule showing the rule's index and its String form.
func (s state) String() string {
	out := new(bytes.Buffer)
	fmt.Fprintf(out, "State %s:\n", s.name)
	for idx := range s.rules {
		fmt.Fprintf(out, " rule %d: %s\n", idx, s.rules[idx].String())
	}
	return out.String()
}
// rules is the set of rules in a Lexer, grouped by the state they belong to.
type rules struct {
// rules maps a state name to the state (and therefore the rules) registered
// under that name. newRules allocates it; AddState writes to it.
rules map[string]state
}
// newRules returns a rules value whose state map is allocated and empty,
// ready for AddState.
func newRules() rules {
	byName := make(map[string]state)
	return rules{rules: byName}
}
// AddState registers s under its name, replacing any state previously
// registered with the same name. The map is allocated on first use, so a
// zero-value rules (one not built with newRules) can also be added to
// without panicking on a nil map.
func (r *rules) AddState(s state) {
	if r.rules == nil {
		r.rules = make(map[string]state)
	}
	r.rules[s.name] = s
}
// Get looks up the state registered under stateName. When no such state
// exists, ok is false and stat is the zero state.
func (r *rules) Get(stateName string) (stat state, ok bool) {
	found, present := r.rules[stateName]
	return found, present
}
// Contains reports whether a state is registered under stateName.
func (r *rules) Contains(stateName string) (ok bool) {
	if _, present := r.rules[stateName]; present {
		return true
	}
	return false
}
// String concatenates the String form of every registered state. The states
// are visited in map iteration order, so the order of the output is not
// specified when more than one state is present.
func (r rules) String() string {
	out := new(bytes.Buffer)
	for name := range r.rules {
		out.WriteString(r.rules[name].String())
	}
	return out.String()
}
// rule specifies a regexp to match when lexing at the current position in the text, and an action
// to take if the regexp matches.
type rule struct {
// pattern is the compiled expression that match runs against the text.
pattern *regexp2.Regexp
// tok is the token type associated with the rule.
tok TokenType
// pushState names a state to push; String treats the empty string as "none".
// NOTE(review): presumably pushed when the rule matches — confirm in the lexer.
pushState string
// popDepth is reported by String as the "pop" count.
// NOTE(review): presumably the number of states to pop on a match — confirm in the lexer.
popDepth int
// byGroups holds per-group handling elements; nil means the rule has none.
byGroups []byGroupElement
// include is printed by String when non-empty.
// NOTE(review): presumably names a state whose rules are included — confirm in the lexer.
include string
// useSelfState is non-empty when IsUseSelf should report true.
useSelfState string
}
// String returns a one-line, parenthesised description of the rule. The
// pattern, token type and pop depth are always shown; the push state, the
// number of by-group elements, the include name and the use-self state are
// shown only when they are set.
func (r rule) String() string {
	out := new(bytes.Buffer)

	fmt.Fprintf(out, "(rule /%s/ tok: %s", r.pattern, r.tok)
	if len(r.pushState) > 0 {
		fmt.Fprintf(out, " push: %s", r.pushState)
	}
	fmt.Fprintf(out, " pop: %d", r.popDepth)

	// A non-nil but empty slice is still reported (with size 0).
	if groups := r.byGroups; groups != nil {
		fmt.Fprintf(out, " bygroups size: %d", len(groups))
	}
	if len(r.include) > 0 {
		fmt.Fprintf(out, " include: %s", r.include)
	}
	if len(r.useSelfState) > 0 {
		fmt.Fprintf(out, " usingself: %s", r.useSelfState)
	}

	out.WriteString(")")
	return out.String()
}
// IsUseSelf reports whether the rule names a use-self state, i.e. whether
// the matched text should be handled by lexing it with a new instance of the
// lexer.
func (r rule) IsUseSelf() bool {
	return len(r.useSelfState) != 0
}
// match runs the rule's pattern over text and accepts the result only when
// the match starts at the first rune (Index 0). A match that begins further
// into the text is reported as no match: (nil, nil). When the pattern finds
// nothing, the nil match and whatever error FindRunesMatch returned are
// passed through unchanged.
func (r rule) match(text []rune) (*regexp2.Match, error) {
	found, err := r.pattern.FindRunesMatch(text)
	if found == nil || found.Index == 0 {
		return found, err
	}
	// The pattern matched, but not at the current position.
	return nil, nil
}
// byGroupElement is one entry of a rule's byGroups list.
// NOTE(review): presumably each entry describes how one capture group of the
// rule's pattern is emitted — confirm against the lexer.
type byGroupElement struct {
// tok is the token type associated with this element.
tok TokenType
// useSelfState is non-empty when IsUseSelf should report true.
useSelfState string
}
// IsUseSelf reports whether this element names a use-self state, i.e.
// whether the group text should be handled by lexing it with a new instance
// of the lexer.
func (b byGroupElement) IsUseSelf() bool {
	return len(b.useSelfState) != 0
}