-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtoken8stream.h
154 lines (114 loc) · 4.57 KB
/
token8stream.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#ifndef _H_TOKEN_STREAM
#define _H_TOKEN_STREAM
#include <unordered_map>
#include <vector>
#include "re8map.h"
#include "token8.h"
#include "ustr_data.h"
namespace pun {

/*!
 * Parse a UTF-8 stream into a series of mapped token ids, according to
 * whichever regular expressions and single-character mappings apply.
 *
 * The class derives from Php::Base and declares two public groups of
 * members:
 *   - methods taking Php::Parameters / returning Php::Value, and
 *   - `fn_*` (and a few overloaded) methods that use plain C++ types.
 * NOTE(review): the first group looks like the PHP-callable surface and the
 * second like the native surface used from other C++ code; confirm against
 * the implementation file.
 */
class Token8Stream : public Php::Base {
public:
    // ---- Php::Parameters / Php::Value interface -------------------------

    // Class name string, defined out of line (presumably the PHP class name).
    static const char* PHP_NAME;
    // Receives the extension object; presumably registers this class on it.
    static void setup_ext(Php::Extension& ext);

    Token8Stream();
    ~Token8Stream();

    // Assign the integer token ids that carry a special meaning:
    // End of stream/string.
    void setEOSId(Php::Parameters& params);
    // Newline.
    void setEOLId(Php::Parameters& params);
    // Neither a mapped id nor a singles id.
    void setUnknownId(Php::Parameters& params);

    // Set the ordered array of integers (expression ids).
    void setExpSet(Php::Parameters& params);
    // Get the ordered array of integer map ids.
    Php::Value getExpSet();
    // Set the array of chr => integer.
    void setSingles(Php::Parameters& params);
    // Set the sharable expression map object.
    void setRe8map(Php::Parameters& params);
    // Set the input string to be processed.
    void setInput(Php::Parameters& params);

    // True while EOS has not yet been encountered.
    Php::Value hasPendingTokens() const;
    // Pass a Token8; the id is returned and the values are placed in the token.
    Php::Value getToken(Php::Parameters& params) const;
    // Get the rest of the current line (debugging aid).
    Php::Value beforeEOL();
    // Current offset; presumably the byte position in the input - confirm.
    Php::Value getOffset() const;

    // Set the list of map ids to iterate.
    void setIdList(Php::Parameters& params);
    // Return the array of ordered map ids.
    Php::Value getIdList();

    // Get the source line number.
    Php::Value getLine() const;
    // Get the current expression capture, or the single character as a string.
    Php::Value getValue() const;
    // Get the current token id.
    Php::Value getId() const;

    // First argument is a Token8; the same object is returned with values set.
    Php::Value peekToken(Php::Parameters& params);
    // First argument is a Token8 previously filled by peekToken(); advances
    // the stream by the byte offset implied by that token.
    void acceptToken(Php::Parameters& params);
    // Presumably advances to the next token and returns its id - confirm.
    Php::Value moveNextId();
    // Try an individual PCRE8; the stream moves the matched distance, or not
    // at all.
    Php::Value moveRegex(Php::Parameters& params);
    // Same as moveRegex(), but for a mapped PCRE8 id.
    Php::Value moveRegId(Php::Parameters& params);

public:
    // ---- Plain C++ interface --------------------------------------------

    void setExpSet(const IdList& list);
    void setString(const char* ptr, unsigned int len);
    void fn_setString(Str_ptr& sp);
    //void setString(std::string &&m);
    int fn_moveNextId();

    // Store the end-of-stream token id.
    void fn_setEOS(int id) { _eosId = id; }
    // Store the end-of-line token id.
    void fn_setEOL(int id) { _eolId = id; }
    // Store the id used for unrecognised input.
    void fn_setUnknown(int id) { _unknownId = id; }

    bool fn_moveRegId(int id);
    // Id held by the current token.
    int fn_getId() const { return _token._id;}
    // Replace the shared expression map.
    void fn_setMap(Re8map_share& sm) { _remap = sm; }
    void fn_copyIdList(Php::Value& v);
    int fn_firstMatch(Pcre8_match& matches);
    // Code point held by the current token.
    char32_t fn_getChar32() const { return _token._unicode; }
    uint64_t fn_getOffset() const;
    svx::string_view fn_substr(size_t start, size_t len);
    unsigned char fn_peekByte() const;
    unsigned char fn_movePeekByte();
    // Return the view offset of the end of the string.
    uint64_t fn_size() const;
    // Return the view pointer to the start of the string.
    const char* fn_data() const;
    void fn_addOffset(unsigned int offset);
    std::string fn_beforeChar(char32_t c) const;
    //std::string& fn_moveValue(std::string& val) { val = std::move(_token._value); return val;}
    void fn_peekToken(Token8* token);
    uint64_t fn_peekChar(Token8* token);
    void fn_acceptToken(Token8* token);
    void fn_setSingles(CharMap_sp& sp);
    Token8* fn_getToken(Token8 &token);
    svx::string_view fn_getValue();

private:
    int matchSP(Pcre8_share& sp, Pcre8_match& result);
    void fn_setString(const char* ptr, uint64_t len);
    void checkLineFeed(Token8* token);

    // The scalar members below carry zero/false default initialisers so that
    // none of them can hold an indeterminate value if the out-of-line
    // constructor (not visible in this header) omits one. Any initialiser the
    // constructor does supply takes precedence over these defaults.
    Str_ptr         _str;
    uint64_t        _index{0};
    uint64_t        _size{0};
    Re8map_share    _remap;
    IdList          _idlist;       // current id list for first match
    Token8          _token;
    Pcre8_match     _caps;
    CharMap_sp      _singles;
    int             _unknownId{0};
    int             _eolId{0};
    int             _eosId{0};
    unsigned int    _tokenLine{0};
    bool            _flagLF{false};
};

} // namespace pun
#endif