forked from mysticatea/regexpp
-
-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathproperties.ts
185 lines (156 loc) · 8.25 KB
/
properties.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
/* This file was generated with ECMAScript specifications. */
/**
 * Holds one space-separated list of names per ECMAScript edition
 * (ES2018 through ES2025) and exposes each list as a `Set<string>`.
 *
 * A set is built the first time its getter is read and is then cached on the
 * instance, so lists that are never queried are never split.
 */
class DataSet {
    private readonly _raw2018: string
    private _set2018: Set<string> | undefined
    private readonly _raw2019: string
    private _set2019: Set<string> | undefined
    private readonly _raw2020: string
    private _set2020: Set<string> | undefined
    private readonly _raw2021: string
    private _set2021: Set<string> | undefined
    private readonly _raw2022: string
    private _set2022: Set<string> | undefined
    private readonly _raw2023: string
    private _set2023: Set<string> | undefined
    private readonly _raw2024: string
    private _set2024: Set<string> | undefined
    private readonly _raw2025: string
    private _set2025: Set<string> | undefined

    /**
     * Split a space-separated list into a set of names.
     *
     * `"".split(" ")` returns `[""]`, so without the filter an edition that
     * lists nothing would produce a set that "contains" the empty string.
     * Empty tokens are therefore dropped.
     *
     * @param raw Space-separated names; may be the empty string.
     * @returns The distinct non-empty names found in `raw`.
     */
    private static parse(raw: string): Set<string> {
        return new Set(raw.split(" ").filter((name) => name !== ""))
    }

    /**
     * @param raw2018 Space-separated names for the ES2018 slot.
     * @param raw2019 Space-separated names for the ES2019 slot.
     * @param raw2020 Space-separated names for the ES2020 slot.
     * @param raw2021 Space-separated names for the ES2021 slot.
     * @param raw2022 Space-separated names for the ES2022 slot.
     * @param raw2023 Space-separated names for the ES2023 slot.
     * @param raw2024 Space-separated names for the ES2024 slot.
     * @param raw2025 Space-separated names for the ES2025 slot.
     */
    public constructor(
        raw2018: string,
        raw2019: string,
        raw2020: string,
        raw2021: string,
        raw2022: string,
        raw2023: string,
        raw2024: string,
        raw2025: string,
    ) {
        this._raw2018 = raw2018
        this._raw2019 = raw2019
        this._raw2020 = raw2020
        this._raw2021 = raw2021
        this._raw2022 = raw2022
        this._raw2023 = raw2023
        this._raw2024 = raw2024
        this._raw2025 = raw2025
    }

    /** Names from the ES2018 slot (parsed on first access, then cached). */
    public get es2018(): Set<string> {
        return (
            this._set2018 ?? (this._set2018 = DataSet.parse(this._raw2018))
        )
    }

    /** Names from the ES2019 slot (parsed on first access, then cached). */
    public get es2019(): Set<string> {
        return (
            this._set2019 ?? (this._set2019 = DataSet.parse(this._raw2019))
        )
    }

    /** Names from the ES2020 slot (parsed on first access, then cached). */
    public get es2020(): Set<string> {
        return (
            this._set2020 ?? (this._set2020 = DataSet.parse(this._raw2020))
        )
    }

    /** Names from the ES2021 slot (parsed on first access, then cached). */
    public get es2021(): Set<string> {
        return (
            this._set2021 ?? (this._set2021 = DataSet.parse(this._raw2021))
        )
    }

    /** Names from the ES2022 slot (parsed on first access, then cached). */
    public get es2022(): Set<string> {
        return (
            this._set2022 ?? (this._set2022 = DataSet.parse(this._raw2022))
        )
    }

    /** Names from the ES2023 slot (parsed on first access, then cached). */
    public get es2023(): Set<string> {
        return (
            this._set2023 ?? (this._set2023 = DataSet.parse(this._raw2023))
        )
    }

    /** Names from the ES2024 slot (parsed on first access, then cached). */
    public get es2024(): Set<string> {
        return (
            this._set2024 ?? (this._set2024 = DataSet.parse(this._raw2024))
        )
    }

    /** Names from the ES2025 slot (parsed on first access, then cached). */
    public get es2025(): Set<string> {
        return (
            this._set2025 ?? (this._set2025 = DataSet.parse(this._raw2025))
        )
    }
}
/** Property names that `isValidUnicodeProperty` treats as General_Category (long form and `gc` alias). */
const gcNameSet = new Set(["General_Category", "gc"])
/** Property names that `isValidUnicodeProperty` treats as Script / Script_Extensions (long forms and `sc` / `scx` aliases). */
const scNameSet = new Set(["Script", "Script_Extensions", "sc", "scx"])
/**
 * Values accepted for the property names in `gcNameSet`.
 * Arguments are positional, one per edition from ES2018 to ES2025; only the
 * ES2018 slot lists any names, and `isValidUnicodeProperty` reads only
 * `es2018` from this data set.
 */
const gcValueSets = new DataSet(
// ES2018
"C Cased_Letter Cc Cf Close_Punctuation Cn Co Combining_Mark Connector_Punctuation Control Cs Currency_Symbol Dash_Punctuation Decimal_Number Enclosing_Mark Final_Punctuation Format Initial_Punctuation L LC Letter Letter_Number Line_Separator Ll Lm Lo Lowercase_Letter Lt Lu M Mark Math_Symbol Mc Me Mn Modifier_Letter Modifier_Symbol N Nd Nl No Nonspacing_Mark Number Open_Punctuation Other Other_Letter Other_Number Other_Punctuation Other_Symbol P Paragraph_Separator Pc Pd Pe Pf Pi Po Private_Use Ps Punctuation S Sc Separator Sk Sm So Space_Separator Spacing_Mark Surrogate Symbol Titlecase_Letter Unassigned Uppercase_Letter Z Zl Zp Zs cntrl digit punct",
// ES2019
"",
// ES2020
"",
// ES2021
"",
// ES2022
"",
// ES2023
"",
// ES2024
"",
// ES2025
"",
)
/**
 * Values accepted for the property names in `scNameSet`.
 * Arguments are positional, one per edition from ES2018 to ES2025; each slot
 * lists only the names assigned to that edition, not a cumulative list.
 * `isValidUnicodeProperty` reads the ES2018 through ES2023 slots; the ES2024
 * and ES2025 slots list no names.
 */
const scValueSets = new DataSet(
// ES2018
"Adlam Adlm Aghb Ahom Anatolian_Hieroglyphs Arab Arabic Armenian Armi Armn Avestan Avst Bali Balinese Bamu Bamum Bass Bassa_Vah Batak Batk Beng Bengali Bhaiksuki Bhks Bopo Bopomofo Brah Brahmi Brai Braille Bugi Buginese Buhd Buhid Cakm Canadian_Aboriginal Cans Cari Carian Caucasian_Albanian Chakma Cham Cher Cherokee Common Copt Coptic Cprt Cuneiform Cypriot Cyrillic Cyrl Deseret Deva Devanagari Dsrt Dupl Duployan Egyp Egyptian_Hieroglyphs Elba Elbasan Ethi Ethiopic Geor Georgian Glag Glagolitic Gonm Goth Gothic Gran Grantha Greek Grek Gujarati Gujr Gurmukhi Guru Han Hang Hangul Hani Hano Hanunoo Hatr Hatran Hebr Hebrew Hira Hiragana Hluw Hmng Hung Imperial_Aramaic Inherited Inscriptional_Pahlavi Inscriptional_Parthian Ital Java Javanese Kaithi Kali Kana Kannada Katakana Kayah_Li Khar Kharoshthi Khmer Khmr Khoj Khojki Khudawadi Knda Kthi Lana Lao Laoo Latin Latn Lepc Lepcha Limb Limbu Lina Linb Linear_A Linear_B Lisu Lyci Lycian Lydi Lydian Mahajani Mahj Malayalam Mand Mandaic Mani Manichaean Marc Marchen Masaram_Gondi Meetei_Mayek Mend Mende_Kikakui Merc Mero Meroitic_Cursive Meroitic_Hieroglyphs Miao Mlym Modi Mong Mongolian Mro Mroo Mtei Mult Multani Myanmar Mymr Nabataean Narb Nbat New_Tai_Lue Newa Nko Nkoo Nshu Nushu Ogam Ogham Ol_Chiki Olck Old_Hungarian Old_Italic Old_North_Arabian Old_Permic Old_Persian Old_South_Arabian Old_Turkic Oriya Orkh Orya Osage Osge Osma Osmanya Pahawh_Hmong Palm Palmyrene Pau_Cin_Hau Pauc Perm Phag Phags_Pa Phli Phlp Phnx Phoenician Plrd Prti Psalter_Pahlavi Qaac Qaai Rejang Rjng Runic Runr Samaritan Samr Sarb Saur Saurashtra Sgnw Sharada Shavian Shaw Shrd Sidd Siddham SignWriting Sind Sinh Sinhala Sora Sora_Sompeng Soyo Soyombo Sund Sundanese Sylo Syloti_Nagri Syrc Syriac Tagalog Tagb Tagbanwa Tai_Le Tai_Tham Tai_Viet Takr Takri Tale Talu Tamil Taml Tang Tangut Tavt Telu Telugu Tfng Tglg Thaa Thaana Thai Tibetan Tibt Tifinagh Tirh Tirhuta Ugar Ugaritic Vai Vaii Wara Warang_Citi Xpeo Xsux Yi Yiii Zanabazar_Square Zanb Zinh Zyyy",
// ES2019
"Dogr Dogra Gong Gunjala_Gondi Hanifi_Rohingya Maka Makasar Medefaidrin Medf Old_Sogdian Rohg Sogd Sogdian Sogo",
// ES2020
"Elym Elymaic Hmnp Nand Nandinagari Nyiakeng_Puachue_Hmong Wancho Wcho",
// ES2021
"Chorasmian Chrs Diak Dives_Akuru Khitan_Small_Script Kits Yezi Yezidi",
// ES2022
"Cpmn Cypro_Minoan Old_Uyghur Ougr Tangsa Tnsa Toto Vith Vithkuqi",
// ES2023
"Gara Garay Gukh Gurung_Khema Hrkt Katakana_Or_Hiragana Kawi Kirat_Rai Krai Nag_Mundari Nagm Ol_Onal Onao Sunu Sunuwar Todhri Todr Tulu_Tigalari Tutg Unknown Zzzz",
// ES2024
"",
// ES2025
"",
)
/**
 * Names accepted by `isValidLoneUnicodeProperty`, i.e. properties given
 * without a `name=value` pair.
 * Arguments are positional, one per edition from ES2018 to ES2025; only the
 * ES2018, ES2019 and ES2021 slots list any names, and those are the only
 * slots `isValidLoneUnicodeProperty` reads.
 */
const binPropertySets = new DataSet(
// ES2018
"AHex ASCII ASCII_Hex_Digit Alpha Alphabetic Any Assigned Bidi_C Bidi_Control Bidi_M Bidi_Mirrored CI CWCF CWCM CWKCF CWL CWT CWU Case_Ignorable Cased Changes_When_Casefolded Changes_When_Casemapped Changes_When_Lowercased Changes_When_NFKC_Casefolded Changes_When_Titlecased Changes_When_Uppercased DI Dash Default_Ignorable_Code_Point Dep Deprecated Dia Diacritic Emoji Emoji_Component Emoji_Modifier Emoji_Modifier_Base Emoji_Presentation Ext Extender Gr_Base Gr_Ext Grapheme_Base Grapheme_Extend Hex Hex_Digit IDC IDS IDSB IDST IDS_Binary_Operator IDS_Trinary_Operator ID_Continue ID_Start Ideo Ideographic Join_C Join_Control LOE Logical_Order_Exception Lower Lowercase Math NChar Noncharacter_Code_Point Pat_Syn Pat_WS Pattern_Syntax Pattern_White_Space QMark Quotation_Mark RI Radical Regional_Indicator SD STerm Sentence_Terminal Soft_Dotted Term Terminal_Punctuation UIdeo Unified_Ideograph Upper Uppercase VS Variation_Selector White_Space XIDC XIDS XID_Continue XID_Start space",
// ES2019
"Extended_Pictographic",
// ES2020
"",
// ES2021
"EBase EComp EMod EPres ExtPict",
// ES2022
"",
// ES2023
"",
// ES2024
"",
// ES2025
"",
)
/**
 * Names accepted by `isValidLoneUnicodePropertyOfString`.
 * Arguments are positional, one per edition from ES2018 to ES2025; only the
 * ES2024 slot lists any names, and it is the only slot that function reads.
 */
const binPropertyOfStringsSets = new DataSet(
// ES2018
"",
// ES2019
"",
// ES2020
"",
// ES2021
"",
// ES2022
"",
// ES2023
"",
// ES2024
"Basic_Emoji Emoji_Keycap_Sequence RGI_Emoji RGI_Emoji_Flag_Sequence RGI_Emoji_Modifier_Sequence RGI_Emoji_Tag_Sequence RGI_Emoji_ZWJ_Sequence",
// ES2025
"",
)
/**
 * Check whether a `name=value` Unicode property pair is known for the given
 * ECMAScript version.
 *
 * - If `name` is in `gcNameSet`, `value` must be in the ES2018
 *   General_Category list and `version` must be at least 2018.
 * - If `name` is in `scNameSet`, `value` must appear in a script list whose
 *   edition (2018 through 2023) is not newer than `version`.
 * - Any other `name` is rejected.
 *
 * @param version ECMAScript edition year, e.g. `2018`.
 * @param name Property name to the left of `=`.
 * @param value Property value to the right of `=`.
 * @returns `true` when the pair is recognised for `version`.
 */
export function isValidUnicodeProperty(
    version: number,
    name: string,
    value: string,
): boolean {
    const isGeneralCategory = gcNameSet.has(name)
    if (isGeneralCategory) {
        if (version >= 2018) {
            return gcValueSets.es2018.has(value)
        }
        return false
    }

    if (!scNameSet.has(name)) {
        return false
    }

    // Walk the editions oldest-first; a set is only touched once `version`
    // is known to reach its edition, so later sets stay unparsed if unused.
    if (version >= 2018 && scValueSets.es2018.has(value)) {
        return true
    }
    if (version >= 2019 && scValueSets.es2019.has(value)) {
        return true
    }
    if (version >= 2020 && scValueSets.es2020.has(value)) {
        return true
    }
    if (version >= 2021 && scValueSets.es2021.has(value)) {
        return true
    }
    if (version >= 2022 && scValueSets.es2022.has(value)) {
        return true
    }
    if (version >= 2023 && scValueSets.es2023.has(value)) {
        return true
    }
    return false
}
/**
 * Check whether a Unicode property written without a `name=` part is known
 * for the given ECMAScript version.
 *
 * `value` is looked up in the ES2018, ES2019 and ES2021 lists of
 * `binPropertySets`; a list only counts when `version` is at least that
 * list's edition.
 *
 * @param version ECMAScript edition year, e.g. `2018`.
 * @param value The lone property name to look up.
 * @returns `true` when `value` is recognised for `version`.
 */
export function isValidLoneUnicodeProperty(
    version: number,
    value: string,
): boolean {
    if (version >= 2018 && binPropertySets.es2018.has(value)) {
        return true
    }
    if (version >= 2019 && binPropertySets.es2019.has(value)) {
        return true
    }
    if (version >= 2021 && binPropertySets.es2021.has(value)) {
        return true
    }
    return false
}
/**
 * Check whether `value` is listed in the ES2024 slot of
 * `binPropertyOfStringsSets`; anything below version 2024 is rejected without
 * consulting the list.
 *
 * @param version ECMAScript edition year, e.g. `2024`.
 * @param value The lone property name to look up.
 * @returns `true` when `version` is at least 2024 and `value` is listed.
 */
export function isValidLoneUnicodePropertyOfString(
    version: number,
    value: string,
): boolean {
    if (version >= 2024) {
        return binPropertyOfStringsSets.es2024.has(value)
    }
    return false
}