This repository was archived by the owner on Nov 22, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathWordManager.cs
320 lines (271 loc) · 10.5 KB
/
WordManager.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
using System.Collections.Generic;
namespace followthewhiterabbit
{
/// <summary>
/// Special manager class for handling string-code related transformation
/// Groups every word into special entities which are anagrams on their own
/// Ex: "part" and "trap" are anagrams of same word for the algorithm and they are both in english dictionary
/// </summary>
public class WordManager
{
#region Constants
/* bit index of characters
.. 76 5432 1098 7654 3210 ..bit indexes
ai lnoo prss tttt uuyw
*/
public const string Characters = "ailnooprssttttuuyw";
public const int Max = 0x3ffff; // letter bits set
public const int MaxPower = 18;
#endregion
public WordManager()
{
Words = new Dictionary<int, Word>();
}
#region Public Members
public IDictionary<int, Word> Words { get; private set; }
/// <summary>
/// Reads every word from provided word list and adds into dictionary if possible
/// </summary>
/// <param name="wordlist">Word list</param>
/// <returns>Count of available words</returns>
public int ReadWordsFrom(string wordlist)
{
string[] english = wordlist.Split('\r', '\n');
//string[] english = System.IO.File.ReadAllLines(file);
int count = 0;
foreach (string s in english)
{
if (AddWord(s))
count++;
}
return count;
}
/// <summary>
/// Adds a word node to word dictionary
/// </summary>
/// <param name="word">Word to add to dictionary</param>
/// <returns>True if word is available, false otherwise</returns>
public bool AddWord(string word)
{
int wordCode = CodeFromWord(word);
if (wordCode == -1)
return false;
Word w = null;
if (!Words.TryGetValue(wordCode, out w))
{
w = new Word(wordCode);
Words.Add(wordCode, w);
}
w.Add(word);
return true;
}
/// <summary>
/// Finds words within a mask
/// </summary>
/// <param name="mask">mask as an int</param>
/// <param name="maxLength">Max length that word could be</param>
/// <returns>List of possible words</returns>
public IEnumerable<int> FindWords(int mask, int maxLength, int minLength)
{
foreach (Word w in Words.Values)
{
if (w.Power > maxLength)
continue;
if (w.Power < minLength)
continue;
if (w.IsAvailable(mask))
yield return w.Code;
}
}
#endregion
#region Static Members
/// <summary>
/// Finds power of a word given as integer, counts bits
/// </summary>
/// <param name="code">Word code</param>
/// <returns>Power of a word</returns>
public static int PowerOfWordCode(int code)
{
int power = 0;
for (int i = 0, c = 0x1; i < sizeof(int) * 8; i++)
{
if ((code & (c << i)) == (c << i))
++power;
}
return power;
}
/// <summary>
/// Calculates word from given bitset code
/// </summary>
/// <param name="code">Word code</param>
/// <returns>Letters of the word</returns>
public static string WordFromCode(int code)
{
string res = "";
for (int i = 0, c = 0x1; i < sizeof(int) * 8; i++)
{
if ((code & (c << i)) == (c << i))
res += Characters[Characters.Length - (i + 1)];
else
res += "_";
}
return res;
}
/// <summary>
/// Gets the single bit set code for a letter
/// Duplicated letters return the lowest bit set but 'dup' flag set
/// </summary>
/// <param name="c">Character</param>
/// <param name="dup">Duplicated flag</param>
/// <returns>Single bit set int or -1 if letter is unknown</returns>
private static int CodeFromLetter(char c, out bool dup)
{
dup = false;
// hardcoded for performance
switch (c)
{
case 'a': return 0x1 << 17;
case 'i': return 0x1 << 16;
case 'l': return 0x1 << 15;
case 'n': return 0x1 << 14;
case 'o': dup = true; return 0x1 << 12; // 12,13
case 'p': return 0x1 << 11;
case 'r': return 0x1 << 10;
case 's': dup = true; return 0x1 << 8; // 8,9
case 't': dup = true; return 0x1 << 4; // 4,5,6,7
case 'u': dup = true; return 0x1 << 2; // 2,3
case 'y': return 0x1 << 1;
case 'w': return 0x1 << 0;
default: return -1;
}
}
/// <summary>
/// Creates a code from a word with lowests bits set (for duplicate letters)
/// -1 if word cannot be made from the available characters
/// </summary>
/// <param name="word">Word</param>
/// <returns>-1 or word code</returns>
public static int CodeFromWord(string word)
{
string temp = word.Replace("\'", "").ToLower();
int wordCode = 0x0;
int available = Max;
// hardcoded for performance
int bit_o = 0;
int bit_s = 0;
int bit_t = 0;
int bit_u = 0;
for (int i = 0; i < temp.Length; i++)
{
char c = temp[i];
bool dup = false;
int charCode = CodeFromLetter(c, out dup);
// our characters cannot make up this word, anagram failure
if (charCode == -1) return -1;
// letter is not available, duplicated but not available
if ((available & charCode) != charCode)
return -1;
if (!dup)
available = available & ~charCode;
// for duplicated letters only
if (dup)
{
// even for duplicated chars we cannot make this word, we cannot make "sooon" for example
if ((bit_o == 2 && c == 'o') ||
(bit_s == 2 && c == 's') ||
(bit_t == 4 && c == 't') ||
(bit_u == 2 && c == 'u'))
return -1;
if (c == 'o')
{
charCode = charCode << bit_o;
bit_o++;
}
else if (c == 's')
{
charCode = charCode << bit_s;
bit_s++;
}
else if (c == 't')
{
charCode = charCode << bit_t;
bit_t++;
}
else if (c == 'u')
{
charCode = charCode << bit_u;
bit_u++;
}
if (bit_o == 2 || bit_s == 2 || bit_t == 4 || bit_u == 2)
available = available & ~charCode; // remove duplicated letter from available
}
wordCode = wordCode | charCode;
}
return wordCode;
}
/// <summary>
/// Counts set bits at specified index and count
/// </summary>
/// <param name="code">Value to check</param>
/// <param name="index">Start bit index</param>
/// <param name="count">Bit count to check</param>
/// <returns>Set bit count</returns>
private static int CountOfBits(int code, int index, int count)
{
int res = 0;
for (int i = 0, c = 0x1 << index; i < count; i++, c = c << 1)
if ((code & c) == c)
++res;
return res;
}
/// <summary>
/// Shifts partial bits of a code given
/// </summary>
/// <param name="code">Value to shift</param>
/// <param name="index">start bit index</param>
/// <param name="size">Max size of the block (more than 1 for duplicated letters)</param>
/// <param name="count">Shifting count</param>
/// <returns>The value with shifted bits</returns>
private static int ShiftBits(int code, int index, int size, int count)
{
if (size == count)
return code;
int block = 0;
for (int i = 0, c = 0x1; i < size; i++, index++)
block = block | (c << index);
int antiblock = ~block;
int inc = code & block;
int exc = code & antiblock;
inc = inc >> count;
return (code & exc) | inc;
}
/// <summary>
/// Uses up the wordCode bits from the actual code and also lowers the remaining high letter bits from actual if duplicated letters are not all used
/// </summary>
/// <param name="actual">Actual code (available letter code)</param>
/// <param name="wordCode">Parameter word code</param>
/// <returns>Remaining letter bit code</returns>
public static int SubtractCode(int actual, int wordCode)
{
// we cannot make up this word from the actual letters
if ((actual & wordCode) != wordCode)
return -1;
int temp = actual & ~wordCode;
// at this point temp contains the result but its duplicated letter bits are on highest bits so we need to make it lower now
// shift for o
int bitcount = CountOfBits(wordCode, 12, 2);
temp = ShiftBits(temp, 12, 2, bitcount);
// shift for s
bitcount = CountOfBits(wordCode, 8, 2);
temp = ShiftBits(temp, 8, 2, bitcount);
// shift for t
bitcount = CountOfBits(wordCode, 4, 4);
temp = ShiftBits(temp, 4, 4, bitcount);
// shift for u
bitcount = CountOfBits(wordCode, 2, 2);
temp = ShiftBits(temp, 2, 2, bitcount);
return temp;
}
#endregion
}
}