forked from TheAlgorithms/C-Sharp
-
Notifications
You must be signed in to change notification settings - Fork 0
/
BoyerMoore.cs
167 lines (141 loc) · 5.79 KB
/
BoyerMoore.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
using System;
namespace Algorithms.Strings
{
/// <summary>
/// The idea: You compare the pattern with the text from right to left.
/// If the text symbol that is compared with the rightmost pattern symbol
/// does not occur in the pattern at all, then the pattern can be shifted
/// by m positions behind this text symbol.
/// Complexity:
/// Time: Preprocessing: O(m²)
/// Comparison: O(mn)
/// Space: O(m + a)
/// where m - pattern length
/// n - text length
/// a - alphabet length.
/// Source: https://www.inf.hs-flensburg.de/lang/algorithmen/pattern/bmen.htm
/// https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string-search_algorithm.
/// </summary>
public static class BoyerMoore
{
/// <summary>
/// Finds the index of the first occurrence of the pattern <c>p</c> in <c>t</c>.
/// </summary>
/// <param name="t">Input text.</param>
/// <param name="p">Search pattern.</param>
/// <returns>Index of the pattern in text or -1 if the pattern was not found.</returns>
public static int FindFirstOccurrence(string t, string p)
{
// Pattern length
var m = p.Length;
// Text length
var n = t.Length;
// For each symbol of the alphabet, the position of its rightmost occurrence in the pattern,
// or -1 if the symbol does not occur in the pattern.
int[] badChar = BadCharacterRule(p, m);
// Each entry goodSuffix[i] contains the shift distance of the pattern
// if a mismatch at position i – 1 occurs, i.e. if the suffix of the pattern starting at position i has matched.
int[] goodSuffix = GoodSuffixRule(p, m);
// Index in text
var i = 0;
// Index in pattern
int j;
while (i <= n - m)
{
// Starting at end of pattern
j = m - 1;
// While matching
while (j >= 0 && p[j] == t[i + j])
{
j--;
}
// Pattern found
if (j < 0)
{
return i;
}
// Pattern is shifted by the maximum of the values
// given by the good-suffix and the bad-character heuristics
i += Math.Max(goodSuffix[j + 1], j - badChar[t[i + j]]);
}
// Pattern not found
return -1;
}
/// <summary>
/// Finds out the position of its rightmost occurrence in the pattern for each symbol of the alphabet,
/// or -1 if the symbol does not occur in the pattern.
/// </summary>
/// <param name="p">Search pattern.</param>
/// <param name="m">Length of the pattern.</param>
/// <returns>Array of the named postition for each symbol of the alphabet.</returns>
private static int[] BadCharacterRule(string p, int m)
{
// For each character (note that there are more than 256 characters)
int[] badChar = new int[256];
Array.Fill(badChar, -1);
// Iterate from left to right over the pattern
for (var j = 0; j < m; j++)
{
badChar[p[j]] = j;
}
return badChar;
}
/// <summary>
/// Finds out the shift distance of the pattern if a mismatch at position i – 1 occurs
/// for each character of the pattern, i.e. if the suffix of the pattern starting at position i has matched.
/// </summary>
/// <param name="p">Search pattern.</param>
/// <param name="m">Length of the pattern.</param>
/// <returns>Array of the named shift distance for each character of the pattern.</returns>
private static int[] GoodSuffixRule(string p, int m)
{
// CASE 1
// The matching suffix occurs somewhere else in the pattern
// --> matching suffix is a border of a suffix of the pattern
// f[i] contains starting position of the widest border of the suffix of the pattern beginning at position i
int[] f = new int[p.Length + 1];
// Suffix of p[m] has no border --> f[m] = m+1
f[m] = m + 1;
// Corresponding shift distance
int[] s = new int[p.Length + 1];
// Start of suffix including border of the pattern
// (hint: https://www.inf.hs-flensburg.de/lang/algorithmen/pattern/kmpen.htm#section2)
var i = m;
// Start of suffix of the pattern
var j = m + 1;
while (i > 0)
{
// checking if a shorter border that is already known can be extended to the left by the same symbol
while (j <= m && p[i - 1] != p[j - 1])
{
if (s[j] == 0)
{
s[j] = j - i;
}
j = f[j];
}
--i;
--j;
f[i] = j;
}
// CASE 2
// Only a part of the matching suffix occurs at the beginning of the pattern
// (filling remaining entries)
j = f[0];
for (i = 0; i <= m; i++)
{
// Starting postition of the greates border
if (s[i] == 0)
{
s[i] = j;
}
// From position i = j, it switches to the next narrower border f[j]
if (i == j)
{
j = f[j];
}
}
return s;
}
}
}