From 6217db4aba95f65852f487628676706c3d5c2a7c Mon Sep 17 00:00:00 2001 From: Maciej Jastrzebski Date: Thu, 15 Feb 2024 16:31:35 +0100 Subject: [PATCH 1/5] feat: basic `wordBoundary` implementations --- src/constructs/__tests__/anchors.test.tsx | 54 ++++++++++++++++++++--- src/constructs/anchors.ts | 12 +++++ src/index.ts | 2 +- test-utils/to-match-string.ts | 2 +- 4 files changed, 63 insertions(+), 7 deletions(-) diff --git a/src/constructs/__tests__/anchors.test.tsx b/src/constructs/__tests__/anchors.test.tsx index fe614d2..54bdc31 100644 --- a/src/constructs/__tests__/anchors.test.tsx +++ b/src/constructs/__tests__/anchors.test.tsx @@ -1,19 +1,63 @@ -import { endOfString, oneOrMore, startOfString } from '../..'; +import { + buildRegExp, + digit, + endOfString, + notWhitespace, + notWordBoundary, + oneOrMore, + startOfString, + wordBoundary, + zeroOrMore, +} from '../..'; -test('`startOfString` basic cases', () => { +test('`startOfString` pattern', () => { expect(startOfString).toEqualRegex(/^/); expect([startOfString, 'a', 'b']).toEqualRegex(/^ab/); }); -test('`startOfString` regex tests', () => { +test('`startOfString` matching', () => { expect([startOfString, oneOrMore('a')]).toMatchGroups('a aa aaa', ['a']); }); -test('`endOfString` basic cases', () => { +test('`endOfString` pattern', () => { expect(endOfString).toEqualRegex(/$/); expect(['a', 'b', endOfString]).toEqualRegex(/ab$/); }); -test('`endOfString` regex tests', () => { +test('`endOfString` matching', () => { expect([oneOrMore('a'), endOfString]).toMatchGroups('a aa aaa', ['aaa']); }); + +test('`wordBoundary` pattern', () => { + expect(wordBoundary).toEqualRegex(/\b/); + expect([wordBoundary, 'a', 'b']).toEqualRegex(/\bab/); +}); + +test('`wordBoundary` matching', () => { + expect( + buildRegExp([wordBoundary, 'a', zeroOrMore(notWhitespace)], { global: true }), + ).toMatchGroups('a ba ab aa', ['a', 'ab', 'aa']); + + expect( + buildRegExp([zeroOrMore(notWhitespace), 'a', wordBoundary], { global: true 
}), + ).toMatchGroups('a ba ab aa', ['a', 'ba', 'aa']); +}); + +test('`notWordBoundary` pattern', () => { + expect(notWordBoundary).toEqualRegex(/\B/); + expect([notWordBoundary, 'a', 'b']).toEqualRegex(/\Bab/); + expect(['a', notWordBoundary, 'b']).toEqualRegex(/a\Bb/); + expect(['a', 'b', notWordBoundary]).toEqualRegex(/ab\B/); +}); + +test('`notWordBoundary` matching', () => { + expect(buildRegExp([notWordBoundary, 'abc', digit], { global: true })).toMatchGroups( + 'abc1 xabc2 xxabc3', + ['abc2', 'abc3'], + ); + + expect(buildRegExp([digit, 'abc', notWordBoundary], { global: true })).toMatchGroups( + '1abc 2abcx 3abcxx', + ['2abc', '3abc'], + ); +}); diff --git a/src/constructs/anchors.ts b/src/constructs/anchors.ts index 6b7d822..d2ab45e 100644 --- a/src/constructs/anchors.ts +++ b/src/constructs/anchors.ts @@ -18,6 +18,18 @@ export const endOfString: Anchor = { encode: encodeAnchor, }; +export const wordBoundary: Anchor = { + type: 'anchor', + symbol: '\\b', + encode: encodeAnchor, +}; + +export const notWordBoundary: Anchor = { + type: 'anchor', + symbol: '\\B', + encode: encodeAnchor, +}; + function encodeAnchor(this: Anchor): EncodeResult { return { precedence: 'sequence', diff --git a/src/index.ts b/src/index.ts index 6441b91..1ba53a5 100644 --- a/src/index.ts +++ b/src/index.ts @@ -2,7 +2,7 @@ export type * from './types'; export { buildPattern, buildRegExp } from './builders'; -export { endOfString, startOfString } from './constructs/anchors'; +export { endOfString, notWordBoundary, startOfString, wordBoundary } from './constructs/anchors'; export { capture } from './constructs/capture'; export { any, diff --git a/test-utils/to-match-string.ts b/test-utils/to-match-string.ts index badd266..0bae22d 100644 --- a/test-utils/to-match-string.ts +++ b/test-utils/to-match-string.ts @@ -17,7 +17,7 @@ export function toMatchString( message: () => this.utils.matcherHint('toMatchString', undefined, undefined, options) + '\n\n' + - `Expected string: ${this.isNot ? 
'not ' : ''}${this.utils.printExpected(expected)}\n` + + `Expected: ${this.isNot ? 'not ' : ''} matching ${this.utils.printExpected(expected)}\n` + `Received pattern: ${this.utils.printReceived(receivedRegex.source)}`, }; } From 418fe1ff1e138d8265b782b112d7ac216a7aee3d Mon Sep 17 00:00:00 2001 From: Maciej Jastrzebski Date: Wed, 28 Feb 2024 16:50:09 +0100 Subject: [PATCH 2/5] docs: update examples --- docs/Examples.md | 75 +++++++++++++++++++++----- src/__tests__/example-find-suffixes.ts | 24 +++++++++ src/__tests__/example-find-words.ts | 23 ++++++++ 3 files changed, 109 insertions(+), 13 deletions(-) create mode 100644 src/__tests__/example-find-suffixes.ts create mode 100644 src/__tests__/example-find-words.ts diff --git a/docs/Examples.md b/docs/Examples.md index 6498d3e..cd1f578 100644 --- a/docs/Examples.md +++ b/docs/Examples.md @@ -40,7 +40,7 @@ const regex = buildRegExp( { ignoreCase: true }, ); -const isValid = regex.test("#ffffff"); +const isValid = regex.test('#ffffff'); ``` Encoded regex: `/^#?(?:[a-f\d]{6}|[a-f\d]{3})$/i`. @@ -70,7 +70,7 @@ const regex = buildRegExp([ endOfString, ]); -const isValid = regex.test("https://hello.github.com"); +const isValid = regex.test('https://hello.github.com'); ``` Encoded regex: `/^(?:(?:http|https):\/\/)?(?:(?:[a-z\d]|[a-z\d][a-z\d-]*[a-z\d])\.)+[a-z][a-z\d]+$/`. @@ -100,7 +100,7 @@ const regex = buildRegExp( { ignoreCase: true }, ); -const isValid = regex.test("user@example.com"); +const isValid = regex.test('user@example.com'); ``` Encoded regex: `/^[a-z\d._%+-]+@[a-z\d.-]+\.[a-z]{2,}$/i`. @@ -126,7 +126,7 @@ const regex = buildRegExp([ endOfString, ]); -const isValid = regex.test("1.0e+27"); +const isValid = regex.test('1.0e+27'); ``` Encoded regex: `/^[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?$/`. @@ -190,6 +190,7 @@ See tests: [example-regexp.ts](../src/__tests__/example-regexp.ts). 
## Simple password validation This regex corresponds to following password policy: + - at least one uppercase letter - at least one lowercase letter - at least one digit @@ -205,16 +206,16 @@ const atLeastEightChars = /.{8,}/; // Match const validPassword = buildRegExp([ - startOfString, - atLeastOneUppercase, - atLeastOneLowercase, - atLeastOneDigit, - atLeastOneSpecialChar, - atLeastEightChars, - endOfString + startOfString, + atLeastOneUppercase, + atLeastOneLowercase, + atLeastOneDigit, + atLeastOneSpecialChar, + atLeastEightChars, + endOfString, ]); -const isValid = regex.test("Aa$123456"); +const isValid = regex.test('Aa$123456'); ``` Encoded regex: `/^(?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])(?=.*[^A-Za-z0-9\s])(?:.{8,})$/`. @@ -243,9 +244,57 @@ const currencyRegex = buildRegExp([ endOfString, ]); -const isValid = regex.test("£1,000"); +const isValid = regex.test('£1,000'); ``` Encoded regex: `/(?<=[$€£¥R₿])\s?\d{1,3}(?:,?\d{3})*(?:\.\d{2})?$/`. See tests: [example-currency.ts](../src/__tests__/example-currency.ts). + +## Finding specific whole words + +Ignoring cases where given word is part of a bigger word. + +```ts +const wordsToFind = ['word', 'date']; + +const regex = buildRegExp([ + wordBoundary, // match whole words only + choiceOf(...wordsToFind), + wordBoundary, +]); + +expect(regex).toMatchString('word'); +expect(regex).toMatchString('date'); + +expect(regex).not.toMatchString('sword'); +expect(regex).not.toMatchString('update'); +``` + +Encoded regex: `/\b(?:word|date)\b/`. + +See tests: [example-find-words.ts](../src/__tests__/example-find-words.ts). + +## Finding specific suffixes + +Finds words that end with a given suffix, ignoring cases where the suffix stands alone as a whole word.
+ +```ts +const suffixesToFind = ['acy', 'ism']; + +const regex = buildRegExp([ + notWordBoundary, // match suffixes only + choiceOf(...suffixesToFind), + wordBoundary, +]); + +expect(regex).toMatchString('privacy '); +expect(regex).toMatchString('democracy'); + +expect(regex).not.toMatchString('acy'); +expect(regex).not.toMatchString('ism'); +``` + +Encoded regex: `/\B(?:acy|ism)\b/`. + +See tests: [example-find-suffixes.ts](../src/__tests__/example-find-suffixes.ts). diff --git a/src/__tests__/example-find-suffixes.ts b/src/__tests__/example-find-suffixes.ts new file mode 100644 index 0000000..b8f0fed --- /dev/null +++ b/src/__tests__/example-find-suffixes.ts @@ -0,0 +1,24 @@ +import { buildRegExp, choiceOf, notWordBoundary, wordBoundary } from '..'; + +test('example: find words with suffix', () => { + const suffixesToFind = ['acy', 'ism']; + + const regex = buildRegExp([ + notWordBoundary, // match suffixes only + choiceOf(...suffixesToFind), + wordBoundary, + ]); + + expect(regex).toMatchString('democracy'); + expect(regex).toMatchString('Bureaucracy'); + expect(regex).toMatchString('abc privacy '); + expect(regex).toMatchString('abc dynamism'); + expect(regex).toMatchString('realism abc'); + expect(regex).toMatchString('abc modernism abc'); + + expect(regex).not.toMatchString('abc acy'); + expect(regex).not.toMatchString('ism abc'); + expect(regex).not.toMatchString('dynamisms'); + + expect(regex).toEqualRegex(/\B(?:acy|ism)\b/); +}); diff --git a/src/__tests__/example-find-words.ts b/src/__tests__/example-find-words.ts new file mode 100644 index 0000000..d7f1254 --- /dev/null +++ b/src/__tests__/example-find-words.ts @@ -0,0 +1,23 @@ +import { buildRegExp, choiceOf, wordBoundary } from '..'; + +test('example: find specific words', () => { + const wordsToFind = ['word', 'date']; + + const regex = buildRegExp([ + wordBoundary, // match whole words only + choiceOf(...wordsToFind), + wordBoundary, + ]); + + expect(regex).toMatchString('word'); + 
expect(regex).toMatchString('some date'); + expect(regex).toMatchString('date and word'); + + expect(regex).not.toMatchString('sword'); + expect(regex).not.toMatchString('keywords'); + expect(regex).not.toMatchString('words'); + expect(regex).not.toMatchString('update'); + expect(regex).not.toMatchString('dates'); + + expect(regex).toEqualRegex(/\b(?:word|date)\b/); +}); From aac612551c495ee933a07a1dea33a86c2ee75f45 Mon Sep 17 00:00:00 2001 From: Maciej Jastrzebski Date: Wed, 28 Feb 2024 17:02:45 +0100 Subject: [PATCH 3/5] docs: update docs --- README.md | 10 ++++---- docs/API.md | 67 +++++++++++++++++++++++++++++++++-------------------- 2 files changed, 46 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index 06dba71..32ef271 100644 --- a/README.md +++ b/README.md @@ -21,11 +21,7 @@ This library allows users to create regular expressions in a structured way, mak const hexColor = /^#?([a-fA-F0-9]{6}|[a-fA-F0-9]{3})$/; // TS Regex Builder DSL -const hexDigit = charClass( - charRange('a', 'f'), - charRange('A', 'F'), - charRange('0', '9'), -); +const hexDigit = charClass(charRange('a', 'f'), charRange('A', 'F'), charRange('0', '9')); const hexColor = buildRegExp([ startOfString, @@ -66,6 +62,7 @@ const regex = buildRegExp(['Hello ', capture(oneOrMore(word))]); TS Regex Builder allows you to build complex regular expressions using domain-specific language. Terminology: + - regex construct (`RegexConstruct`) - common name for all regex constructs like character classes, quantifiers, and anchors. - regex element (`RegexElement`) - a fundamental building block of a regular expression, defined as either a regex construct, a string, or `RegExp` literal (`/.../`). - regex sequence (`RegexSequence`) - a sequence of regex elements forming a regular expression. For developer convenience, it also accepts a single element instead of an array. @@ -73,6 +70,7 @@ Terminology: Most of the regex constructs accept a regex sequence as their argument. 
Examples of sequences: + - single element (construct): `capture('Hello')` - single element (string): `'Hello'` - single element (`RegExp` literal): `/Hello/` @@ -152,6 +150,7 @@ See [Character Classes API doc](./docs/API.md##character-classes) for more info. | --------------- | ------------ | ------------------------------------------------------------------------ | | `startOfString` | `^` | Match the start of the string (or the start of a line in multiline mode) | | `endOfString` | `$` | Match the end of the string (or the end of a line in multiline mode) | +| `wordBoundary` | `\b` | Match the start or end of a word without consuming characters | See [Anchors API doc](./docs/API.md#anchors) for more info. @@ -182,7 +181,6 @@ TS Regex Builder is inspired by [Swift Regex Builder API](https://developer.appl - [Swift Regex Builder API docs](https://developer.apple.com/documentation/regexbuilder) - [Swift Evolution 351: Regex Builder DSL](https://github.com/apple/swift-evolution/blob/main/proposals/0351-regex-builder.md) - --- Made with [create-react-native-library](https://github.com/callstack/react-native-builder-bob) diff --git a/docs/API.md b/docs/API.md index 7ab8ff4..a738084 100644 --- a/docs/API.md +++ b/docs/API.md @@ -14,8 +14,7 @@ Fundamental building blocks of a regular expression, defined as either a regex c The common type for all regex constructs like character classes, quantifiers, and anchors. You should not need to use this type directly, it is returned by all regex construct functions. -Note: the shape of the `RegexConstruct` is considered private and may change in a breaking way without a major release. We will focus on maintaining the compatibility of regexes built with - +Note: the shape of the `RegexConstruct` is considered private and may change in a breaking way without a major release. 
We will focus on maintaining the compatibility of regexes built with ## Builder @@ -133,14 +132,15 @@ Quantifiers in regex define the number of occurrences to match for a pattern. function zeroOrMore( sequence: RegexSequence, options?: { - greedy?: boolean, // default=true - } -): ZeroOrMore + greedy?: boolean; // default=true + }, +): ZeroOrMore; ``` Regex syntax: -* `x*` for default greedy behavior (match as many characters as possible) -* `x*?` for non-greedy behavior (match as few characters as possible) + +- `x*` for default greedy behavior (match as many characters as possible) +- `x*?` for non-greedy behavior (match as few characters as possible) The `zeroOrMore` quantifier matches zero or more occurrences of a given pattern, allowing a flexible number of repetitions of that element. @@ -150,14 +150,15 @@ The `zeroOrMore` quantifier matches zero or more occurrences of a given pattern, function oneOrMore( sequence: RegexSequence, options?: { - greedy?: boolean, // default=true - } -): OneOrMore + greedy?: boolean; // default=true + }, +): OneOrMore; ``` Regex syntax: -* `x+` for default greedy behavior (match as many characters as possible) -* `x+?` for non-greedy behavior (match as few characters as possible) + +- `x+` for default greedy behavior (match as many characters as possible) +- `x+?` for non-greedy behavior (match as few characters as possible) The `oneOrMore` quantifier matches one or more occurrences of a given pattern, allowing a flexible number of repetitions of that element. 
@@ -167,14 +168,15 @@ The `oneOrMore` quantifier matches one or more occurrences of a given pattern, a function optional( sequence: RegexSequence, options?: { - greedy?: boolean, // default=true - } -): Optionally + greedy?: boolean; // default=true + }, +): Optionally; ``` Regex syntax: -* `x?` for default greedy behavior (match as many characters as possible) -* `x??` for non-greedy behavior (match as few characters as possible) + +- `x?` for default greedy behavior (match as many characters as possible) +- `x??` for non-greedy behavior (match as few characters as possible) The `optional` quantifier matches zero or one occurrence of a given pattern, making it optional. @@ -183,17 +185,20 @@ The `optional` quantifier matches zero or one occurrence of a given pattern, mak ```ts function repeat( sequence: RegexSequence, - options: number | { - min: number; - max?: number; - greedy?: boolean; // default=true - }, -): Repeat + options: + | number + | { + min: number; + max?: number; + greedy?: boolean; // default=true + }, +): Repeat; ``` Regex syntax: -* `x{n}`, `x{min,}`, `x{min, max}` for default greedy behavior (match as many characters as possible) -* `x{min,}?`, `x{min, max}?` for non-greedy behavior (match as few characters as possible) + +- `x{n}`, `x{min,}`, `x{min, max}` for default greedy behavior (match as many characters as possible) +- `x{min,}?`, `x{min, max}?` for non-greedy behavior (match as few characters as possible) The `repeat` quantifier in regex matches either exactly `count` times or between `min` and `max` times. If only `min` is provided, it matches at least `min` times. @@ -301,3 +306,15 @@ const endOfString: Anchor; - `startOfString` anchor matches the start of a string (or line, if multiline mode is enabled). Regex syntax: `^`. - `endOfString` anchor matches the end of a string (or line, if multiline mode is enabled). Regex syntax: `$`. 
+ +### Word boundary + +```ts +const wordBoundary: Anchor; +const notWordBoundary: Anchor; +``` + +- `wordBoundary` matches the positions where a word character is not followed or preceded by another word character, effectively indicating the start or end of a word. Regex syntax: `\b`. +- `notWordBoundary` matches the positions where a word character is followed or preceded by another word character, indicating that it is not at the start or end of a word. Regex syntax: `\B`. + +Note: word characters are letters, digits, and underscore (`_`). Other special characters like `#`, `$`, etc are not considered word characters. From e88fc4e84d28c04dea32177c6531589b0e9e2363 Mon Sep 17 00:00:00 2001 From: Maciej Jastrzebski Date: Wed, 28 Feb 2024 17:09:33 +0100 Subject: [PATCH 4/5] refactor: improve codecov --- src/encoder/encoder.ts | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/encoder/encoder.ts b/src/encoder/encoder.ts index 5deec03..a459b39 100644 --- a/src/encoder/encoder.ts +++ b/src/encoder/encoder.ts @@ -49,13 +49,9 @@ function encodeText(text: string): EncodeResult { function encodeRegExp(regexp: RegExp): EncodeResult { const pattern = regexp.source; - if (pattern.length === 0) { - throw new Error('`encodeRegExp`: received regexp should not be empty'); - } - // Encode at safe precedence return { - precedence: isAtomicPattern(regexp.source) ? 'atom' : 'disjunction', + precedence: isAtomicPattern(pattern) ? 
'atom' : 'disjunction', pattern, }; } From 43e025a7449784b45969877724e2d96bdedf8420 Mon Sep 17 00:00:00 2001 From: Maciej Jastrzebski Date: Wed, 28 Feb 2024 17:19:48 +0100 Subject: [PATCH 5/5] refactor: improve test naming --- src/constructs/__tests__/capture.test.tsx | 4 ++-- .../__tests__/character-class.test.ts | 18 +++++++------- src/constructs/__tests__/choice-of.test.ts | 8 +++---- src/constructs/__tests__/lookahead.test.ts | 14 +++++------ src/constructs/__tests__/lookbehind.test.ts | 12 +++++----- .../__tests__/negative-lookahead.test.ts | 10 ++++---- .../__tests__/negative-lookbehind.test.ts | 8 +++---- src/constructs/__tests__/quantifiers.test.tsx | 24 +++++++++++-------- src/constructs/__tests__/repeat.test.tsx | 8 +++---- 9 files changed, 55 insertions(+), 51 deletions(-) diff --git a/src/constructs/__tests__/capture.test.tsx b/src/constructs/__tests__/capture.test.tsx index 30ce902..422f27f 100644 --- a/src/constructs/__tests__/capture.test.tsx +++ b/src/constructs/__tests__/capture.test.tsx @@ -1,13 +1,13 @@ import { capture, oneOrMore } from '../..'; -test('`capture` base cases', () => { +test('`capture` pattern', () => { expect(capture('a')).toEqualRegex(/(a)/); expect(capture('abc')).toEqualRegex(/(abc)/); expect(capture(oneOrMore('abc'))).toEqualRegex(/((?:abc)+)/); expect(oneOrMore(capture('abc'))).toEqualRegex(/(abc)+/); }); -test('`capture` captures group', () => { +test('`capture` matching', () => { expect(capture('b')).toMatchGroups('ab', ['b', 'b']); expect(['a', capture('b')]).toMatchGroups('ab', ['ab', 'b']); expect(['a', capture('b'), capture('c')]).toMatchGroups('abc', ['abc', 'b', 'c']); diff --git a/src/constructs/__tests__/character-class.test.ts b/src/constructs/__tests__/character-class.test.ts index b25c39c..bc06abd 100644 --- a/src/constructs/__tests__/character-class.test.ts +++ b/src/constructs/__tests__/character-class.test.ts @@ -89,7 +89,7 @@ test('`charClass` throws on inverted arguments', () => { ); }); -test('`charRange` 
base cases', () => { +test('`charRange` pattern', () => { expect(charRange('a', 'z')).toEqualRegex(/[a-z]/); expect(['x', charRange('0', '9')]).toEqualRegex(/x[0-9]/); expect([charRange('A', 'F'), 'x']).toEqualRegex(/[A-F]x/); @@ -107,7 +107,7 @@ test('`charRange` throws on incorrect arguments', () => { ); }); -test('`anyOf` base cases', () => { +test('`anyOf` pattern', () => { expect(anyOf('a')).toEqualRegex(/a/); expect(['x', anyOf('a'), 'x']).toEqualRegex(/xax/); expect(anyOf('ab')).toEqualRegex(/[ab]/); @@ -115,21 +115,21 @@ test('`anyOf` base cases', () => { expect(['x', anyOf('ab'), 'x']).toEqualRegex(/x[ab]x/); }); -test('`anyOf` with quantifiers', () => { +test('`anyOf` pattern with quantifiers', () => { expect(['x', oneOrMore(anyOf('abc')), 'x']).toEqualRegex(/x[abc]+x/); expect(['x', optional(anyOf('abc')), 'x']).toEqualRegex(/x[abc]?x/); expect(['x', zeroOrMore(anyOf('abc')), 'x']).toEqualRegex(/x[abc]*x/); }); -test('`anyOf` escapes special characters', () => { +test('`anyOf` pattern escapes special characters', () => { expect(anyOf('abc-+.]\\')).toEqualRegex(/[abc+.\]\\-]/); }); -test('`anyOf` moves hyphen to the last position', () => { +test('`anyOf` pattern moves hyphen to the last position', () => { expect(anyOf('a-bc')).toEqualRegex(/[abc-]/); }); -test('`anyOf` edge case caret and hyphen', () => { +test('`anyOf` pattern edge case caret and hyphen', () => { expect(anyOf('^-')).toEqualRegex(/[\^-]/); expect(anyOf('-^')).toEqualRegex(/[\^-]/); expect(anyOf('-^a')).toEqualRegex(/[a^-]/); @@ -141,17 +141,17 @@ test('`anyOf` throws on empty text', () => { ); }); -test('`inverted` character class', () => { +test('`inverted` character class pattern', () => { expect(inverted(anyOf('a'))).toEqualRegex(/[^a]/); expect(inverted(anyOf('abc'))).toEqualRegex(/[^abc]/); }); -test('`inverted` character class double inversion', () => { +test('`inverted` character class pattern double inversion', () => { expect(inverted(inverted(anyOf('a')))).toEqualRegex(/a/); 
expect(inverted(inverted(anyOf('abc')))).toEqualRegex(/[abc]/); }); -test('`inverted` character class execution', () => { +test('`inverted` character class matching', () => { expect(inverted(anyOf('a'))).not.toMatchString('aa'); expect(inverted(anyOf('a'))).toMatchGroups('aba', ['b']); }); diff --git a/src/constructs/__tests__/choice-of.test.ts b/src/constructs/__tests__/choice-of.test.ts index c8234d9..cc2a3a6 100644 --- a/src/constructs/__tests__/choice-of.test.ts +++ b/src/constructs/__tests__/choice-of.test.ts @@ -1,13 +1,13 @@ import { choiceOf, oneOrMore, repeat, zeroOrMore } from '../..'; -test('`choiceOf` using basic strings', () => { +test('`choiceOf` pattern', () => { expect(choiceOf('a')).toEqualRegex(/a/); expect(choiceOf('a', 'b')).toEqualRegex(/a|b/); expect(choiceOf('a', 'b', 'c')).toEqualRegex(/a|b|c/); expect(choiceOf('aaa', 'bbb')).toEqualRegex(/aaa|bbb/); }); -test('`choiceOf` used in sequence', () => { +test('`choiceOf` pattern in sequence', () => { expect(['x', choiceOf('a'), 'x']).toEqualRegex(/xax/); expect([choiceOf('a', 'b'), 'x']).toEqualRegex(/(?:a|b)x/); expect(['x', choiceOf('a', 'b')]).toEqualRegex(/x(?:a|b)/); @@ -19,13 +19,13 @@ test('`choiceOf` used in sequence', () => { expect(choiceOf('aaa', 'bbb')).toEqualRegex(/aaa|bbb/); }); -test('`choiceOf` with sequence options', () => { +test('`choiceOf` pattern with sequence options', () => { expect([choiceOf(['a', 'b'])]).toEqualRegex(/ab/); expect([choiceOf(['a', 'b'], ['c', 'd'])]).toEqualRegex(/ab|cd/); expect([choiceOf(['a', zeroOrMore('b')], [oneOrMore('c'), 'd'])]).toEqualRegex(/ab*|c+d/); }); -test('`choiceOf` using nested regex', () => { +test('`choiceOf` pattern using nested regex', () => { expect(choiceOf(oneOrMore('a'), zeroOrMore('b'))).toEqualRegex(/a+|b*/); expect(choiceOf(repeat('a', { min: 1, max: 3 }), repeat('bx', 5))).toEqualRegex( /a{1,3}|(?:bx){5}/, diff --git a/src/constructs/__tests__/lookahead.test.ts b/src/constructs/__tests__/lookahead.test.ts index 
c16844e..64d76dc 100644 --- a/src/constructs/__tests__/lookahead.test.ts +++ b/src/constructs/__tests__/lookahead.test.ts @@ -3,38 +3,38 @@ import { digit, word } from '../character-class'; import { lookahead } from '../lookahead'; import { oneOrMore, zeroOrMore } from '../quantifiers'; -test('`Positive lookahead` base cases', () => { +test('`lookahead` pattern', () => { expect(lookahead('a')).toEqualRegex(/(?=a)/); expect([digit, lookahead('abc')]).toEqualRegex(/\d(?=abc)/); expect(lookahead(oneOrMore('abc'))).toEqualRegex(/(?=(?:abc)+)/); expect([zeroOrMore(word), lookahead('abc')]).toEqualRegex(/\w*(?=abc)/); }); -test('`Positive lookahead` use cases', () => { +test('`lookahead` matching', () => { expect([oneOrMore(digit), lookahead('$')]).toMatchString('1 turkey costs 30$'); expect(['q', lookahead('u')]).toMatchString('queen'); expect(['a', lookahead('b'), lookahead('c')]).not.toMatchString('abc'); expect(['a', lookahead(capture('bba'))]).toMatchGroups('abba', ['a', 'bba']); }); -test('`Positive lookahead` with multiple elements', () => { +test('`lookahead` matching with multiple elements', () => { expect(lookahead(['a', 'b', 'c'])).toEqualRegex(/(?=abc)/); }); -test('`Positive lookahead` with nested constructs', () => { +test('`lookahead` matching with nested constructs', () => { expect(lookahead(oneOrMore(capture('abc')))).toEqualRegex(/(?=(abc)+)/); expect(lookahead([zeroOrMore(word), capture('abc')])).toEqualRegex(/(?=\w*(abc))/); }); -test('`Positive lookahead` with special characters', () => { +test('`lookahead` matching with special characters', () => { expect(lookahead(['$', capture('abc')])).toEqualRegex(/(?=\$(abc))/); expect(lookahead(['q', capture('u')])).toEqualRegex(/(?=q(u))/); }); -test('`Positive lookahead` with capture group', () => { +test('`lookahead` matching with capture group', () => { expect(lookahead(capture('bba'))).toEqualRegex(/(?=(bba))/); }); -test('`Positive lookahead` with digit character class', () => { +test('`lookahead` 
matching with digit character class', () => { expect(lookahead([digit, 'abc'])).toEqualRegex(/(?=\dabc)/); }); diff --git a/src/constructs/__tests__/lookbehind.test.ts b/src/constructs/__tests__/lookbehind.test.ts index 897b438..16053be 100644 --- a/src/constructs/__tests__/lookbehind.test.ts +++ b/src/constructs/__tests__/lookbehind.test.ts @@ -2,14 +2,14 @@ import { anyOf, digit, whitespace, word } from '../character-class'; import { lookbehind } from '../lookbehind'; import { oneOrMore, zeroOrMore } from '../quantifiers'; -test('` lookbehind` base cases', () => { +test('`lookbehind` pattern', () => { expect(lookbehind('a')).toEqualRegex(/(?<=a)/); expect(lookbehind('abc')).toEqualRegex(/(?<=abc)/); expect(lookbehind(oneOrMore('abc'))).toEqualRegex(/(?<=(?:abc)+)/); expect(lookbehind('abc')).toEqualRegex(/(?<=abc)/); }); -test('`Positve lookbehind` use cases', () => { +test('`lookbehind` matching', () => { expect([zeroOrMore(whitespace), word, lookbehind('s'), oneOrMore(whitespace)]).toMatchString( 'too many cats to feed.', ); @@ -30,25 +30,25 @@ test('`Positve lookbehind` use cases', () => { expect([anyOf('+-'), oneOrMore(digit), lookbehind('-')]).not.toMatchString('+123'); }); -test('` lookbehind` with multiple elements', () => { +test('`lookbehind` matching with multiple elements', () => { expect(lookbehind(['abc', 'def'])).toEqualRegex(/(?<=abcdef)/); expect(lookbehind([oneOrMore('abc'), 'def'])).toEqualRegex(/(?<=(?:abc)+def)/); expect(lookbehind(['abc', oneOrMore('def')])).toEqualRegex(/(?<=abc(?:def)+)/); }); -test('` lookbehind` with special characters', () => { +test('`lookbehind` matching with special characters', () => { expect(lookbehind(['$', '+'])).toEqualRegex(/(?<=\$\+)/); expect(lookbehind(['[', ']'])).toEqualRegex(/(?<=\[\])/); expect(lookbehind(['\\', '\\'])).toEqualRegex(/(?<=\\\\)/); }); -test('` lookbehind` with quantifiers', () => { +test('`lookbehind` matching with quantifiers', () => { 
expect(lookbehind(zeroOrMore('abc'))).toEqualRegex(/(?<=(?:abc)*)/); expect(lookbehind(oneOrMore('abc'))).toEqualRegex(/(?<=(?:abc)+)/); expect(lookbehind(['abc', zeroOrMore('def')])).toEqualRegex(/(?<=abc(?:def)*)/); }); -test('` lookbehind` with character classes', () => { +test('`lookbehind` matching with character classes', () => { expect(lookbehind(word)).toEqualRegex(/(?<=\w)/); expect(lookbehind(whitespace)).toEqualRegex(/(?<=\s)/); expect(lookbehind(digit)).toEqualRegex(/(?<=\d)/); diff --git a/src/constructs/__tests__/negative-lookahead.test.ts b/src/constructs/__tests__/negative-lookahead.test.ts index dcea00e..3938117 100644 --- a/src/constructs/__tests__/negative-lookahead.test.ts +++ b/src/constructs/__tests__/negative-lookahead.test.ts @@ -3,14 +3,14 @@ import { oneOrMore, zeroOrMore } from '../quantifiers'; import { anyOf, digit } from '../character-class'; import { capture } from '../capture'; -test('`Negative Lookahead` base cases', () => { +test('`negativeLookahead` pattern', () => { expect(negativeLookahead('a')).toEqualRegex(/(?!a)/); expect(negativeLookahead('abc')).toEqualRegex(/(?!abc)/); expect(negativeLookahead(oneOrMore('abc'))).toEqualRegex(/(?!(?:abc)+)/); expect(oneOrMore(negativeLookahead('abc'))).toEqualRegex(/(?!abc)+/); }); -test('`Negative Lookahead` use cases', () => { +test('`negativeLookahead` matching', () => { expect([negativeLookahead('$'), oneOrMore(digit)]).toMatchString('1 turkey costs 30$'); expect([negativeLookahead('a'), 'b']).toMatchString('abba'); expect(['a', negativeLookahead(capture('bba'))]).not.toMatchGroups('abba', ['a', 'bba']); @@ -18,19 +18,19 @@ test('`Negative Lookahead` use cases', () => { expect([negativeLookahead('-'), anyOf('+-'), zeroOrMore(digit)]).toMatchString('+123'); }); -test('`Negative Lookahead` with multiple elements', () => { +test('`negativeLookahead` matching with multiple elements', () => { expect(negativeLookahead(['abc', 'def'])).toEqualRegex(/(?!abcdef)/); 
expect(negativeLookahead([oneOrMore('abc'), 'def'])).toEqualRegex(/(?!(?:abc)+def)/); expect(negativeLookahead(['abc', oneOrMore('def')])).toEqualRegex(/(?!abc(?:def)+)/); }); -test('`Negative Lookahead` with special characters', () => { +test('`negativeLookahead` matching with special characters', () => { expect(negativeLookahead(['$', '+'])).toEqualRegex(/(?!\$\+)/); expect(negativeLookahead(['[', ']'])).toEqualRegex(/(?!\[\])/); expect(negativeLookahead(['\\', '\\'])).toEqualRegex(/(?!\\\\)/); }); -test('`Negative Lookahead` with quantifiers', () => { +test('`negativeLookahead` matching with quantifiers', () => { expect(negativeLookahead(zeroOrMore('abc'))).toEqualRegex(/(?!(?:abc)*)/); expect(negativeLookahead(oneOrMore('abc'))).toEqualRegex(/(?!(?:abc)+)/); expect(negativeLookahead(['abc', zeroOrMore('def')])).toEqualRegex(/(?!abc(?:def)*)/); diff --git a/src/constructs/__tests__/negative-lookbehind.test.ts b/src/constructs/__tests__/negative-lookbehind.test.ts index 2b2e380..aa17c5d 100644 --- a/src/constructs/__tests__/negative-lookbehind.test.ts +++ b/src/constructs/__tests__/negative-lookbehind.test.ts @@ -1,25 +1,25 @@ import { negativeLookbehind } from '../negative-lookbehind'; import { oneOrMore } from '../quantifiers'; -test('`Negative Lookbehind` with single character', () => { +test('`negativeLookbehind` pattern', () => { expect(negativeLookbehind('a')).toEqualRegex(/(? { +test('`negativeLookbehind` matching with multiple characters', () => { expect(negativeLookbehind('abc')).toEqualRegex(/(? { +test('`negativeLookbehind` matching with quantifiers', () => { expect(negativeLookbehind(oneOrMore('abc'))).toEqualRegex(/(? { +test('`negativeLookbehind` matching with special characters', () => { expect(negativeLookbehind('-')).toEqualRegex(/(? 
{ +test('`oneOrMore` quantifier pattern', () => { expect(oneOrMore('a')).toEqualRegex(/a+/); expect(oneOrMore('ab')).toEqualRegex(/(?:ab)+/); }); -test('`optional` quantifier', () => { +test('`optional` quantifier pattern', () => { expect(optional('a')).toEqualRegex(/a?/); expect(optional('ab')).toEqualRegex(/(?:ab)?/); }); -test('`zeroOrMore` quantifier', () => { +test('`zeroOrMore` quantifier pattern', () => { expect(zeroOrMore('a')).toEqualRegex(/a*/); expect(zeroOrMore('ab')).toEqualRegex(/(?:ab)*/); }); -test('`oneOrMore` does not generate capture when grouping', () => { +test('`oneOrMore` matching does not generate capture when grouping', () => { expect(oneOrMore('aa')).toMatchGroups('aa', ['aa']); }); -test('`optional` does not generate capture when grouping', () => { +test('`optional` matching does not generate capture when grouping', () => { expect(optional('aa')).toMatchGroups('aa', ['aa']); }); -test('`zeroOrMore` does not generate capture when grouping', () => { +test('`zeroOrMore` matching does not generate capture when grouping', () => { expect(zeroOrMore('aa')).toMatchGroups('aa', ['aa']); }); -test('base quantifiers optimize grouping for atoms', () => { +test('base quantifiers patterns optimize grouping for atoms', () => { expect(oneOrMore(digit)).toEqualRegex(/\d+/); expect(optional(digit)).toEqualRegex(/\d?/); expect(zeroOrMore(digit)).toEqualRegex(/\d*/); @@ -37,7 +37,7 @@ test('base quantifiers optimize grouping for atoms', () => { expect(zeroOrMore('a')).toEqualRegex(/a*/); }); -test('greedy quantifiers', () => { +test('greedy quantifiers patterns', () => { expect(oneOrMore('a', { greedy: true })).toEqualRegex(/a+/); expect(oneOrMore('ab', { greedy: true })).toEqualRegex(/(?:ab)+/); @@ -48,7 +48,7 @@ test('greedy quantifiers', () => { expect(zeroOrMore('ab', { greedy: true })).toEqualRegex(/(?:ab)*/); }); -test('non-greedy quantifiers', () => { +test('non-greedy quantifiers patterns', () => { expect(oneOrMore('a', { greedy: false 
})).toEqualRegex(/a+?/); expect(oneOrMore('ab', { greedy: false })).toEqualRegex(/(?:ab)+?/); @@ -59,11 +59,15 @@ test('non-greedy quantifiers', () => { expect(zeroOrMore('ab', { greedy: false })).toEqualRegex(/(?:ab)*?/); }); -test('showcase: greedy vs non-greedy quantifiers', () => { +test('greedy quantifiers matching', () => { const html = '
Hello World!
'; const greedyTag = buildRegExp(['<', oneOrMore(any), '>'], { global: true }); expect(greedyTag).toMatchGroups(html, ['
Hello World!
']); +}); + +test('non-greedy quantifiers matching', () => { + const html = '
Hello World!
'; const nonGreedyTag = buildRegExp(['<', oneOrMore(any, { greedy: false }), '>'], { global: true }); expect(nonGreedyTag).toMatchGroups(html, ['
', '', '', '
']); diff --git a/src/constructs/__tests__/repeat.test.tsx b/src/constructs/__tests__/repeat.test.tsx index be7cc37..af61fa4 100644 --- a/src/constructs/__tests__/repeat.test.tsx +++ b/src/constructs/__tests__/repeat.test.tsx @@ -1,6 +1,6 @@ import { digit, oneOrMore, repeat, zeroOrMore } from '../..'; -test('`repeat` quantifier', () => { +test('`repeat` quantifier pattern', () => { expect(['a', repeat('b', { min: 1, max: 5 })]).toEqualRegex(/ab{1,5}/); expect(['a', repeat('b', { min: 1 })]).toEqualRegex(/ab{1,}/); expect(['a', repeat('b', 1)]).toEqualRegex(/ab{1}/); @@ -9,7 +9,7 @@ test('`repeat` quantifier', () => { expect(repeat(['text', ' ', oneOrMore('d')], 5)).toEqualRegex(/(?:text d+){5}/); }); -test('`repeat` optimizes grouping for atoms', () => { +test('`repeat` pattern optimizes grouping for atoms', () => { expect(repeat(digit, 2)).toEqualRegex(/\d{2}/); expect(repeat(digit, { min: 2 })).toEqualRegex(/\d{2,}/); expect(repeat(digit, { min: 1, max: 5 })).toEqualRegex(/\d{1,5}/); @@ -21,12 +21,12 @@ test('`repeat` throws on no children', () => { ); }); -test('greedy `repeat` quantifier', () => { +test('greedy `repeat` quantifier pattern', () => { expect(repeat('a', { min: 1, greedy: true })).toEqualRegex(/a{1,}/); expect(repeat('a', { min: 1, max: 5, greedy: true })).toEqualRegex(/a{1,5}/); }); -test('non-greedy `repeat` quantifier', () => { +test('non-greedy `repeat` quantifier pattern', () => { expect(repeat('a', { min: 1, greedy: false })).toEqualRegex(/a{1,}?/); expect(repeat('a', { min: 1, max: 5, greedy: false })).toEqualRegex(/a{1,5}?/); });