From 12785b8a86908bf1944c34d3e9755a8da774384f Mon Sep 17 00:00:00 2001 From: Nick Frasser Date: Sat, 25 Feb 2023 12:50:10 -0500 Subject: [PATCH 1/3] Improved parsing of URLs with symbols --- packages/linkifyjs/src/parser.js | 7 +++++-- test/spec/linkifyjs/parser.test.js | 12 ++++++++---- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/packages/linkifyjs/src/parser.js b/packages/linkifyjs/src/parser.js index f8b9d74..d97f8f9 100644 --- a/packages/linkifyjs/src/parser.js +++ b/packages/linkifyjs/src/parser.js @@ -184,11 +184,11 @@ export function init({ groups }) { tt(DomainDotTldColonPort, tk.SLASH, Url); // Note that domains that begin with schemes are treated slighly differently - const UriPrefix = tt(Scheme, tk.COLON); // e.g., 'mailto:' or 'http://' + const SchemeColon = tt(Scheme, tk.COLON); // e.g., 'mailto:' const SlashSchemeColon = tt(SlashScheme, tk.COLON); // e.g., 'http:' const SlashSchemeColonSlash = tt(SlashSchemeColon, tk.SLASH); // e.g., 'http:/' - tt(SlashSchemeColonSlash, tk.SLASH, UriPrefix); + const UriPrefix = tt(SlashSchemeColonSlash, tk.SLASH); // e.g., 'http://' // Scheme states can transition to domain states ta(Scheme, groups.domain, Domain); @@ -199,7 +199,10 @@ export function init({ groups }) { tt(SlashScheme, tk.HYPHEN, DomainHyphen); // Force URL with scheme prefix followed by anything sane + ta(SchemeColon, groups.domain, Url); + tt(SchemeColon, tk.SLASH, Url); ta(UriPrefix, groups.domain, Url); + ta(UriPrefix, qsAccepting, Url); tt(UriPrefix, tk.SLASH, Url); // URL, followed by an opening bracket diff --git a/test/spec/linkifyjs/parser.test.js b/test/spec/linkifyjs/parser.test.js index 3a91aef..9a31a02 100644 --- a/test/spec/linkifyjs/parser.test.js +++ b/test/spec/linkifyjs/parser.test.js @@ -220,6 +220,10 @@ const tests = [ '~@example.org', [Email], ['~@example.org'] + ], [ + '~emersion/soju-dev@lists.sr.ht', + [Email], + ['~emersion/soju-dev@lists.sr.ht'] ], [ 'test@example2.com', [Email], @@ -228,10 +232,6 @@ const tests = [ 'noreply@500px.so', [Email], ['noreply@500px.so'] - ], [ - '~emersion/soju-dev@lists.sr.ht', - [Email], - ['~emersion/soju-dev@lists.sr.ht'] ], [ 'http@example.com', [Email], @@ -264,6 +264,10 @@ const tests = [ 'Hello\nWorld', [Text, Nl, Text], ['Hello', '\n', 'World'], + ], [ + 'And http://↑↑↓↓←→←→ba.tk/ is also a URL', + [Text, Url, Text], + ['And ', 'http://↑↑↓↓←→←→ba.tk/', ' is also a URL'] ] ]; From 0cfea9f9766c8288b415396f4aad2e00e737d557 Mon Sep 17 00:00:00 2001 From: Nick Frasser Date: Sat, 25 Mar 2023 10:01:05 -0400 Subject: [PATCH 2/3] Remove NUM from LocalPartAccepting token group Interferes with domain tokens when parsing numbers such as www.drive1.com --- packages/linkifyjs/src/parser.js | 1 - test/spec/linkifyjs/parser.test.js | 6 +++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/packages/linkifyjs/src/parser.js b/packages/linkifyjs/src/parser.js index d97f8f9..e135407 100644 --- a/packages/linkifyjs/src/parser.js +++ b/packages/linkifyjs/src/parser.js @@ -81,7 +81,6 @@ export function init({ groups }) { tk.DOLLAR, tk.EQUALS, tk.HYPHEN, - tk.NUM, tk.OPENBRACE, tk.PERCENT, tk.PIPE, diff --git a/test/spec/linkifyjs/parser.test.js b/test/spec/linkifyjs/parser.test.js index 9a31a02..ee12d75 100644 --- a/test/spec/linkifyjs/parser.test.js +++ b/test/spec/linkifyjs/parser.test.js @@ -268,7 +268,11 @@ const tests = [ 'And http://↑↑↓↓←→←→ba.tk/ is also a URL', [Text, Url, Text], ['And ', 'http://↑↑↓↓←→←→ba.tk/', ' is also a URL'] - ] + ], [ + 'This Url www.drive1.com with www and digits also www.500px.com', + [Text, Url, Text, Url], + ['This Url ', 'www.drive1.com', ' with www and digits also ', 'www.500px.com'] + ], ]; From 844921ca1e91e26d1b57abce91cde3a39974c2c5 Mon Sep 17 00:00:00 2001 From: Nick Frasser Date: Sat, 25 Mar 2023 10:01:27 -0400 Subject: [PATCH 3/3] Complex email HTML test --- test/spec/html/email.html | 36 ++++++++++++++++++++++++++++++++++ test/spec/html/options.js | 1 + test/spec/linkify-html.test.js | 4 ++++ 3 files changed, 41 insertions(+) create mode 100644 test/spec/html/email.html diff --git a/test/spec/html/email.html b/test/spec/html/email.html new file mode 100644 index 0000000..781b573 --- /dev/null +++ b/test/spec/html/email.html @@ -0,0 +1,36 @@ + + +-
                                                           
+ + diff --git a/test/spec/html/options.js b/test/spec/html/options.js index 8264422..0862616 100644 --- a/test/spec/html/options.js +++ b/test/spec/html/options.js @@ -17,6 +17,7 @@ export default { .split('\n'), extra: fs.readFileSync(__dirname + '/extra.html', 'utf8').trim(), // for jQuery plugin tests + email: fs.readFileSync(__dirname + '/email.html', 'utf8').trim(), // for linkify-html performance tests altOptions: { className: 'linkified', rel: 'nofollow', diff --git a/test/spec/linkify-html.test.js b/test/spec/linkify-html.test.js index d3011e4..d14afd3 100644 --- a/test/spec/linkify-html.test.js +++ b/test/spec/linkify-html.test.js @@ -199,4 +199,8 @@ describe('linkify-html', () => { const input = '這禮拜是我們新的循環 (3/23-4/19), 我將於這週日給 Jeffrey 補課,並且我們會在這期間選另外一個可以上課的日期。'; expect(linkifyHtml(input)).to.be.ok; }); + + it('Handles complex email page', () => { + expect(linkifyHtml(htmlOptions.email)).to.be.ok; + }); });