Skip to content

Commit a4ee0c3

Browse files
authored
Merge pull request #835 from streamich/normalize-html-import
Normalize HTML import
2 parents 3ec34f2 + 0679e0f commit a4ee0c3

File tree

5 files changed

+205
-27
lines changed

5 files changed

+205
-27
lines changed

src/json-crdt-extensions/peritext/registry/SliceRegistry.ts

+10-3
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
1+
import {SliceBehavior} from '../slice/constants';
2+
import {CommonSliceType} from '../slice';
13
import type {PeritextMlElement} from '../block/types';
24
import type {NodeBuilder} from '../../../json-crdt-patch';
3-
import {SliceBehavior} from '../slice/constants';
45
import type {JsonMlElement} from 'very-small-parser/lib/html/json-ml/types';
56
import type {FromHtmlConverter, SliceTypeDefinition, ToHtmlConverter} from './types';
67

8+
/**
9+
* @todo Consider moving the registry under the `/transfer` directory.
10+
*/
711
export class SliceRegistry {
812
private map: Map<string | number, SliceTypeDefinition<any, any, any>> = new Map();
913
private toHtmlMap: Map<string | number, ToHtmlConverter<any>> = new Map();
@@ -14,17 +18,20 @@ export class SliceRegistry {
1418
def: SliceTypeDefinition<Type, Schema, Inline>,
1519
): void {
1620
const {type, toHtml, fromHtml} = def;
17-
this.map.set(type, def);
21+
const fromHtmlMap = this.fromHtmlMap;
1822
if (toHtml) this.toHtmlMap.set(type, toHtml);
1923
if (fromHtml) {
20-
const fromHtmlMap = this.fromHtmlMap;
2124
for (const htmlTag in fromHtml) {
2225
const converter = fromHtml[htmlTag];
2326
const converters = fromHtmlMap.get(htmlTag) ?? [];
2427
converters.push([def, converter]);
2528
fromHtmlMap.set(htmlTag, converters);
2629
}
2730
}
31+
const tag = CommonSliceType[type as any];
32+
if (tag && typeof tag === 'string') {
33+
fromHtmlMap.set(tag, [[def, () => [type, null]]]);
34+
}
2835
}
2936

3037
public def<Type extends number | string, Schema extends NodeBuilder, Inline extends boolean = true>(

src/json-crdt-extensions/peritext/transfer/__tests__/PeritextDataTransfer.spec.ts

+67-4
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ describe('Markdown', () => {
195195
expect(md2).toBe('ab\n\nc__123__\n\n++456++d');
196196
});
197197

198-
test.skip('can insert Markdown with inline line breaks', () => {
198+
test('can insert Markdown with inline line breaks', () => {
199199
const {peritext, transfer} = setup();
200200
const md = '1\n2\n3';
201201
peritext.strApi().ins(0, 'ab');
@@ -204,9 +204,72 @@ describe('Markdown', () => {
204204
peritext.refresh();
205205
const all = peritext.rangeAll()!;
206206
const html = transfer.toHtml(all);
207-
console.log(html);
208-
expect(html).toBe('<p>a123b</p>');
207+
expect(html).toBe('<p>a1 2 3b</p>');
209208
const md2 = transfer.toMarkdown(all);
210-
expect(md2).toBe('a123b');
209+
expect(md2).toBe('a1 2 3b');
210+
});
211+
212+
test('can insert a blockquote and a paragraph into empty string', () => {
213+
const {peritext, transfer} = setup();
214+
const md = '> blockquote';
215+
transfer.fromMarkdown(0, md);
216+
peritext.refresh();
217+
const all = peritext.rangeAll()!;
218+
const html = transfer.toHtml(all);
219+
expect(html).toBe('<blockquote><p>blockquote</p></blockquote>');
220+
const md2 = transfer.toMarkdown(all);
221+
console.log(md2);
222+
expect(md2).toBe('> blockquote');
223+
});
224+
225+
test('can insert a blockquote', () => {
226+
const {peritext, transfer} = setup();
227+
const md = '> blockquote';
228+
peritext.strApi().ins(0, 'ab');
229+
peritext.refresh();
230+
transfer.fromMarkdown(1, md);
231+
peritext.refresh();
232+
const all = peritext.rangeAll()!;
233+
const html = transfer.toHtml(all);
234+
expect(html).toBe('<p>a</p><blockquote><p>blockquoteb</p></blockquote>');
235+
const md2 = transfer.toMarkdown(all);
236+
expect(md2).toBe('a\n\n> blockquoteb');
237+
});
238+
239+
test('can insert a blockquote and a paragraph', () => {
240+
const {peritext, transfer} = setup();
241+
const md = '> blockquote\n\nparagraph';
242+
peritext.strApi().ins(0, 'ab');
243+
peritext.refresh();
244+
transfer.fromMarkdown(1, md);
245+
peritext.refresh();
246+
const all = peritext.rangeAll()!;
247+
const html = transfer.toHtml(all);
248+
expect(html).toBe('<p>a</p><blockquote><p>blockquote</p></blockquote><p>paragraphb</p>');
249+
const md2 = transfer.toMarkdown(all);
250+
expect(md2).toBe('a\n\n> blockquote\n\nparagraphb');
251+
});
252+
253+
test('can insert realistic 3 paragraphs of Markdown', () => {
254+
const {peritext, transfer} = setup();
255+
const md =
256+
'The German __automotive sector__ is in the process of _cutting ' +
257+
'thousands of jobs_ as it grapples with a global shift toward electric vehicles ' +
258+
'— a transformation Musk himself has been at the forefront of.' +
259+
'\n\n' +
260+
'> To be or not to be, that is the question.' +
261+
'\n\n' +
262+
'A `ClipboardEvent` is dispatched for copy, cut, and paste events, and it contains ' +
263+
'a `clipboardData` property of type `DataTransfer`. The `DataTransfer` object ' +
264+
'is used by the Clipboard Events API to hold multiple representations of data.';
265+
peritext.strApi().ins(0, 'ab');
266+
peritext.refresh();
267+
transfer.fromMarkdown(1, md);
268+
peritext.refresh();
269+
// console.log(peritext.blocks + '');
270+
const all = peritext.rangeAll()!;
271+
const md2 = transfer.toMarkdown(all);
272+
// console.log(md2);
273+
expect(md2).toBe('a' + md + 'b');
211274
});
212275
});

src/json-crdt-extensions/peritext/transfer/__tests__/import-html.spec.ts

+51
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,57 @@ describe('.fromHtml()', () => {
3333
],
3434
]);
3535
});
36+
37+
test('can import a single <blockquote> block', () => {
38+
const html = '<blockquote>2b||!2b</blockquote>';
39+
const peritextMl = fromHtml(html);
40+
expect(peritextMl).toEqual(['', null, [CommonSliceType.blockquote, null, '2b||!2b']]);
41+
});
42+
43+
test('can import a single <blockquote> block with nested single <p>', () => {
44+
const html = '<blockquote><p>2b||!2b</p></blockquote>';
45+
const peritextMl = fromHtml(html);
46+
expect(peritextMl).toEqual(['', null, [CommonSliceType.blockquote, null, [CommonSliceType.p, null, '2b||!2b']]]);
47+
});
48+
49+
test('can import a single <blockquote> block after a <p> block', () => {
50+
const html = '<p>123</p><blockquote>2b||!2b</blockquote>';
51+
const peritextMl = fromHtml(html);
52+
expect(peritextMl).toEqual([
53+
'',
54+
null,
55+
[CommonSliceType.p, null, '123'],
56+
[CommonSliceType.blockquote, null, '2b||!2b'],
57+
]);
58+
});
59+
60+
test('can import a single <blockquote> block with nested single <p>, after a <p> block', () => {
61+
const html = '<p>123</p><blockquote><p>2b||!2b</p></blockquote>';
62+
const peritextMl = fromHtml(html);
63+
expect(peritextMl).toEqual([
64+
'',
65+
null,
66+
[CommonSliceType.p, null, '123'],
67+
[CommonSliceType.blockquote, null, [CommonSliceType.p, null, '2b||!2b']],
68+
]);
69+
});
70+
71+
test('can import a single <blockquote> block with nested single <p>, after a <p> block with inline formatting', () => {
72+
const html = '<p><b>1</b><code>2</code>3</p><blockquote><p>2b||!2b</p></blockquote>';
73+
const peritextMl = fromHtml(html);
74+
expect(peritextMl).toEqual([
75+
'',
76+
null,
77+
[
78+
CommonSliceType.p,
79+
null,
80+
[CommonSliceType.b, {behavior: SliceBehavior.One, inline: true}, '1'],
81+
[CommonSliceType.code, {behavior: SliceBehavior.One, inline: true}, '2'],
82+
'3',
83+
],
84+
[CommonSliceType.blockquote, null, [CommonSliceType.p, null, '2b||!2b']],
85+
]);
86+
});
3687
});
3788

3889
describe('.toViewRange()', () => {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import {CommonSliceType} from '../../slice';
2+
import {SliceBehavior} from '../../slice/constants';
3+
import {fromMarkdown} from '../import-markdown';
4+
5+
describe('fromMarkdown()', () => {
6+
test('a single paragraph', () => {
7+
const text = 'Hello world';
8+
const peritextMl = fromMarkdown(text);
9+
expect(peritextMl).toEqual(['', null, [CommonSliceType.p, null, 'Hello world']]);
10+
});
11+
12+
test('multi-block realistic example', () => {
13+
const text =
14+
'The German __automotive sector__ is in the process of *cutting \n' +
15+
'thousands of jobs* as it grapples with a global shift toward electric vehicles \n' +
16+
'— a transformation Musk himself has been at the forefront of.\n' +
17+
'\n' +
18+
'> To be or not to be, that is the question.\n' +
19+
'\n' +
20+
'A `ClipboardEvent` is dispatched for copy, cut, and paste events, and it contains \n' +
21+
'a `clipboardData` property of type `DataTransfer`. The `DataTransfer` object \n' +
22+
'is used by the Clipboard Events API to hold multiple representations of data.\n';
23+
const peritextMl = fromMarkdown(text);
24+
expect(peritextMl).toEqual([
25+
'',
26+
null,
27+
[
28+
CommonSliceType.p,
29+
null,
30+
'The German ',
31+
[CommonSliceType.b, {behavior: SliceBehavior.One, inline: true}, 'automotive sector'],
32+
' is in the process of ',
33+
[CommonSliceType.i, {behavior: SliceBehavior.One, inline: true}, 'cutting thousands of jobs'],
34+
' as it grapples with a global shift toward electric vehicles — a transformation Musk himself has been at the forefront of.',
35+
],
36+
[CommonSliceType.blockquote, null, [CommonSliceType.p, null, 'To be or not to be, that is the question.']],
37+
[
38+
CommonSliceType.p,
39+
null,
40+
'A ',
41+
[CommonSliceType.code, {behavior: SliceBehavior.One, inline: true}, 'ClipboardEvent'],
42+
' is dispatched for copy, cut, and paste events, and it contains a ',
43+
[CommonSliceType.code, {behavior: SliceBehavior.One, inline: true}, 'clipboardData'],
44+
' property of type ',
45+
[CommonSliceType.code, {behavior: SliceBehavior.One, inline: true}, 'DataTransfer'],
46+
'. The ',
47+
[CommonSliceType.code, {behavior: SliceBehavior.One, inline: true}, 'DataTransfer'],
48+
' object is used by the Clipboard Events API to hold multiple representations of data.',
49+
],
50+
]);
51+
});
52+
});

src/json-crdt-extensions/peritext/transfer/import-html.ts

+25-20
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,15 @@ import type {SliceRegistry} from '../registry/SliceRegistry';
1414
import type {ViewStyle, ViewRange, ViewSlice} from '../editor/types';
1515
import type {ClipboardData} from './export-html';
1616

17+
/**
18+
* @todo Implement HTML normalization function, ensure that:
19+
*
20+
* - <blockquote> and <p> nodes are treated correctly, especially when a sole
21+
* node is nested (e.g. a <blockquote> whose only child is a single <p>).
22+
* - list nodes are treated correctly.
23+
* - <svg> nodes are converted to Base64 and inlined as data URL images.
24+
*/
25+
1726
/**
1827
* Flattens a {@link PeritextMlNode} tree structure into a {@link ViewRange}
1928
* flat string with annotation ranges.
@@ -22,31 +31,33 @@ class ViewRangeBuilder {
2231
private text = '';
2332
private slices: ViewSlice[] = [];
2433

25-
private build0(node: PeritextMlNode, depth = 0): void {
26-
const skipWhitespace = depth < 2;
34+
private build0(node: PeritextMlNode, path: (string | number)[]): boolean {
35+
const skipWhitespace = path.length < 2;
2736
if (typeof node === 'string') {
28-
if (skipWhitespace && !node.trim()) return;
37+
if (skipWhitespace && !node.trim()) return false;
2938
this.text += node;
30-
return;
39+
return false;
3140
}
3241
const [type, attr] = node;
3342
const start = this.text.length;
3443
const length = node.length;
3544
const inline = !!attr?.inline;
36-
const hasType = !!type || type === 0;
37-
if (hasType && !inline) {
45+
const hasType = type === 0 || !!type;
46+
const firstChild = node[2] as PeritextMlNode;
47+
const isFirstChildInline = firstChild && (typeof firstChild === 'string' || firstChild[1]?.inline);
48+
if (hasType && !inline && isFirstChildInline) {
3849
this.text += '\n';
3950
const header =
4051
(SliceBehavior.Marker << SliceHeaderShift.Behavior) +
4152
(Anchor.Before << SliceHeaderShift.X1Anchor) +
4253
(Anchor.Before << SliceHeaderShift.X2Anchor);
43-
const slice: ViewSlice = [header, start, start, type];
54+
const slice: ViewSlice = [header, start, start, path.length ? [...path, type] : type];
4455
const data = attr?.data;
4556
if (data) slice.push(data);
4657
this.slices.push(slice);
4758
}
48-
for (let i = 2; i < length; i++) this.build0(node[i] as PeritextMlNode, depth + 1);
49-
if (hasType) {
59+
for (let i = 2; i < length; i++) this.build0(node[i] as PeritextMlNode, type === '' ? path : [...path, type]);
60+
if (hasType && inline) {
5061
let end: number = 0,
5162
header: number = 0;
5263
if (inline) {
@@ -62,10 +73,11 @@ class ViewRangeBuilder {
6273
this.slices.push(slice);
6374
}
6475
}
76+
return false;
6577
}
6678

6779
public build(node: PeritextMlNode): ViewRange {
68-
this.build0(node);
80+
this.build0(node, []);
6981
const view: ViewRange = [this.text, 0, this.slices];
7082
return view;
7183
}
@@ -115,7 +127,7 @@ export const fromJsonMl = (jsonml: JsonMlNode, registry: SliceRegistry = default
115127
node[0] = res[0];
116128
node[1] = res[1];
117129
} else {
118-
node[0] = SliceTypeName[tag as any] ?? tag;
130+
if (typeof tag === 'string') node[0] = SliceTypeName[tag as any] ?? tag;
119131
const attr = jsonml[1] || {};
120132
let data = null;
121133
if (attr['data-attr'] !== void 0) {
@@ -126,15 +138,8 @@ export const fromJsonMl = (jsonml: JsonMlNode, registry: SliceRegistry = default
126138
const inline = inlineHtmlTag || attr['data-inline'] === 'true';
127139
if (data || inline) node[1] = {data, inline};
128140
}
129-
if (typeof node[0] === 'number' && node[0] < 0) {
130-
const attr = node[1] || {};
131-
attr.inline = true;
132-
node[1] = attr;
133-
}
134-
if (node.length < 3) {
135-
const attr = node[1] || {};
136-
if (attr.inline) return '';
137-
}
141+
if (typeof node[0] === 'number' && node[0] < 0) (node[1] ||= {}).inline = true;
142+
if (node.length < 3 && (node[1] || {}).inline) return '';
138143
return node;
139144
};
140145

0 commit comments

Comments
 (0)