diff --git a/README.md b/README.md index eda254f..a5923d6 100755 --- a/README.md +++ b/README.md @@ -80,7 +80,8 @@ import { NodeHtmlMarkdown, NodeHtmlMarkdownOptions } from 'node-html-markdown' NodeHtmlMarkdown.translate( /* html */ `hello`, /* options (optional) */ {}, - /* customTranslators (optional) */ undefined + /* customTranslators (optional) */ undefined, + /* customCodeBlockTranslators (optional) */ undefined ); // Multiple files @@ -90,7 +91,8 @@ NodeHtmlMarkdown.translate( 'file2.html': `goodbye` }, /* options (optional) */ {}, - /* customTranslators (optional) */ undefined + /* customTranslators (optional) */ undefined, + /* customCodeBlockTranslators (optional) */ undefined ); @@ -101,7 +103,8 @@ NodeHtmlMarkdown.translate( const nhm = new NodeHtmlMarkdown( /* options (optional) */ {}, - /* customTransformers (optional) */ undefined + /* customTranslators (optional) */ undefined, + /* customCodeBlockTranslators (optional) */ undefined ); // Single file @@ -160,12 +163,12 @@ export interface NodeHtmlMarkdownOptions { /** * Supplied elements will be ignored (ignores inner text does not parse children) */ - readonly ignore?: string[], + ignore?: string[], /** * Supplied elements will be treated as blocks (surrounded with blank lines) */ - readonly blockElements?: string[], + blockElements?: string[], /** * Max consecutive new lines allowed @@ -225,6 +228,8 @@ __For detail on how to use them see__: - [translator.ts](https://github.com/crosstype/node-html-markdown/blob/master/src/translator.ts) - Documentation for `TranslatorConfig` - [config.ts](https://github.com/crosstype/node-html-markdown/blob/master/src/config.ts) - Translators in `defaultTranslators` +The `NodeHtmlMarkdown#codeBlockTranslators` property is a collection of translators which handles elements within a `<pre><code>` block.
+
 ## Further improvements
 
 Being a performance-centric library, we're always interested in further improvements. 
diff --git a/src/config.ts b/src/config.ts
index d8fb420..5f10f13 100755
--- a/src/config.ts
+++ b/src/config.ts
@@ -171,11 +171,13 @@ export const defaultTranslators: TranslatorConfigObject = {
         noEscape: true,
         prefix: codeFence + language + '\n',
         postfix: '\n' + codeFence,
+        childTranslators: visitor.instance.codeBlockTranslators
       }
     } else {
       return {
         noEscape: true,
-        postprocess: ({ content }) => content.replace(/^/gm, '    ')
+        postprocess: ({ content }) => content.replace(/^/gm, '    '),
+        childTranslators: visitor.instance.codeBlockTranslators
       }
     }
   },
@@ -215,6 +217,15 @@ export const defaultTranslators: TranslatorConfigObject = {
   },
 }
 
+export const defaultCodeBlockTranslators: TranslatorConfigObject = { // Default translator configs for the code-block translator collection
+  'br': { content: `\n`, recurse: false }, // Line break: emit a newline, do not visit children
+  'hr': { content: '---', recurse: false }, // Horizontal rule: emit literal '---', do not visit children
+  'h1,h2,h3,h4,h5,h6': { prefix: '[', postfix: ']' }, // Headings: wrap the content in square brackets
+  'ol,ul': defaultTranslators['ol,ul'], // Lists: reuse the regular list translator
+  'li': defaultTranslators['li'], // List items: reuse the regular list-item translator
+  'img': { recurse: false } // Images: no content/prefix configured, children are not visited
+}
+
 // endregion
 
 
diff --git a/src/main.ts b/src/main.ts
index b03f92d..74b7070 100755
--- a/src/main.ts
+++ b/src/main.ts
@@ -1,6 +1,8 @@
 import { NodeHtmlMarkdownOptions } from './options';
 import { TranslatorCollection, TranslatorConfigObject } from './translator';
-import { defaultBlockElements, defaultIgnoreElements, defaultOptions, defaultTranslators } from './config';
+import {
+  defaultBlockElements, defaultCodeBlockTranslators, defaultIgnoreElements, defaultOptions, defaultTranslators
+} from './config';
 import { parseHTML } from './utilities';
 import { getMarkdownForHtmlNodes } from './visitor';
 
@@ -21,22 +23,33 @@ type Options = Partial
 
 export class NodeHtmlMarkdown {
   public translators = new TranslatorCollection();
+  public codeBlockTranslators = new TranslatorCollection();
   public readonly options: NodeHtmlMarkdownOptions
 
-  constructor(options?: Options, customTranslators?: TranslatorConfigObject) {
+  constructor(options?: Options, customTranslators?: TranslatorConfigObject, customCodeBlockTranslators?: TranslatorConfigObject) {
     /* Setup Options */
     this.options = { ...defaultOptions, ...options };
     const ignoredElements = this.options.ignore?.concat(defaultIgnoreElements) ?? defaultIgnoreElements;
     const blockElements = this.options.blockElements?.concat(defaultBlockElements) ?? defaultBlockElements;
 
     /* Setup Translator Bases */
-    ignoredElements?.forEach(el => this.translators.set(el, { ignore: true, recurse: false }));
-    blockElements?.forEach(el => this.translators.set(el, { surroundingNewlines: 2 }));
+    ignoredElements?.forEach(el => {
+      this.translators.set(el, { ignore: true, recurse: false });
+      this.codeBlockTranslators.set(el, { ignore: true, recurse: false });
+    })
+
+    blockElements?.forEach(el => {
+      this.translators.set(el, { surroundingNewlines: 2 });
+      this.codeBlockTranslators.set(el, { surroundingNewlines: 2 });
+    });
 
     /* Add and merge bases with default and custom translator configs */
     for (const [ elems, cfg ] of Object.entries({ ...defaultTranslators, ...customTranslators }))
       this.translators.set(elems, cfg, true);
 
+    for (const [ elems, cfg ] of Object.entries({ ...defaultCodeBlockTranslators, ...customCodeBlockTranslators }))
+      this.codeBlockTranslators.set(elems, cfg, true);
+
     // TODO - Workaround for upstream issue (may not be fixed) - https://github.com/taoqf/node-html-parser/issues/78
     if (!this.options.textReplace) this.options.textReplace = [];
     this.options.textReplace.push([ /^/gmi, '' ]);
@@ -50,15 +63,15 @@ export class NodeHtmlMarkdown {
   /**
    * Translate HTML source text to markdown
    */
-  static translate(html: string, options?: Options, customTranslators?: TranslatorConfigObject): string
+  static translate(html: string, options?: Options, customTranslators?: TranslatorConfigObject, customCodeBlockTranslators?: TranslatorConfigObject): string
   /**
    * Translate collection of HTML source text to markdown
    */
-  static translate(files: FileCollection, options?: Options, customTranslators?: TranslatorConfigObject): FileCollection
-  static translate(htmlOrFiles: string | FileCollection, opt?: Options, trans?: TranslatorConfigObject):
+  static translate(files: FileCollection, options?: Options, customTranslators?: TranslatorConfigObject, customCodeBlockTranslators?: TranslatorConfigObject): FileCollection
+  static translate(htmlOrFiles: string | FileCollection, opt?: Options, customTranslators?: TranslatorConfigObject, customCodeBlockTranslators?: TranslatorConfigObject):
     string | FileCollection
   {
-    return NodeHtmlMarkdown.prototype.translateWorker.call(new NodeHtmlMarkdown(opt, trans), htmlOrFiles);
+    return NodeHtmlMarkdown.prototype.translateWorker.call(new NodeHtmlMarkdown(opt, customTranslators, customCodeBlockTranslators), htmlOrFiles);
   }
 
   // endregion
diff --git a/src/translator.ts b/src/translator.ts
index d4a1628..238bd48 100755
--- a/src/translator.ts
+++ b/src/translator.ts
@@ -79,6 +79,11 @@ export type TranslatorConfig = {
    * Keep whitespace as it is
    */
   preserveWhitespace?: boolean
+
+  /**
+   * Custom translator collection to use for child HTML nodes
+   */
+  childTranslators?: TranslatorCollection
 }
 
 export enum PostProcessResult {
diff --git a/src/visitor.ts b/src/visitor.ts
index 515d78a..a763a95 100755
--- a/src/visitor.ts
+++ b/src/visitor.ts
@@ -3,7 +3,7 @@ import { ElementNode, HtmlNode, isElementNode, isTextNode } from './nodes';
 import { getChildNodes, getTrailingWhitespaceInfo, perfStart, perfStop, trimNewLines } from './utilities';
 import {
   createTranslatorContext, isTranslatorConfig, PostProcessResult, TranslatorConfig, TranslatorConfigFactory,
-  TranslatorContext
+  TranslatorConfigObject, TranslatorContext
 } from './translator';
 import { NodeHtmlMarkdownOptions } from './options';
 import { contentlessElements } from './config';
@@ -19,6 +19,7 @@ export interface NodeMetadata {
   listItemNumber?: number
   noEscape?: boolean
   preserveWhitespace?: boolean
+  translators?: TranslatorConfigObject
 }
 
 export type NodeMetadataMap = Map
@@ -160,7 +161,7 @@ export class Visitor {
     if (textOnly || !isElementNode(node)) return;
 
     /* Handle element node */
-    const { instance: { translators } } = this;
+    const translators = metadata?.translators ?? this.instance.translators;
     const translatorCfgOrFactory = translators[node.tagName] as TranslatorConfig | TranslatorConfigFactory;
 
     /* Update metadata with list detail */
@@ -202,9 +203,9 @@ export class Visitor {
     // Skip and don't check children if ignore flag set
     if (cfg.ignore) return;
 
-    /* Update metadata for noEscape flag */
-    if (cfg.noEscape && !metadata?.noEscape) {
-      metadata = { ...metadata, noEscape: true };
+    /* Update metadata if needed. Merge only: a noEscape flag or translator collection inherited from an ancestor is kept, never reset to undefined by this node's config */
+    if ((cfg.noEscape && !metadata?.noEscape) || (cfg.childTranslators && !metadata?.translators)) {
+      metadata = { ...metadata, noEscape: metadata?.noEscape || cfg.noEscape, translators: metadata?.translators ?? cfg.childTranslators };
       this.nodeMetadata.set(node, metadata);
     }
 
diff --git a/test/default-tags-codeblock.test.ts b/test/default-tags-codeblock.test.ts
new file mode 100755
index 0000000..4cf5146
--- /dev/null
+++ b/test/default-tags-codeblock.test.ts
@@ -0,0 +1,80 @@
+// noinspection HtmlUnknownTarget
+
+import { NodeHtmlMarkdown } from '../src';
+
+
+/* ****************************************************************************************************************** *
+ * Tests
+ * ****************************************************************************************************************** */
+
+// Note: Newline handling for block elements within code blocks is not very clean. This can be fixed later.
+describe(`Default Tags`, () => {
+  let instance: NodeHtmlMarkdown;
+  const translateAsBlock = (html: string) => instance.translate(`
${html}
`); + const getExpected = (s: string) => '```\n' + s + '\n```'; + beforeAll(() => { + instance = new NodeHtmlMarkdown(); + }); + + test(`Line Break (br)`, () => { + const res = translateAsBlock(`a
b`); + expect(res).toBe(getExpected(`a\nb`)); + }); + + test(`Horizontal Rule (hr)`, () => { + const res = translateAsBlock(`a
b`); + expect(res).toBe(getExpected(`a\n\n---\n\nb`)); + }); + + test(`Non-processed Elements (b, strong, del, s, strike, em, i, pre, code, blockquote, a)`, () => { + const tags = [ 'b', 'strong', 'del', 's', 'strike', 'em', 'i', 'code', 'a', 'pre', 'blockquote' ]; + const html = tags.map(t => `<${t}>${t}`).join(' '); + const exp = 'b strong del s strike em i code a \n\npre\n\n blockquote\n\n'; + + const res = translateAsBlock(html); + expect(res).toBe(getExpected(exp)); + }); + + test(`Image (img)`, () => { + const res = translateAsBlock(`ab`); + expect(res).toBe(getExpected(`ab`)); + }); + + test(`Headings (h1, h2, h3, h4, h5, h6)`, () => { + let nodes: string[] = []; + for (let i = 1; i < 8; i++) nodes.push(`a`); + const res = translateAsBlock(nodes.join('')); + expect(res).toBe(getExpected('\n[a]\n'.repeat(6) + '\na')); + }); + + // Note: Newline handling here for block elements is unusual + describe(`Lists (ol + li, ul + li)`, () => { + test(`Multi-level Ordered List`, () => { + const res = translateAsBlock(` +
    +
  1. a

    b
  2. +
  3. +
  4. b +
    1. c
      d
    +
    • e
      f
    +
  5. +
+ `); + expect(res).toBe(getExpected(` \n \n1. a \nb\n \n \n2. b \n 1. c \n d \n \n * e \n f\n \n `)); + }); + + test(`Multi-level Unordered List`, () => { + const res = translateAsBlock(` +
    +
  • a

    b
  • +
  • +
  • b +
    • c
      d
    +
    1. e
      f
    +
  • +
+ `); + expect(res).toBe(getExpected(` \n \n* a \nb\n \n \n* b \n * c \n d \n \n 1. e \n f\n \n `)); + }); + }); +});