Skip to content

Commit

Permalink
Add wrapping of attributes. It doesn't work yet, but that's for nynodata
Browse files Browse the repository at this point in the history
  • Loading branch information
gunnarvelle authored and Jonas-C committed May 22, 2024
1 parent 53b765c commit 6ef2ee1
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 9 deletions.
8 changes: 1 addition & 7 deletions src/components/NynorskTranslateProvider.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,7 @@ export const useTranslateToNN = () => {
const content = get(element, field);
if (content) {
const isArray = Array.isArray(content);
// Our backend uses Jsoup to encode html. However, > is not encoded, and nynodata expects it to be. As such, we have to parse
// the entire html string and reencode it using an xmlSerializer.
const parsed =
type === "html" && !isArray
? xmlSerializer.serializeToString(domParser.parseFromString(content, "text/html").body!)
: content;
acc[field] = { content: parsed, type, isArray };
acc[field] = { content, type, isArray };
}
return acc;
}, {});
Expand Down
29 changes: 27 additions & 2 deletions src/server/translate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@
*
*/

import { load } from "cheerio";
import { CheerioAPI, load } from "cheerio";
import FormData from "form-data";
import { JSDOM } from "jsdom";
import fetch from "node-fetch";
import queryString from "query-string";
import serialize from "w3c-xmlserializer";
import errorLogger from "./logger";
import config, { getEnvironmentVariabel } from "../config";
import { ApiTranslateType } from "../interfaces";
Expand Down Expand Up @@ -42,6 +44,16 @@ const headers = user
}
: undefined;

/**
 * Wraps selected elements found inside an HTML-valued attribute in `<ndlaskip>`.
 *
 * The attribute value is treated as an HTML fragment of its own: it is parsed
 * separately, every element in it that matches `selector` is wrapped in
 * `<ndlaskip>` (presumably the marker the translation service leaves
 * untouched, mirroring how `math` elements are wrapped elsewhere in this
 * file), and the serialized result is written back to the attribute.
 *
 * @param html Cheerio instance for the document that owns `element`.
 * @param element The element whose attribute is rewritten.
 * @param attribute Name of the attribute holding the HTML fragment.
 * @param selector Selector evaluated against the attribute's own markup.
 */
const wrapAttribute = (html: CheerioAPI, element: any, attribute: string, selector: string) => {
  const value = html(element).attr(attribute);
  // A missing or empty attribute has nothing to wrap; leave the element as is.
  if (!value) return;

  const innerHtml = load(value);
  // Select and wrap inside the parsed attribute markup, not the outer document:
  // querying `html` here would re-wrap unrelated elements of the outer document
  // on every call and leave the attribute content itself unchanged.
  innerHtml(selector).each((_, el) => {
    innerHtml(el).wrap("<ndlaskip></ndlaskip>");
  });

  // Passing null to `attr` removes the attribute, so fall back to the original
  // value if serialization unexpectedly yields nothing.
  html(element).attr(attribute, innerHtml("body").html() ?? value);
};

const doFetch = (name: string, element: ApiTranslateType): Promise<ResponseType> => {
if (element.type === "text") {
const parsedContent = element.isArray ? element.content.join("|") : element.content;
Expand All @@ -68,7 +80,20 @@ const doFetch = (name: string, element: ApiTranslateType): Promise<ResponseType>
html("math").each((_, el) => {
html(el).wrap("<ndlaskip></ndlaskip>");
});
const buffer = Buffer.from(html.html());
html("ndlaembed").each((_, el) => {
wrapAttribute(html, el, "data-caption", "span[lang]");
wrapAttribute(html, el, "data-title", "span[lang]");
wrapAttribute(html, el, "data-subtitle", "span[lang]");
wrapAttribute(html, el, "data-description", "span[lang]");
wrapAttribute(html, el, "data-url-text", "span[lang]");
});
const content = html.html();

// Our backend uses Jsoup to encode html. However, > is not encoded, and nynodata expects it to be. As such, we have to parse
// the entire html string and reencode it using an xmlSerializer.
const dom = new JSDOM(content);
const sanitized = serialize(dom.window.document);
const buffer = Buffer.from(sanitized);
const params = { stilmal };

formData.append("file", buffer, { filename: `${name}.html` });
Expand Down

0 comments on commit 6ef2ee1

Please sign in to comment.