From eb71a79ac732a54d8898a468d03588273312dc6d Mon Sep 17 00:00:00 2001
From: Roger Tuan
Date: Wed, 7 Jan 2026 13:50:09 -0800
Subject: [PATCH 1/5] Add new tests and clarify function name
---
.../__tests__/all.test.ts | 231 ++++++++++++------
1 file changed, 159 insertions(+), 72 deletions(-)
diff --git a/packages/html-to-structured-text/__tests__/all.test.ts b/packages/html-to-structured-text/__tests__/all.test.ts
index 46fb180..b14ac40 100644
--- a/packages/html-to-structured-text/__tests__/all.test.ts
+++ b/packages/html-to-structured-text/__tests__/all.test.ts
@@ -1,13 +1,14 @@
/* eslint-disable @typescript-eslint/ban-ts-comment */
// @ts-nocheck
-import { parse5ToStructuredText, Options } from '../src';
+import { Options, parse5ToStructuredText } from '../src';
import parse5 from 'parse5';
import { allowedChildren, Span, validate } from 'datocms-structured-text-utils';
-import { findAll, find, visit, CONTINUE } from 'unist-utils-core';
+import { CONTINUE, find, findAll, visit } from 'unist-utils-core';
import googleDocsPreprocessor from '../src/preprocessors/google-docs';
-function htmlToStructuredText(html: string, options: Options = {}) {
+// This is a shim for NodeJS. The actual CMS software uses DOMParser and can have different output, unfortunately
+function parse5HtmlToStructuredText(html: string, options: Options = {}) {
return parse5ToStructuredText(
parse5.parse(html, {
sourceCodeLocationInfo: true,
@@ -19,14 +20,14 @@ function htmlToStructuredText(html: string, options: Options = {}) {
describe('htmlToStructuredText', () => {
it('works with empty document', async () => {
const html = '';
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(result).toMatchInlineSnapshot(`null`);
});
it('ignores doctype and HTML comments', async () => {
const html = ` `;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(result).toMatchInlineSnapshot(`null`);
});
@@ -52,7 +53,7 @@ describe('htmlToStructuredText', () => {
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(result).toMatchInlineSnapshot(`null`);
});
@@ -63,7 +64,7 @@ describe('htmlToStructuredText', () => {
twice
`;
- const result = await htmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredText(html, {
handlers: {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
text: async (createNode, node, context) => {
@@ -93,7 +94,7 @@ describe('htmlToStructuredText', () => {
const html = `
twice
`;
- const result = await htmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredText(html, {
handlers: {
p: (createNode, node, context) => {
return [
@@ -115,7 +116,7 @@ describe('htmlToStructuredText', () => {
already wrapped
needs wrapping
`;
- const result = await htmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredText(html, {
handlers: {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
unknown: (createNode, node, context) => {
@@ -144,7 +145,7 @@ describe('htmlToStructuredText', () => {
const html = `
span
`;
- const result = await htmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredText(html, {
handlers: {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
custom: async (createNode, node, context) => {
@@ -164,7 +165,7 @@ describe('htmlToStructuredText', () => {
override
regular paragraph
`;
- const result = await htmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredText(html, {
handlers: {
blockquote: async (createNode, node, context) => {
// turn a blockquote into a paragraph
@@ -188,7 +189,7 @@ describe('htmlToStructuredText', () => {
already wrapped
needs wrapping
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children.map((child) => child.type))
.toMatchInlineSnapshot(`
@@ -214,7 +215,7 @@ describe('htmlToStructuredText', () => {
hyperlink
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(
@@ -250,7 +251,7 @@ describe('htmlToStructuredText', () => {
const html = `
`;
- const result = await htmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredText(html, {
handlers: {
base: async (createNode, node, context) => {
expect(context.global.baseUrl).toBe(null);
@@ -274,7 +275,7 @@ describe('htmlToStructuredText', () => {
const html = `
contact
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
const { meta } = find(result.document, 'link');
expect(meta).toMatchInlineSnapshot(`
@@ -300,7 +301,7 @@ describe('htmlToStructuredText', () => {
contact
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(find(result.document, 'link').url).toBe(
'https://datocms.com/contact',
@@ -312,7 +313,7 @@ describe('htmlToStructuredText', () => {
contact
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(find(result.document, 'link').url).toBe(
'https://datocms.com/contact',
@@ -324,7 +325,7 @@ describe('htmlToStructuredText', () => {
contact
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(find(result.document, 'link').url).toBe(
'https://datocms.com/t/contact',
@@ -336,7 +337,7 @@ describe('htmlToStructuredText', () => {
contact
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(find(result.document, 'link').url).toBe(
'https://datocms.com/b/contact',
@@ -348,7 +349,7 @@ describe('htmlToStructuredText', () => {
contact
`;
- const result = await htmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredText(html, {
shared: {
baseUrl: 'http://acme.com',
},
@@ -364,7 +365,7 @@ describe('htmlToStructuredText', () => {
contact
`;
- const result = await htmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredText(html, {
shared: {
baseUrl: 'http://acme.com',
baseUrlFound: true,
@@ -387,7 +388,7 @@ describe('htmlToStructuredText', () => {
span
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(findAll(result.document, 'paragraph')).toHaveLength(1);
const spans = findAll(result.document, 'span');
@@ -427,7 +428,7 @@ describe('htmlToStructuredText', () => {
nested implicit paragraph
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children.map((child) => child.type))
.toMatchInlineSnapshot(`
@@ -448,7 +449,7 @@ describe('htmlToStructuredText', () => {
[span becomes simple text]
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children).toMatchInlineSnapshot(`
Array [
@@ -486,7 +487,7 @@ describe('htmlToStructuredText', () => {
[separate paragraph]
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children).toMatchInlineSnapshot(`
Array [
@@ -518,7 +519,7 @@ describe('htmlToStructuredText', () => {
const html = `
needs wrapping
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children[0].type).toBe('heading');
expect(result.document.children[0].children[0].type).toBe('span');
@@ -529,7 +530,7 @@ describe('htmlToStructuredText', () => {
needs wrapping
hello
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children.map((child) => child.type))
.toMatchInlineSnapshot(`
@@ -546,7 +547,7 @@ describe('htmlToStructuredText', () => {
needs wrapping
hello
`;
- const result = await htmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredText(html, {
allowedHeadingLevels: [2],
});
expect(validate(result).valid).toBeTruthy();
@@ -564,7 +565,7 @@ describe('htmlToStructuredText', () => {
const html = `
p not allowed inside h1
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(findAll(result.document, 'paragraph')).toHaveLength(0);
});
@@ -573,7 +574,7 @@ describe('htmlToStructuredText', () => {
const html = `
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children[0].children).toMatchInlineSnapshot(`
Array [
@@ -608,7 +609,7 @@ describe('htmlToStructuredText', () => {
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(findAll(result.document, 'heading')).toHaveLength(0);
});
@@ -617,7 +618,7 @@ describe('htmlToStructuredText', () => {
const html = `
dato
`;
- const result = await htmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredText(html, {
allowedBlocks: [],
});
expect(validate(result).valid).toBeTruthy();
@@ -632,7 +633,7 @@ describe('htmlToStructuredText', () => {
const html = `
<import src="file.html" />
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children[0]).toMatchInlineSnapshot(`
Object {
@@ -649,7 +650,7 @@ describe('htmlToStructuredText', () => {
dast()
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(find(result.document, 'paragraph')).toBeTruthy();
expect(findAll(result.document, 'code')).toHaveLength(0);
@@ -668,7 +669,7 @@ describe('htmlToStructuredText', () => {
const html = `
dast()
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(findAll(result.document, 'code')).toHaveLength(1);
expect(findAll(result.document, 'code')[0]).toMatchInlineSnapshot(`
@@ -684,7 +685,7 @@ describe('htmlToStructuredText', () => {
const html = `
dast()
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(findAll(result.document, 'code')).toHaveLength(1);
expect(findAll(result.document, 'code')[0].language).toBeFalsy();
@@ -694,7 +695,7 @@ describe('htmlToStructuredText', () => {
const html = `
let dato
`;
- const result = await htmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredText(html, {
allowedBlocks: [],
});
expect(validate(result).valid).toBeTruthy();
@@ -707,7 +708,7 @@ describe('htmlToStructuredText', () => {
const html = `
foo
bar
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children).toMatchInlineSnapshot(`
Array [
@@ -727,7 +728,7 @@ describe('htmlToStructuredText', () => {
1
2
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children.map((child) => child.type))
.toMatchInlineSnapshot(`
@@ -758,7 +759,7 @@ describe('htmlToStructuredText', () => {
const html = `
1
2
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children).toMatchInlineSnapshot(`
Array [
@@ -793,7 +794,7 @@ describe('htmlToStructuredText', () => {
const html = `
dato
`;
- const result = await htmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredText(html, {
allowedBlocks: [],
});
expect(validate(result).valid).toBeTruthy();
@@ -808,7 +809,7 @@ describe('htmlToStructuredText', () => {
const html = `
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children[0].style).toBe('bulleted');
});
@@ -817,7 +818,7 @@ describe('htmlToStructuredText', () => {
const html = `
- test
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children[0].style).toBe('numbered');
});
@@ -831,7 +832,7 @@ describe('htmlToStructuredText', () => {
4
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(
find(result.document, 'list').children.every(
@@ -846,7 +847,7 @@ describe('htmlToStructuredText', () => {
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(find(find(result.document, 'list'), 'list')).toBeTruthy();
});
@@ -857,7 +858,7 @@ describe('htmlToStructuredText', () => {
1
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(findAll(result.document, 'blockquote')).toHaveLength(0);
expect(find(result.document, 'span').value).toBe('1');
@@ -869,7 +870,7 @@ describe('htmlToStructuredText', () => {
1
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(findAll(result.document, 'h1')).toHaveLength(0);
expect(find(result.document, 'span').value).toBe('1');
@@ -881,7 +882,7 @@ describe('htmlToStructuredText', () => {
1
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(findAll(result.document, 'code')).toHaveLength(0);
expect(find(result.document, 'span').value).toBe('1');
@@ -894,7 +895,7 @@ describe('htmlToStructuredText', () => {
3
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(findAll(result.document, 'link')).toHaveLength(2);
const items = findAll(result.document, 'listItem').map((listItem) =>
@@ -923,7 +924,7 @@ describe('htmlToStructuredText', () => {
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
const lists = findAll(result.document, 'list');
expect(lists).toHaveLength(1);
@@ -945,7 +946,7 @@ describe('htmlToStructuredText', () => {
1
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
const thematicBreaks = findAll(result.document, 'thematicBreak');
expect(thematicBreaks).toHaveLength(1);
@@ -957,7 +958,7 @@ describe('htmlToStructuredText', () => {
dato
`;
- const result = await htmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredText(html, {
allowedBlocks: [],
});
expect(validate(result).valid).toBeTruthy();
@@ -972,7 +973,7 @@ describe('htmlToStructuredText', () => {
const html = `
1
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children[0].type).toBe('paragraph');
expect(find(find(result.document, 'paragraph'), 'link')).toBeTruthy();
@@ -983,7 +984,7 @@ describe('htmlToStructuredText', () => {
const html = `
1
2
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children[0].type).toBe('heading');
expect(find(find(result.document, 'heading'), 'link')).toBeTruthy();
@@ -994,7 +995,7 @@ describe('htmlToStructuredText', () => {
const html = `
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(findAll(result.document, 'heading')).toHaveLength(0);
});
@@ -1004,7 +1005,7 @@ describe('htmlToStructuredText', () => {
const html = `
dato
2
`;
- const result = await htmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredText(html, {
allowedBlocks: [],
});
expect(validate(result).valid).toBeTruthy();
@@ -1043,7 +1044,7 @@ describe('htmlToStructuredText', () => {
const html = `
<${tagName}>${markName}${tagName}>
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
const span = find(result.document, 'span');
expect(span.marks).toBeTruthy();
@@ -1057,7 +1058,7 @@ describe('htmlToStructuredText', () => {
const html = `
<${tagName}>${markName}${tagName}>
`;
- const result = await htmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredText(html, {
allowedMarks: [],
});
expect(validate(result).valid).toBeTruthy();
@@ -1070,7 +1071,7 @@ describe('htmlToStructuredText', () => {
const html = `
emstrong-emu-strong-emstrong-emem
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(
findAll(result.document, 'span')
@@ -1093,7 +1094,7 @@ describe('htmlToStructuredText', () => {
describe('code', () => {
it('turns inline code tags to span with code mark', async () => {
const html = `To make it even easier to offer responsive, progressive images on your projects, we released a package called react-datocms that exposes an <Image /> component and pairs perfectly with the responsiveImage query.
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(findAll(result.document, 'code')).toHaveLength(0);
const spans = findAll(result.document, 'span').filter(
@@ -1114,7 +1115,7 @@ describe('htmlToStructuredText', () => {
const html = `
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(result).toBe(null);
});
@@ -1123,7 +1124,7 @@ describe('htmlToStructuredText', () => {
const html = `
hello
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children[0].type).toBe('paragraph');
expect(result.document.children).toHaveLength(1);
@@ -1138,7 +1139,7 @@ describe('htmlToStructuredText', () => {
const html = `
hello
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children[0].type).toBe('paragraph');
expect(result.document.children).toHaveLength(1);
@@ -1153,7 +1154,7 @@ describe('htmlToStructuredText', () => {
const html = `
hello
world
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children[0].type).toBe('paragraph');
expect(result.document.children).toHaveLength(1);
@@ -1168,7 +1169,7 @@ describe('htmlToStructuredText', () => {
const html = `
hello
world
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children[0].type).toBe('paragraph');
expect(result.document.children).toHaveLength(1);
@@ -1183,7 +1184,7 @@ describe('htmlToStructuredText', () => {
const html = `
hello
world
`;
- const result = await htmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredText(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children.map((c) => c.type).join(',')).toBe(
'paragraph,paragraph',
@@ -1284,7 +1285,7 @@ describe('htmlToStructuredText', () => {
const html = `
heading
`;
- const result = await htmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredText(html, {
preprocess: (tree) => {
findAll(tree, (node) => {
if (node.type === 'element' && node.tagName === 'p') {
@@ -1319,7 +1320,7 @@ describe('htmlToStructuredText', () => {
`;
- const result = await htmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredText(html, {
preprocess: liftImages,
handlers: {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
@@ -1416,7 +1417,7 @@ describe('htmlToStructuredText', () => {
`;
- const result = await htmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredText(html, {
preprocess: liftImages,
handlers: {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
@@ -1470,7 +1471,7 @@ describe('htmlToStructuredText', () => {
`;
- const result = await htmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredText(html, {
preprocess: liftImages,
handlers: {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
@@ -1528,7 +1529,7 @@ describe('htmlToStructuredText', () => {
item 3
`;
- const result = await htmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredText(html, {
preprocess: (tree) => {
visit(tree, (node, index, parents) => {
if (node.tagName === 'img' && parents.length > 1) {
@@ -1566,7 +1567,7 @@ describe('htmlToStructuredText', () => {
describe('preprocessors', () => {
describe('Google Docs', () => {
const googleDocsToStructuredText = (html: string, options: Options) =>
- htmlToStructuredText(html, {
+ parse5HtmlToStructuredText(html, {
...options,
preprocess: googleDocsPreprocessor,
});
@@ -1660,5 +1661,91 @@ describe('preprocessors', () => {
expect(spans).toHaveLength(1);
expect(spans[0].marks).toEqual(['strong']);
});
+
+ /* Different browsers will copy different HTML from GDocs
+ See https://community.datocms.com/t/structure-text-editor-does-not-copy-paste-styles-anymore/8466/8 for discussion
+ You can use this gdoc as a test: https://docs.google.com/document/d/1H2WeqDd-VWywlds5big1_YVSNx3rrFn6FsIFVDuSWeg/edit?usp=sharing
+ And this tool to get the raw HTML in the clipboard: https://evercoder.github.io/clipboard-inspector/
+ */
+ describe('correctly preserves rich text formatting across browsers', () => {
+ const assertRichTextPaste = async (html: string) => {
+ const result = await googleDocsToStructuredText(html);
+
+ expect(validate(result).valid).toBeTruthy();
+
+ // Exactly one span reads "This is a " (note trailing space) and has no marks
+ expect(
+ findAll(
+ result.document,
+ (node) =>
+ node.type === 'span' &&
+ node.value === 'This is a ' &&
+ node.marks === undefined,
+ ),
+ ).toHaveLength(1);
+
+ // Exactly one span reads "test" with 'strong' (bold) as its only mark
+ expect(
+ findAll(
+ result.document,
+ (node) =>
+ node.type === 'span' &&
+ node.value === 'test' &&
+ Array.isArray(node.marks) &&
+ node.marks.length === 1 &&
+ node.marks[0] === 'strong',
+ ),
+ ).toHaveLength(1);
+
+ // Exactly one span reads "rich" with 'emphasis' (italic) as its only mark
+ expect(
+ findAll(
+ result.document,
+ (node) =>
+ node.type === 'span' &&
+ node.value === 'rich' &&
+ Array.isArray(node.marks) &&
+ node.marks.length === 1 &&
+ node.marks[0] === 'emphasis',
+ ),
+ ).toHaveLength(1);
+
+ // Exactly one span reads "text" with exactly strong + emphasis + underline
+ expect(
+ findAll(
+ result.document,
+ (node) =>
+ node.type === 'span' &&
+ node.value === 'text' &&
+ Array.isArray(node.marks) &&
+ node.marks.length === 3 &&
+ node.marks.includes('strong') &&
+ node.marks.includes('emphasis') &&
+ node.marks.includes('underline'),
+ ),
+ ).toHaveLength(1);
+ };
+
+ it('when copied from Firefox (Jan 2026)', async () => {
+ const firefoxHtml = `
+ This is a test of rich text.
+ `;
+ await assertRichTextPaste(firefoxHtml); // must await, or failures are lost
+ });
+
+ it('when copied from Chrome (Jan 2026)', async () => {
+ const chromeHtml = `
+ This is a test of rich text.
+ `;
+ await assertRichTextPaste(chromeHtml);
+ });
+
+ it('when copied from Safari (Jan 2026)', async () => {
+ const safariHtml = `
+ This is a test of rich text.
+ `;
+ await assertRichTextPaste(safariHtml);
+ });
+ });
});
});
From 6d691e8e44fd34ee786cc8b7b86705887425b637 Mon Sep 17 00:00:00 2001
From: Roger Tuan
Date: Wed, 7 Jan 2026 14:04:11 -0800
Subject: [PATCH 2/5] Clarify function name
---
.../__tests__/all.test.ts | 144 +++++++++---------
1 file changed, 73 insertions(+), 71 deletions(-)
diff --git a/packages/html-to-structured-text/__tests__/all.test.ts b/packages/html-to-structured-text/__tests__/all.test.ts
index b14ac40..b796483 100644
--- a/packages/html-to-structured-text/__tests__/all.test.ts
+++ b/packages/html-to-structured-text/__tests__/all.test.ts
@@ -7,8 +7,10 @@ import { allowedChildren, Span, validate } from 'datocms-structured-text-utils';
import { CONTINUE, find, findAll, visit } from 'unist-utils-core';
import googleDocsPreprocessor from '../src/preprocessors/google-docs';
-// This is a shim for NodeJS. The actual CMS software uses DOMParser and can have different output, unfortunately
-function parse5HtmlToStructuredText(html: string, options: Options = {}) {
+/* This is a shim for NodeJS. The actual CMS software usually uses DOMParser.
+ Don't call it htmlToStructuredText() because there is an exported function
+ by that same name already (in ../src) */
+function parse5HtmlToStructuredTextShim(html: string, options: Options = {}) {
return parse5ToStructuredText(
parse5.parse(html, {
sourceCodeLocationInfo: true,
@@ -20,14 +22,14 @@ function parse5HtmlToStructuredText(html: string, options: Options = {}) {
describe('htmlToStructuredText', () => {
it('works with empty document', async () => {
const html = '';
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(result).toMatchInlineSnapshot(`null`);
});
it('ignores doctype and HTML comments', async () => {
const html = ` `;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(result).toMatchInlineSnapshot(`null`);
});
@@ -53,7 +55,7 @@ describe('htmlToStructuredText', () => {
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(result).toMatchInlineSnapshot(`null`);
});
@@ -64,7 +66,7 @@ describe('htmlToStructuredText', () => {
twice
`;
- const result = await parse5HtmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredTextShim(html, {
handlers: {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
text: async (createNode, node, context) => {
@@ -94,7 +96,7 @@ describe('htmlToStructuredText', () => {
const html = `
twice
`;
- const result = await parse5HtmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredTextShim(html, {
handlers: {
p: (createNode, node, context) => {
return [
@@ -116,7 +118,7 @@ describe('htmlToStructuredText', () => {
already wrapped
needs wrapping
`;
- const result = await parse5HtmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredTextShim(html, {
handlers: {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
unknown: (createNode, node, context) => {
@@ -145,7 +147,7 @@ describe('htmlToStructuredText', () => {
const html = `
span
`;
- const result = await parse5HtmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredTextShim(html, {
handlers: {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
custom: async (createNode, node, context) => {
@@ -165,7 +167,7 @@ describe('htmlToStructuredText', () => {
override
regular paragraph
`;
- const result = await parse5HtmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredTextShim(html, {
handlers: {
blockquote: async (createNode, node, context) => {
// turn a blockquote into a paragraph
@@ -189,7 +191,7 @@ describe('htmlToStructuredText', () => {
already wrapped
needs wrapping
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children.map((child) => child.type))
.toMatchInlineSnapshot(`
@@ -215,7 +217,7 @@ describe('htmlToStructuredText', () => {
hyperlink
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(
@@ -251,7 +253,7 @@ describe('htmlToStructuredText', () => {
const html = `
`;
- const result = await parse5HtmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredTextShim(html, {
handlers: {
base: async (createNode, node, context) => {
expect(context.global.baseUrl).toBe(null);
@@ -275,7 +277,7 @@ describe('htmlToStructuredText', () => {
const html = `
contact
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
const { meta } = find(result.document, 'link');
expect(meta).toMatchInlineSnapshot(`
@@ -301,7 +303,7 @@ describe('htmlToStructuredText', () => {
contact
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(find(result.document, 'link').url).toBe(
'https://datocms.com/contact',
@@ -313,7 +315,7 @@ describe('htmlToStructuredText', () => {
contact
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(find(result.document, 'link').url).toBe(
'https://datocms.com/contact',
@@ -325,7 +327,7 @@ describe('htmlToStructuredText', () => {
contact
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(find(result.document, 'link').url).toBe(
'https://datocms.com/t/contact',
@@ -337,7 +339,7 @@ describe('htmlToStructuredText', () => {
contact
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(find(result.document, 'link').url).toBe(
'https://datocms.com/b/contact',
@@ -349,7 +351,7 @@ describe('htmlToStructuredText', () => {
contact
`;
- const result = await parse5HtmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredTextShim(html, {
shared: {
baseUrl: 'http://acme.com',
},
@@ -365,7 +367,7 @@ describe('htmlToStructuredText', () => {
contact
`;
- const result = await parse5HtmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredTextShim(html, {
shared: {
baseUrl: 'http://acme.com',
baseUrlFound: true,
@@ -388,7 +390,7 @@ describe('htmlToStructuredText', () => {
span
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(findAll(result.document, 'paragraph')).toHaveLength(1);
const spans = findAll(result.document, 'span');
@@ -428,7 +430,7 @@ describe('htmlToStructuredText', () => {
nested implicit paragraph
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children.map((child) => child.type))
.toMatchInlineSnapshot(`
@@ -449,7 +451,7 @@ describe('htmlToStructuredText', () => {
[span becomes simple text]
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children).toMatchInlineSnapshot(`
Array [
@@ -487,7 +489,7 @@ describe('htmlToStructuredText', () => {
[separate paragraph]
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children).toMatchInlineSnapshot(`
Array [
@@ -519,7 +521,7 @@ describe('htmlToStructuredText', () => {
const html = `
needs wrapping
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children[0].type).toBe('heading');
expect(result.document.children[0].children[0].type).toBe('span');
@@ -530,7 +532,7 @@ describe('htmlToStructuredText', () => {
needs wrapping
hello
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children.map((child) => child.type))
.toMatchInlineSnapshot(`
@@ -547,7 +549,7 @@ describe('htmlToStructuredText', () => {
needs wrapping
hello
`;
- const result = await parse5HtmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredTextShim(html, {
allowedHeadingLevels: [2],
});
expect(validate(result).valid).toBeTruthy();
@@ -565,7 +567,7 @@ describe('htmlToStructuredText', () => {
const html = `
p not allowed inside h1
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(findAll(result.document, 'paragraph')).toHaveLength(0);
});
@@ -574,7 +576,7 @@ describe('htmlToStructuredText', () => {
const html = `
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children[0].children).toMatchInlineSnapshot(`
Array [
@@ -609,7 +611,7 @@ describe('htmlToStructuredText', () => {
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(findAll(result.document, 'heading')).toHaveLength(0);
});
@@ -618,7 +620,7 @@ describe('htmlToStructuredText', () => {
const html = `
dato
`;
- const result = await parse5HtmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredTextShim(html, {
allowedBlocks: [],
});
expect(validate(result).valid).toBeTruthy();
@@ -633,7 +635,7 @@ describe('htmlToStructuredText', () => {
const html = `
<import src="file.html" />
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children[0]).toMatchInlineSnapshot(`
Object {
@@ -650,7 +652,7 @@ describe('htmlToStructuredText', () => {
dast()
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(find(result.document, 'paragraph')).toBeTruthy();
expect(findAll(result.document, 'code')).toHaveLength(0);
@@ -669,7 +671,7 @@ describe('htmlToStructuredText', () => {
const html = `
dast()
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(findAll(result.document, 'code')).toHaveLength(1);
expect(findAll(result.document, 'code')[0]).toMatchInlineSnapshot(`
@@ -685,7 +687,7 @@ describe('htmlToStructuredText', () => {
const html = `
dast()
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(findAll(result.document, 'code')).toHaveLength(1);
expect(findAll(result.document, 'code')[0].language).toBeFalsy();
@@ -695,7 +697,7 @@ describe('htmlToStructuredText', () => {
const html = `
let dato
`;
- const result = await parse5HtmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredTextShim(html, {
allowedBlocks: [],
});
expect(validate(result).valid).toBeTruthy();
@@ -708,7 +710,7 @@ describe('htmlToStructuredText', () => {
const html = `
foo
bar
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children).toMatchInlineSnapshot(`
Array [
@@ -728,7 +730,7 @@ describe('htmlToStructuredText', () => {
1
2
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children.map((child) => child.type))
.toMatchInlineSnapshot(`
@@ -759,7 +761,7 @@ describe('htmlToStructuredText', () => {
const html = `
1
2
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children).toMatchInlineSnapshot(`
Array [
@@ -794,7 +796,7 @@ describe('htmlToStructuredText', () => {
const html = `
dato
`;
- const result = await parse5HtmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredTextShim(html, {
allowedBlocks: [],
});
expect(validate(result).valid).toBeTruthy();
@@ -809,7 +811,7 @@ describe('htmlToStructuredText', () => {
const html = `
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children[0].style).toBe('bulleted');
});
@@ -818,7 +820,7 @@ describe('htmlToStructuredText', () => {
const html = `
- test
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children[0].style).toBe('numbered');
});
@@ -832,7 +834,7 @@ describe('htmlToStructuredText', () => {
4
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(
find(result.document, 'list').children.every(
@@ -847,7 +849,7 @@ describe('htmlToStructuredText', () => {
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(find(find(result.document, 'list'), 'list')).toBeTruthy();
});
@@ -858,7 +860,7 @@ describe('htmlToStructuredText', () => {
1
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(findAll(result.document, 'blockquote')).toHaveLength(0);
expect(find(result.document, 'span').value).toBe('1');
@@ -870,7 +872,7 @@ describe('htmlToStructuredText', () => {
1
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(findAll(result.document, 'h1')).toHaveLength(0);
expect(find(result.document, 'span').value).toBe('1');
@@ -882,7 +884,7 @@ describe('htmlToStructuredText', () => {
1
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(findAll(result.document, 'code')).toHaveLength(0);
expect(find(result.document, 'span').value).toBe('1');
@@ -895,7 +897,7 @@ describe('htmlToStructuredText', () => {
3
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(findAll(result.document, 'link')).toHaveLength(2);
const items = findAll(result.document, 'listItem').map((listItem) =>
@@ -924,7 +926,7 @@ describe('htmlToStructuredText', () => {
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
const lists = findAll(result.document, 'list');
expect(lists).toHaveLength(1);
@@ -946,7 +948,7 @@ describe('htmlToStructuredText', () => {
1
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
const thematicBreaks = findAll(result.document, 'thematicBreak');
expect(thematicBreaks).toHaveLength(1);
@@ -958,7 +960,7 @@ describe('htmlToStructuredText', () => {
dato
`;
- const result = await parse5HtmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredTextShim(html, {
allowedBlocks: [],
});
expect(validate(result).valid).toBeTruthy();
@@ -973,7 +975,7 @@ describe('htmlToStructuredText', () => {
const html = `
1
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children[0].type).toBe('paragraph');
expect(find(find(result.document, 'paragraph'), 'link')).toBeTruthy();
@@ -984,7 +986,7 @@ describe('htmlToStructuredText', () => {
const html = `
1
2
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children[0].type).toBe('heading');
expect(find(find(result.document, 'heading'), 'link')).toBeTruthy();
@@ -995,7 +997,7 @@ describe('htmlToStructuredText', () => {
const html = `
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(findAll(result.document, 'heading')).toHaveLength(0);
});
@@ -1005,7 +1007,7 @@ describe('htmlToStructuredText', () => {
const html = `
dato
2
`;
- const result = await parse5HtmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredTextShim(html, {
allowedBlocks: [],
});
expect(validate(result).valid).toBeTruthy();
@@ -1044,7 +1046,7 @@ describe('htmlToStructuredText', () => {
const html = `
<${tagName}>${markName}${tagName}>
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
const span = find(result.document, 'span');
expect(span.marks).toBeTruthy();
@@ -1058,7 +1060,7 @@ describe('htmlToStructuredText', () => {
const html = `
<${tagName}>${markName}${tagName}>
`;
- const result = await parse5HtmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredTextShim(html, {
allowedMarks: [],
});
expect(validate(result).valid).toBeTruthy();
@@ -1071,7 +1073,7 @@ describe('htmlToStructuredText', () => {
const html = `
emstrong-emu-strong-emstrong-emem
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(
findAll(result.document, 'span')
@@ -1094,7 +1096,7 @@ describe('htmlToStructuredText', () => {
describe('code', () => {
it('turns inline code tags to span with code mark', async () => {
const html = `To make it even easier to offer responsive, progressive images on your projects, we released a package called react-datocms that exposes an <Image /> component and pairs perfectly with the responsiveImage query.
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(findAll(result.document, 'code')).toHaveLength(0);
const spans = findAll(result.document, 'span').filter(
@@ -1115,7 +1117,7 @@ describe('htmlToStructuredText', () => {
const html = `
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(result).toBe(null);
});
@@ -1124,7 +1126,7 @@ describe('htmlToStructuredText', () => {
const html = `
hello
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children[0].type).toBe('paragraph');
expect(result.document.children).toHaveLength(1);
@@ -1139,7 +1141,7 @@ describe('htmlToStructuredText', () => {
const html = `
hello
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children[0].type).toBe('paragraph');
expect(result.document.children).toHaveLength(1);
@@ -1154,7 +1156,7 @@ describe('htmlToStructuredText', () => {
const html = `
hello
world
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children[0].type).toBe('paragraph');
expect(result.document.children).toHaveLength(1);
@@ -1169,7 +1171,7 @@ describe('htmlToStructuredText', () => {
const html = `
hello
world
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children[0].type).toBe('paragraph');
expect(result.document.children).toHaveLength(1);
@@ -1184,7 +1186,7 @@ describe('htmlToStructuredText', () => {
const html = `
hello
world
`;
- const result = await parse5HtmlToStructuredText(html);
+ const result = await parse5HtmlToStructuredTextShim(html);
expect(validate(result).valid).toBeTruthy();
expect(result.document.children.map((c) => c.type).join(',')).toBe(
'paragraph,paragraph',
@@ -1285,7 +1287,7 @@ describe('htmlToStructuredText', () => {
const html = `
heading
`;
- const result = await parse5HtmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredTextShim(html, {
preprocess: (tree) => {
findAll(tree, (node) => {
if (node.type === 'element' && node.tagName === 'p') {
@@ -1320,7 +1322,7 @@ describe('htmlToStructuredText', () => {
`;
- const result = await parse5HtmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredTextShim(html, {
preprocess: liftImages,
handlers: {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
@@ -1417,7 +1419,7 @@ describe('htmlToStructuredText', () => {
`;
- const result = await parse5HtmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredTextShim(html, {
preprocess: liftImages,
handlers: {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
@@ -1471,7 +1473,7 @@ describe('htmlToStructuredText', () => {
`;
- const result = await parse5HtmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredTextShim(html, {
preprocess: liftImages,
handlers: {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
@@ -1529,7 +1531,7 @@ describe('htmlToStructuredText', () => {
item 3
`;
- const result = await parse5HtmlToStructuredText(html, {
+ const result = await parse5HtmlToStructuredTextShim(html, {
preprocess: (tree) => {
visit(tree, (node, index, parents) => {
if (node.tagName === 'img' && parents.length > 1) {
@@ -1567,7 +1569,7 @@ describe('htmlToStructuredText', () => {
describe('preprocessors', () => {
describe('Google Docs', () => {
const googleDocsToStructuredText = (html: string, options: Options) =>
- parse5HtmlToStructuredText(html, {
+ parse5HtmlToStructuredTextShim(html, {
...options,
preprocess: googleDocsPreprocessor,
});
From f2282506f50b6d229b8eedff5887da12d0f44725 Mon Sep 17 00:00:00 2001
From: Roger Tuan
Date: Wed, 7 Jan 2026 14:09:10 -0800
Subject: [PATCH 3/5] Maintain old import order
---
packages/html-to-structured-text/__tests__/all.test.ts | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/packages/html-to-structured-text/__tests__/all.test.ts b/packages/html-to-structured-text/__tests__/all.test.ts
index b796483..f0e919f 100644
--- a/packages/html-to-structured-text/__tests__/all.test.ts
+++ b/packages/html-to-structured-text/__tests__/all.test.ts
@@ -1,11 +1,10 @@
/* eslint-disable @typescript-eslint/ban-ts-comment */
// @ts-nocheck
-import { Options, parse5ToStructuredText } from '../src';
+import { parse5ToStructuredText, Options } from '../src';
import parse5 from 'parse5';
import { allowedChildren, Span, validate } from 'datocms-structured-text-utils';
-import { CONTINUE, find, findAll, visit } from 'unist-utils-core';
-import googleDocsPreprocessor from '../src/preprocessors/google-docs';
+import { findAll, find, visit, CONTINUE } from 'unist-utils-core';
/* This is a shim for NodeJS. The actual CMS software usually uses DOMParser.
Don't call it htmlToStructuredText() because there is an exported function
From c5ddac9e9c83c31143bb6b910ce5ae73e34a8b23 Mon Sep 17 00:00:00 2001
From: Roger Tuan
Date: Wed, 7 Jan 2026 14:10:21 -0800
Subject: [PATCH 4/5] Add missing import
---
packages/html-to-structured-text/__tests__/all.test.ts | 1 +
1 file changed, 1 insertion(+)
diff --git a/packages/html-to-structured-text/__tests__/all.test.ts b/packages/html-to-structured-text/__tests__/all.test.ts
index f0e919f..d5dd9a9 100644
--- a/packages/html-to-structured-text/__tests__/all.test.ts
+++ b/packages/html-to-structured-text/__tests__/all.test.ts
@@ -5,6 +5,7 @@ import { parse5ToStructuredText, Options } from '../src';
import parse5 from 'parse5';
import { allowedChildren, Span, validate } from 'datocms-structured-text-utils';
import { findAll, find, visit, CONTINUE } from 'unist-utils-core';
+import googleDocsPreprocessor from '../src/preprocessors/google-docs';
/* This is a shim for NodeJS. The actual CMS software usually uses DOMParser.
Don't call it htmlToStructuredText() because there is an exported function
From c15779a8bdf217d9971e9e7376c2aad1cf3fdcbd Mon Sep 17 00:00:00 2001
From: Roger Tuan
Date: Wed, 7 Jan 2026 14:15:10 -0800
Subject: [PATCH 5/5] Add missing await on assertRichTextPaste call
---
packages/html-to-structured-text/__tests__/all.test.ts | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/packages/html-to-structured-text/__tests__/all.test.ts b/packages/html-to-structured-text/__tests__/all.test.ts
index d5dd9a9..be608b3 100644
--- a/packages/html-to-structured-text/__tests__/all.test.ts
+++ b/packages/html-to-structured-text/__tests__/all.test.ts
@@ -1732,7 +1732,7 @@ describe('preprocessors', () => {
const firefoxHtml = `
This is a test of rich text.
`;
- assertRichTextPaste(firefoxHtml);
+ await assertRichTextPaste(firefoxHtml);
});
it('when copied from Chrome (Jan 2026)', async () => {