From eb71a79ac732a54d8898a468d03588273312dc6d Mon Sep 17 00:00:00 2001 From: Roger Tuan Date: Wed, 7 Jan 2026 13:50:09 -0800 Subject: [PATCH 1/5] Add new tests and clarify function name --- .../__tests__/all.test.ts | 231 ++++++++++++------ 1 file changed, 159 insertions(+), 72 deletions(-) diff --git a/packages/html-to-structured-text/__tests__/all.test.ts b/packages/html-to-structured-text/__tests__/all.test.ts index 46fb180..b14ac40 100644 --- a/packages/html-to-structured-text/__tests__/all.test.ts +++ b/packages/html-to-structured-text/__tests__/all.test.ts @@ -1,13 +1,14 @@ /* eslint-disable @typescript-eslint/ban-ts-comment */ // @ts-nocheck -import { parse5ToStructuredText, Options } from '../src'; +import { Options, parse5ToStructuredText } from '../src'; import parse5 from 'parse5'; import { allowedChildren, Span, validate } from 'datocms-structured-text-utils'; -import { findAll, find, visit, CONTINUE } from 'unist-utils-core'; +import { CONTINUE, find, findAll, visit } from 'unist-utils-core'; import googleDocsPreprocessor from '../src/preprocessors/google-docs'; -function htmlToStructuredText(html: string, options: Options = {}) { +// This is a shim for NodeJS. 
The actual CMS software uses DOMParser and can have different output, unfortunately +function parse5HtmlToStructuredText(html: string, options: Options = {}) { return parse5ToStructuredText( parse5.parse(html, { sourceCodeLocationInfo: true, @@ -19,14 +20,14 @@ function htmlToStructuredText(html: string, options: Options = {}) { describe('htmlToStructuredText', () => { it('works with empty document', async () => { const html = ''; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(result).toMatchInlineSnapshot(`null`); }); it('ignores doctype and HTML comments', async () => { const html = ` `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(result).toMatchInlineSnapshot(`null`); }); @@ -52,7 +53,7 @@ describe('htmlToStructuredText', () => { `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(result).toMatchInlineSnapshot(`null`); }); @@ -63,7 +64,7 @@ describe('htmlToStructuredText', () => {

twice

`; - const result = await htmlToStructuredText(html, { + const result = await parse5HtmlToStructuredText(html, { handlers: { // eslint-disable-next-line @typescript-eslint/no-unused-vars text: async (createNode, node, context) => { @@ -93,7 +94,7 @@ describe('htmlToStructuredText', () => { const html = `

twice

`; - const result = await htmlToStructuredText(html, { + const result = await parse5HtmlToStructuredText(html, { handlers: { p: (createNode, node, context) => { return [ @@ -115,7 +116,7 @@ describe('htmlToStructuredText', () => {

already wrapped

needs wrapping `; - const result = await htmlToStructuredText(html, { + const result = await parse5HtmlToStructuredText(html, { handlers: { // eslint-disable-next-line @typescript-eslint/no-unused-vars unknown: (createNode, node, context) => { @@ -144,7 +145,7 @@ describe('htmlToStructuredText', () => { const html = ` span `; - const result = await htmlToStructuredText(html, { + const result = await parse5HtmlToStructuredText(html, { handlers: { // eslint-disable-next-line @typescript-eslint/no-unused-vars custom: async (createNode, node, context) => { @@ -164,7 +165,7 @@ describe('htmlToStructuredText', () => {
override

regular paragraph

`; - const result = await htmlToStructuredText(html, { + const result = await parse5HtmlToStructuredText(html, { handlers: { blockquote: async (createNode, node, context) => { // turn a blockquote into a paragraph @@ -188,7 +189,7 @@ describe('htmlToStructuredText', () => {

already wrapped

needs wrapping `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children.map((child) => child.type)) .toMatchInlineSnapshot(` @@ -214,7 +215,7 @@ describe('htmlToStructuredText', () => { hyperlink `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect( @@ -250,7 +251,7 @@ describe('htmlToStructuredText', () => { const html = ` `; - const result = await htmlToStructuredText(html, { + const result = await parse5HtmlToStructuredText(html, { handlers: { base: async (createNode, node, context) => { expect(context.global.baseUrl).toBe(null); @@ -274,7 +275,7 @@ describe('htmlToStructuredText', () => { const html = ` contact `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); const { meta } = find(result.document, 'link'); expect(meta).toMatchInlineSnapshot(` @@ -300,7 +301,7 @@ describe('htmlToStructuredText', () => { contact `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(find(result.document, 'link').url).toBe( 'https://datocms.com/contact', @@ -312,7 +313,7 @@ describe('htmlToStructuredText', () => { contact `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(find(result.document, 'link').url).toBe( 'https://datocms.com/contact', @@ -324,7 +325,7 @@ describe('htmlToStructuredText', () => { contact `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(find(result.document, 'link').url).toBe( 
'https://datocms.com/t/contact', @@ -336,7 +337,7 @@ describe('htmlToStructuredText', () => { contact `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(find(result.document, 'link').url).toBe( 'https://datocms.com/b/contact', @@ -348,7 +349,7 @@ describe('htmlToStructuredText', () => { contact `; - const result = await htmlToStructuredText(html, { + const result = await parse5HtmlToStructuredText(html, { shared: { baseUrl: 'http://acme.com', }, @@ -364,7 +365,7 @@ describe('htmlToStructuredText', () => { contact `; - const result = await htmlToStructuredText(html, { + const result = await parse5HtmlToStructuredText(html, { shared: { baseUrl: 'http://acme.com', baseUrlFound: true, @@ -387,7 +388,7 @@ describe('htmlToStructuredText', () => { span `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(findAll(result.document, 'paragraph')).toHaveLength(1); const spans = findAll(result.document, 'span'); @@ -427,7 +428,7 @@ describe('htmlToStructuredText', () => { nested implicit paragraph `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children.map((child) => child.type)) .toMatchInlineSnapshot(` @@ -448,7 +449,7 @@ describe('htmlToStructuredText', () => { [span becomes simple text]

`; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children).toMatchInlineSnapshot(` Array [ @@ -486,7 +487,7 @@ describe('htmlToStructuredText', () => {
[separate paragraph]

`; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children).toMatchInlineSnapshot(` Array [ @@ -518,7 +519,7 @@ describe('htmlToStructuredText', () => { const html = `

needs wrapping

`; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children[0].type).toBe('heading'); expect(result.document.children[0].children[0].type).toBe('span'); @@ -529,7 +530,7 @@ describe('htmlToStructuredText', () => { needs wrapping

hello

`; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children.map((child) => child.type)) .toMatchInlineSnapshot(` @@ -546,7 +547,7 @@ describe('htmlToStructuredText', () => {

needs wrapping

hello

`; - const result = await htmlToStructuredText(html, { + const result = await parse5HtmlToStructuredText(html, { allowedHeadingLevels: [2], }); expect(validate(result).valid).toBeTruthy(); @@ -564,7 +565,7 @@ describe('htmlToStructuredText', () => { const html = `

p not allowed inside h1

`; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(findAll(result.document, 'paragraph')).toHaveLength(0); }); @@ -573,7 +574,7 @@ describe('htmlToStructuredText', () => { const html = `

span link

`; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children[0].children).toMatchInlineSnapshot(` Array [ @@ -608,7 +609,7 @@ describe('htmlToStructuredText', () => { `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(findAll(result.document, 'heading')).toHaveLength(0); }); @@ -617,7 +618,7 @@ describe('htmlToStructuredText', () => { const html = `

dato

`; - const result = await htmlToStructuredText(html, { + const result = await parse5HtmlToStructuredText(html, { allowedBlocks: [], }); expect(validate(result).valid).toBeTruthy(); @@ -632,7 +633,7 @@ describe('htmlToStructuredText', () => { const html = `
<import src="file.html" />
`; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children[0]).toMatchInlineSnapshot(` Object { @@ -649,7 +650,7 @@ describe('htmlToStructuredText', () => {
  • dast()
  • `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(find(result.document, 'paragraph')).toBeTruthy(); expect(findAll(result.document, 'code')).toHaveLength(0); @@ -668,7 +669,7 @@ describe('htmlToStructuredText', () => { const html = ` dast() `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(findAll(result.document, 'code')).toHaveLength(1); expect(findAll(result.document, 'code')[0]).toMatchInlineSnapshot(` @@ -684,7 +685,7 @@ describe('htmlToStructuredText', () => { const html = ` dast() `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(findAll(result.document, 'code')).toHaveLength(1); expect(findAll(result.document, 'code')[0].language).toBeFalsy(); @@ -694,7 +695,7 @@ describe('htmlToStructuredText', () => { const html = ` let dato `; - const result = await htmlToStructuredText(html, { + const result = await parse5HtmlToStructuredText(html, { allowedBlocks: [], }); expect(validate(result).valid).toBeTruthy(); @@ -707,7 +708,7 @@ describe('htmlToStructuredText', () => { const html = `
    foo
    bar
    `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children).toMatchInlineSnapshot(` Array [ @@ -727,7 +728,7 @@ describe('htmlToStructuredText', () => {
    1
    2
    `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children.map((child) => child.type)) .toMatchInlineSnapshot(` @@ -758,7 +759,7 @@ describe('htmlToStructuredText', () => { const html = `
    1
    2
    `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children).toMatchInlineSnapshot(` Array [ @@ -793,7 +794,7 @@ describe('htmlToStructuredText', () => { const html = `
    dato
    `; - const result = await htmlToStructuredText(html, { + const result = await parse5HtmlToStructuredText(html, { allowedBlocks: [], }); expect(validate(result).valid).toBeTruthy(); @@ -808,7 +809,7 @@ describe('htmlToStructuredText', () => { const html = ` `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children[0].style).toBe('bulleted'); }); @@ -817,7 +818,7 @@ describe('htmlToStructuredText', () => { const html = `
    1. test
    `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children[0].style).toBe('numbered'); }); @@ -831,7 +832,7 @@ describe('htmlToStructuredText', () => {
  • 4

  • `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect( find(result.document, 'list').children.every( @@ -846,7 +847,7 @@ describe('htmlToStructuredText', () => {
  • `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(find(find(result.document, 'list'), 'list')).toBeTruthy(); }); @@ -857,7 +858,7 @@ describe('htmlToStructuredText', () => {
  • 1
  • `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(findAll(result.document, 'blockquote')).toHaveLength(0); expect(find(result.document, 'span').value).toBe('1'); @@ -869,7 +870,7 @@ describe('htmlToStructuredText', () => {
  • 1

  • `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(findAll(result.document, 'h1')).toHaveLength(0); expect(find(result.document, 'span').value).toBe('1'); @@ -881,7 +882,7 @@ describe('htmlToStructuredText', () => {
  • 1
  • `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(findAll(result.document, 'code')).toHaveLength(0); expect(find(result.document, 'span').value).toBe('1'); @@ -894,7 +895,7 @@ describe('htmlToStructuredText', () => { 3 `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(findAll(result.document, 'link')).toHaveLength(2); const items = findAll(result.document, 'listItem').map((listItem) => @@ -923,7 +924,7 @@ describe('htmlToStructuredText', () => { `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); const lists = findAll(result.document, 'list'); expect(lists).toHaveLength(1); @@ -945,7 +946,7 @@ describe('htmlToStructuredText', () => {
    1
    `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); const thematicBreaks = findAll(result.document, 'thematicBreak'); expect(thematicBreaks).toHaveLength(1); @@ -957,7 +958,7 @@ describe('htmlToStructuredText', () => {
  • dato
  • `; - const result = await htmlToStructuredText(html, { + const result = await parse5HtmlToStructuredText(html, { allowedBlocks: [], }); expect(validate(result).valid).toBeTruthy(); @@ -972,7 +973,7 @@ describe('htmlToStructuredText', () => { const html = ` 1 `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children[0].type).toBe('paragraph'); expect(find(find(result.document, 'paragraph'), 'link')).toBeTruthy(); @@ -983,7 +984,7 @@ describe('htmlToStructuredText', () => { const html = `

    1

    2
    `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children[0].type).toBe('heading'); expect(find(find(result.document, 'heading'), 'link')).toBeTruthy(); @@ -994,7 +995,7 @@ describe('htmlToStructuredText', () => { const html = ` `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(findAll(result.document, 'heading')).toHaveLength(0); }); @@ -1004,7 +1005,7 @@ describe('htmlToStructuredText', () => { const html = `

    dato

    2
    `; - const result = await htmlToStructuredText(html, { + const result = await parse5HtmlToStructuredText(html, { allowedBlocks: [], }); expect(validate(result).valid).toBeTruthy(); @@ -1043,7 +1044,7 @@ describe('htmlToStructuredText', () => { const html = `

    <${tagName}>${markName}

    `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); const span = find(result.document, 'span'); expect(span.marks).toBeTruthy(); @@ -1057,7 +1058,7 @@ describe('htmlToStructuredText', () => { const html = `

    <${tagName}>${markName}

    `; - const result = await htmlToStructuredText(html, { + const result = await parse5HtmlToStructuredText(html, { allowedMarks: [], }); expect(validate(result).valid).toBeTruthy(); @@ -1070,7 +1071,7 @@ describe('htmlToStructuredText', () => { const html = `

    emstrong-emu-strong-emstrong-emem

    `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect( findAll(result.document, 'span') @@ -1093,7 +1094,7 @@ describe('htmlToStructuredText', () => { describe('code', () => { it('turns inline code tags to span with code mark', async () => { const html = `

    To make it even easier to offer responsive, progressive images on your projects, we released a package called react-datocms that exposes an <Image /> component and pairs perfectly with the responsiveImage query.

    `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(findAll(result.document, 'code')).toHaveLength(0); const spans = findAll(result.document, 'span').filter( @@ -1114,7 +1115,7 @@ describe('htmlToStructuredText', () => { const html = `
    `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(result).toBe(null); }); @@ -1123,7 +1124,7 @@ describe('htmlToStructuredText', () => { const html = `

    hello `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children[0].type).toBe('paragraph'); expect(result.document.children).toHaveLength(1); @@ -1138,7 +1139,7 @@ describe('htmlToStructuredText', () => { const html = ` hello

    `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children[0].type).toBe('paragraph'); expect(result.document.children).toHaveLength(1); @@ -1153,7 +1154,7 @@ describe('htmlToStructuredText', () => { const html = ` hello
    world `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children[0].type).toBe('paragraph'); expect(result.document.children).toHaveLength(1); @@ -1168,7 +1169,7 @@ describe('htmlToStructuredText', () => { const html = `

    hello
    world

    `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children[0].type).toBe('paragraph'); expect(result.document.children).toHaveLength(1); @@ -1183,7 +1184,7 @@ describe('htmlToStructuredText', () => { const html = `

    hello


    world
    `; - const result = await htmlToStructuredText(html); + const result = await parse5HtmlToStructuredText(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children.map((c) => c.type).join(',')).toBe( 'paragraph,paragraph', @@ -1284,7 +1285,7 @@ describe('htmlToStructuredText', () => { const html = `

    heading

    `; - const result = await htmlToStructuredText(html, { + const result = await parse5HtmlToStructuredText(html, { preprocess: (tree) => { findAll(tree, (node) => { if (node.type === 'element' && node.tagName === 'p') { @@ -1319,7 +1320,7 @@ describe('htmlToStructuredText', () => { `; - const result = await htmlToStructuredText(html, { + const result = await parse5HtmlToStructuredText(html, { preprocess: liftImages, handlers: { // eslint-disable-next-line @typescript-eslint/no-unused-vars @@ -1416,7 +1417,7 @@ describe('htmlToStructuredText', () => { `; - const result = await htmlToStructuredText(html, { + const result = await parse5HtmlToStructuredText(html, { preprocess: liftImages, handlers: { // eslint-disable-next-line @typescript-eslint/no-unused-vars @@ -1470,7 +1471,7 @@ describe('htmlToStructuredText', () => { `; - const result = await htmlToStructuredText(html, { + const result = await parse5HtmlToStructuredText(html, { preprocess: liftImages, handlers: { // eslint-disable-next-line @typescript-eslint/no-unused-vars @@ -1528,7 +1529,7 @@ describe('htmlToStructuredText', () => {
  • item 3
  • `; - const result = await htmlToStructuredText(html, { + const result = await parse5HtmlToStructuredText(html, { preprocess: (tree) => { visit(tree, (node, index, parents) => { if (node.tagName === 'img' && parents.length > 1) { @@ -1566,7 +1567,7 @@ describe('htmlToStructuredText', () => { describe('preprocessors', () => { describe('Google Docs', () => { const googleDocsToStructuredText = (html: string, options: Options) => - htmlToStructuredText(html, { + parse5HtmlToStructuredText(html, { ...options, preprocess: googleDocsPreprocessor, }); @@ -1660,5 +1661,91 @@ describe('preprocessors', () => { expect(spans).toHaveLength(1); expect(spans[0].marks).toEqual(['strong']); }); + + /* Different browsers will copy different HTML from GDocs + See https://community.datocms.com/t/structure-text-editor-does-not-copy-paste-styles-anymore/8466/8 for discussion + You can use this gdoc as a test: https://docs.google.com/document/d/1H2WeqDd-VWywlds5big1_YVSNx3rrFn6FsIFVDuSWeg/edit?usp=sharing + And this tool to get the raw HTML in the clipboard: https://evercoder.github.io/clipboard-inspector/ + */ + describe('correctly preserves rich text formatting across browsers', () => { + const assertRichTextPaste = async (html: string) => { + const result = await googleDocsToStructuredText(html); + + expect(validate(result).valid).toBeTruthy(); + + // "This is a" should not be marked + expect( + findAll( + result.document, + (node) => + node.type === 'span' && + node.value === 'This is a ' && + node.marks === undefined, + ), + ).toHaveLength(1); + + // "test" should be bold + expect( + findAll( + result.document, + (node) => + node.type === 'span' && + node.value === 'test' && + Array.isArray(node.marks) && + node.marks.length === 1 && + node.marks[0] === 'strong', + ), + ).toHaveLength(1); + + // "rich" is italics + expect( + findAll( + result.document, + (node) => + node.type === 'span' && + node.value === 'rich' && + Array.isArray(node.marks) && + node.marks.length === 1 && + 
node.marks[0] === 'emphasis', + ), + ).toHaveLength(1); + + // "text" is bold, underline, and italics + expect( + findAll( + result.document, + (node) => + node.type === 'span' && + node.value === 'text' && + Array.isArray(node.marks) && + node.marks.length === 3 && + node.marks.includes('strong') && + node.marks.includes('emphasis') && + node.marks.includes('underline'), + ), + ).toHaveLength(1); + }; + + it('when copied from Firefox (Jan 2026)', async () => { + const firefoxHtml = ` +

    This is a test of rich text.

    + `; + await assertRichTextPaste(firefoxHtml); + }); + + it('when copied from Chrome (Jan 2026)', async () => { + const chromeHtml = ` +

    This is a test of rich text.


    + `; + await assertRichTextPaste(chromeHtml); + }); + + it('when copied from Safari (Jan 2026)', async () => { + const safariHtml = ` +

    This is a test of rich text.



    + `; + await assertRichTextPaste(safariHtml); + }); + }); }); }); From 6d691e8e44fd34ee786cc8b7b86705887425b637 Mon Sep 17 00:00:00 2001 From: Roger Tuan Date: Wed, 7 Jan 2026 14:04:11 -0800 Subject: [PATCH 2/5] Clarify function name --- .../__tests__/all.test.ts | 144 +++++++++--------- 1 file changed, 73 insertions(+), 71 deletions(-) diff --git a/packages/html-to-structured-text/__tests__/all.test.ts b/packages/html-to-structured-text/__tests__/all.test.ts index b14ac40..b796483 100644 --- a/packages/html-to-structured-text/__tests__/all.test.ts +++ b/packages/html-to-structured-text/__tests__/all.test.ts @@ -7,8 +7,10 @@ import { allowedChildren, Span, validate } from 'datocms-structured-text-utils'; import { CONTINUE, find, findAll, visit } from 'unist-utils-core'; import googleDocsPreprocessor from '../src/preprocessors/google-docs'; -// This is a shim for NodeJS. The actual CMS software uses DOMParser and can have different output, unfortunately -function parse5HtmlToStructuredText(html: string, options: Options = {}) { +/* This is a shim for NodeJS. The actual CMS software usually uses DOMParser. 
+ Don't call it htmlToStructuredText() because there is an exported function + by that same name already (in ../src) */ +function parse5HtmlToStructuredTextShim(html: string, options: Options = {}) { return parse5ToStructuredText( parse5.parse(html, { sourceCodeLocationInfo: true, @@ -20,14 +22,14 @@ function parse5HtmlToStructuredText(html: string, options: Options = {}) { describe('htmlToStructuredText', () => { it('works with empty document', async () => { const html = ''; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(result).toMatchInlineSnapshot(`null`); }); it('ignores doctype and HTML comments', async () => { const html = ` `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(result).toMatchInlineSnapshot(`null`); }); @@ -53,7 +55,7 @@ describe('htmlToStructuredText', () => { `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(result).toMatchInlineSnapshot(`null`); }); @@ -64,7 +66,7 @@ describe('htmlToStructuredText', () => {

    twice

    `; - const result = await parse5HtmlToStructuredText(html, { + const result = await parse5HtmlToStructuredTextShim(html, { handlers: { // eslint-disable-next-line @typescript-eslint/no-unused-vars text: async (createNode, node, context) => { @@ -94,7 +96,7 @@ describe('htmlToStructuredText', () => { const html = `

    twice

    `; - const result = await parse5HtmlToStructuredText(html, { + const result = await parse5HtmlToStructuredTextShim(html, { handlers: { p: (createNode, node, context) => { return [ @@ -116,7 +118,7 @@ describe('htmlToStructuredText', () => {

    already wrapped

    needs wrapping `; - const result = await parse5HtmlToStructuredText(html, { + const result = await parse5HtmlToStructuredTextShim(html, { handlers: { // eslint-disable-next-line @typescript-eslint/no-unused-vars unknown: (createNode, node, context) => { @@ -145,7 +147,7 @@ describe('htmlToStructuredText', () => { const html = ` span `; - const result = await parse5HtmlToStructuredText(html, { + const result = await parse5HtmlToStructuredTextShim(html, { handlers: { // eslint-disable-next-line @typescript-eslint/no-unused-vars custom: async (createNode, node, context) => { @@ -165,7 +167,7 @@ describe('htmlToStructuredText', () => {
    override

    regular paragraph

    `; - const result = await parse5HtmlToStructuredText(html, { + const result = await parse5HtmlToStructuredTextShim(html, { handlers: { blockquote: async (createNode, node, context) => { // turn a blockquote into a paragraph @@ -189,7 +191,7 @@ describe('htmlToStructuredText', () => {

    already wrapped

    needs wrapping `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children.map((child) => child.type)) .toMatchInlineSnapshot(` @@ -215,7 +217,7 @@ describe('htmlToStructuredText', () => { hyperlink `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect( @@ -251,7 +253,7 @@ describe('htmlToStructuredText', () => { const html = ` `; - const result = await parse5HtmlToStructuredText(html, { + const result = await parse5HtmlToStructuredTextShim(html, { handlers: { base: async (createNode, node, context) => { expect(context.global.baseUrl).toBe(null); @@ -275,7 +277,7 @@ describe('htmlToStructuredText', () => { const html = ` contact `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); const { meta } = find(result.document, 'link'); expect(meta).toMatchInlineSnapshot(` @@ -301,7 +303,7 @@ describe('htmlToStructuredText', () => { contact `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(find(result.document, 'link').url).toBe( 'https://datocms.com/contact', @@ -313,7 +315,7 @@ describe('htmlToStructuredText', () => { contact `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(find(result.document, 'link').url).toBe( 'https://datocms.com/contact', @@ -325,7 +327,7 @@ describe('htmlToStructuredText', () => { contact `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); 
expect(validate(result).valid).toBeTruthy(); expect(find(result.document, 'link').url).toBe( 'https://datocms.com/t/contact', @@ -337,7 +339,7 @@ describe('htmlToStructuredText', () => { contact `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(find(result.document, 'link').url).toBe( 'https://datocms.com/b/contact', @@ -349,7 +351,7 @@ describe('htmlToStructuredText', () => { contact `; - const result = await parse5HtmlToStructuredText(html, { + const result = await parse5HtmlToStructuredTextShim(html, { shared: { baseUrl: 'http://acme.com', }, @@ -365,7 +367,7 @@ describe('htmlToStructuredText', () => { contact `; - const result = await parse5HtmlToStructuredText(html, { + const result = await parse5HtmlToStructuredTextShim(html, { shared: { baseUrl: 'http://acme.com', baseUrlFound: true, @@ -388,7 +390,7 @@ describe('htmlToStructuredText', () => { span `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(findAll(result.document, 'paragraph')).toHaveLength(1); const spans = findAll(result.document, 'span'); @@ -428,7 +430,7 @@ describe('htmlToStructuredText', () => { nested implicit paragraph `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children.map((child) => child.type)) .toMatchInlineSnapshot(` @@ -449,7 +451,7 @@ describe('htmlToStructuredText', () => { [span becomes simple text]

    `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children).toMatchInlineSnapshot(` Array [ @@ -487,7 +489,7 @@ describe('htmlToStructuredText', () => {
    [separate paragraph]

    `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children).toMatchInlineSnapshot(` Array [ @@ -519,7 +521,7 @@ describe('htmlToStructuredText', () => { const html = `

    needs wrapping

    `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children[0].type).toBe('heading'); expect(result.document.children[0].children[0].type).toBe('span'); @@ -530,7 +532,7 @@ describe('htmlToStructuredText', () => { needs wrapping

    hello

    `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children.map((child) => child.type)) .toMatchInlineSnapshot(` @@ -547,7 +549,7 @@ describe('htmlToStructuredText', () => {

    needs wrapping

    hello

    `; - const result = await parse5HtmlToStructuredText(html, { + const result = await parse5HtmlToStructuredTextShim(html, { allowedHeadingLevels: [2], }); expect(validate(result).valid).toBeTruthy(); @@ -565,7 +567,7 @@ describe('htmlToStructuredText', () => { const html = `

    p not allowed inside h1

    `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(findAll(result.document, 'paragraph')).toHaveLength(0); }); @@ -574,7 +576,7 @@ describe('htmlToStructuredText', () => { const html = `

    span link

    `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children[0].children).toMatchInlineSnapshot(` Array [ @@ -609,7 +611,7 @@ describe('htmlToStructuredText', () => { `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(findAll(result.document, 'heading')).toHaveLength(0); }); @@ -618,7 +620,7 @@ describe('htmlToStructuredText', () => { const html = `

    dato

    `; - const result = await parse5HtmlToStructuredText(html, { + const result = await parse5HtmlToStructuredTextShim(html, { allowedBlocks: [], }); expect(validate(result).valid).toBeTruthy(); @@ -633,7 +635,7 @@ describe('htmlToStructuredText', () => { const html = `
    <import src="file.html" />
    `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children[0]).toMatchInlineSnapshot(` Object { @@ -650,7 +652,7 @@ describe('htmlToStructuredText', () => {
  • dast()
  • `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(find(result.document, 'paragraph')).toBeTruthy(); expect(findAll(result.document, 'code')).toHaveLength(0); @@ -669,7 +671,7 @@ describe('htmlToStructuredText', () => { const html = ` dast() `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(findAll(result.document, 'code')).toHaveLength(1); expect(findAll(result.document, 'code')[0]).toMatchInlineSnapshot(` @@ -685,7 +687,7 @@ describe('htmlToStructuredText', () => { const html = ` dast() `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(findAll(result.document, 'code')).toHaveLength(1); expect(findAll(result.document, 'code')[0].language).toBeFalsy(); @@ -695,7 +697,7 @@ describe('htmlToStructuredText', () => { const html = ` let dato `; - const result = await parse5HtmlToStructuredText(html, { + const result = await parse5HtmlToStructuredTextShim(html, { allowedBlocks: [], }); expect(validate(result).valid).toBeTruthy(); @@ -708,7 +710,7 @@ describe('htmlToStructuredText', () => { const html = `
    foo
    bar
    `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children).toMatchInlineSnapshot(` Array [ @@ -728,7 +730,7 @@ describe('htmlToStructuredText', () => {
    1
    2
    `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children.map((child) => child.type)) .toMatchInlineSnapshot(` @@ -759,7 +761,7 @@ describe('htmlToStructuredText', () => { const html = `
    1
    2
    `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children).toMatchInlineSnapshot(` Array [ @@ -794,7 +796,7 @@ describe('htmlToStructuredText', () => { const html = `
    dato
    `; - const result = await parse5HtmlToStructuredText(html, { + const result = await parse5HtmlToStructuredTextShim(html, { allowedBlocks: [], }); expect(validate(result).valid).toBeTruthy(); @@ -809,7 +811,7 @@ describe('htmlToStructuredText', () => { const html = ` `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children[0].style).toBe('bulleted'); }); @@ -818,7 +820,7 @@ describe('htmlToStructuredText', () => { const html = `
    1. test
    `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children[0].style).toBe('numbered'); }); @@ -832,7 +834,7 @@ describe('htmlToStructuredText', () => {
  • 4

  • `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect( find(result.document, 'list').children.every( @@ -847,7 +849,7 @@ describe('htmlToStructuredText', () => {
    • 1
  • `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(find(find(result.document, 'list'), 'list')).toBeTruthy(); }); @@ -858,7 +860,7 @@ describe('htmlToStructuredText', () => {
  • 1
  • `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(findAll(result.document, 'blockquote')).toHaveLength(0); expect(find(result.document, 'span').value).toBe('1'); @@ -870,7 +872,7 @@ describe('htmlToStructuredText', () => {
  • 1

  • `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(findAll(result.document, 'h1')).toHaveLength(0); expect(find(result.document, 'span').value).toBe('1'); @@ -882,7 +884,7 @@ describe('htmlToStructuredText', () => {
  • 1
  • `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(findAll(result.document, 'code')).toHaveLength(0); expect(find(result.document, 'span').value).toBe('1'); @@ -895,7 +897,7 @@ describe('htmlToStructuredText', () => { 3 `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(findAll(result.document, 'link')).toHaveLength(2); const items = findAll(result.document, 'listItem').map((listItem) => @@ -924,7 +926,7 @@ describe('htmlToStructuredText', () => { `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); const lists = findAll(result.document, 'list'); expect(lists).toHaveLength(1); @@ -946,7 +948,7 @@ describe('htmlToStructuredText', () => {
    1
    `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); const thematicBreaks = findAll(result.document, 'thematicBreak'); expect(thematicBreaks).toHaveLength(1); @@ -958,7 +960,7 @@ describe('htmlToStructuredText', () => {
  • dato
  • `; - const result = await parse5HtmlToStructuredText(html, { + const result = await parse5HtmlToStructuredTextShim(html, { allowedBlocks: [], }); expect(validate(result).valid).toBeTruthy(); @@ -973,7 +975,7 @@ describe('htmlToStructuredText', () => { const html = ` 1 `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children[0].type).toBe('paragraph'); expect(find(find(result.document, 'paragraph'), 'link')).toBeTruthy(); @@ -984,7 +986,7 @@ describe('htmlToStructuredText', () => { const html = `

    1

    2
    `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children[0].type).toBe('heading'); expect(find(find(result.document, 'heading'), 'link')).toBeTruthy(); @@ -995,7 +997,7 @@ describe('htmlToStructuredText', () => { const html = ` `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(findAll(result.document, 'heading')).toHaveLength(0); }); @@ -1005,7 +1007,7 @@ describe('htmlToStructuredText', () => { const html = `

    dato

    2
    `; - const result = await parse5HtmlToStructuredText(html, { + const result = await parse5HtmlToStructuredTextShim(html, { allowedBlocks: [], }); expect(validate(result).valid).toBeTruthy(); @@ -1044,7 +1046,7 @@ describe('htmlToStructuredText', () => { const html = `

    <${tagName}>${markName}

    `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); const span = find(result.document, 'span'); expect(span.marks).toBeTruthy(); @@ -1058,7 +1060,7 @@ describe('htmlToStructuredText', () => { const html = `

    <${tagName}>${markName}

    `; - const result = await parse5HtmlToStructuredText(html, { + const result = await parse5HtmlToStructuredTextShim(html, { allowedMarks: [], }); expect(validate(result).valid).toBeTruthy(); @@ -1071,7 +1073,7 @@ describe('htmlToStructuredText', () => { const html = `

    emstrong-emu-strong-emstrong-emem

    `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect( findAll(result.document, 'span') @@ -1094,7 +1096,7 @@ describe('htmlToStructuredText', () => { describe('code', () => { it('turns inline code tags to span with code mark', async () => { const html = `

    To make it even easier to offer responsive, progressive images on your projects, we released a package called react-datocms that exposes an <Image /> component and pairs perfectly with the responsiveImage query.

    `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(findAll(result.document, 'code')).toHaveLength(0); const spans = findAll(result.document, 'span').filter( @@ -1115,7 +1117,7 @@ describe('htmlToStructuredText', () => { const html = `
    `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(result).toBe(null); }); @@ -1124,7 +1126,7 @@ describe('htmlToStructuredText', () => { const html = `

    hello `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children[0].type).toBe('paragraph'); expect(result.document.children).toHaveLength(1); @@ -1139,7 +1141,7 @@ describe('htmlToStructuredText', () => { const html = ` hello

    `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children[0].type).toBe('paragraph'); expect(result.document.children).toHaveLength(1); @@ -1154,7 +1156,7 @@ describe('htmlToStructuredText', () => { const html = ` hello
    world `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children[0].type).toBe('paragraph'); expect(result.document.children).toHaveLength(1); @@ -1169,7 +1171,7 @@ describe('htmlToStructuredText', () => { const html = `

    hello
    world

    `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children[0].type).toBe('paragraph'); expect(result.document.children).toHaveLength(1); @@ -1184,7 +1186,7 @@ describe('htmlToStructuredText', () => { const html = `

    hello


    world
    `; - const result = await parse5HtmlToStructuredText(html); + const result = await parse5HtmlToStructuredTextShim(html); expect(validate(result).valid).toBeTruthy(); expect(result.document.children.map((c) => c.type).join(',')).toBe( 'paragraph,paragraph', @@ -1285,7 +1287,7 @@ describe('htmlToStructuredText', () => { const html = `

    heading

    `; - const result = await parse5HtmlToStructuredText(html, { + const result = await parse5HtmlToStructuredTextShim(html, { preprocess: (tree) => { findAll(tree, (node) => { if (node.type === 'element' && node.tagName === 'p') { @@ -1320,7 +1322,7 @@ describe('htmlToStructuredText', () => { `; - const result = await parse5HtmlToStructuredText(html, { + const result = await parse5HtmlToStructuredTextShim(html, { preprocess: liftImages, handlers: { // eslint-disable-next-line @typescript-eslint/no-unused-vars @@ -1417,7 +1419,7 @@ describe('htmlToStructuredText', () => { `; - const result = await parse5HtmlToStructuredText(html, { + const result = await parse5HtmlToStructuredTextShim(html, { preprocess: liftImages, handlers: { // eslint-disable-next-line @typescript-eslint/no-unused-vars @@ -1471,7 +1473,7 @@ describe('htmlToStructuredText', () => { `; - const result = await parse5HtmlToStructuredText(html, { + const result = await parse5HtmlToStructuredTextShim(html, { preprocess: liftImages, handlers: { // eslint-disable-next-line @typescript-eslint/no-unused-vars @@ -1529,7 +1531,7 @@ describe('htmlToStructuredText', () => {
  • item 3
  • `; - const result = await parse5HtmlToStructuredText(html, { + const result = await parse5HtmlToStructuredTextShim(html, { preprocess: (tree) => { visit(tree, (node, index, parents) => { if (node.tagName === 'img' && parents.length > 1) { @@ -1567,7 +1569,7 @@ describe('htmlToStructuredText', () => { describe('preprocessors', () => { describe('Google Docs', () => { const googleDocsToStructuredText = (html: string, options: Options) => - parse5HtmlToStructuredText(html, { + parse5HtmlToStructuredTextShim(html, { ...options, preprocess: googleDocsPreprocessor, }); From f2282506f50b6d229b8eedff5887da12d0f44725 Mon Sep 17 00:00:00 2001 From: Roger Tuan Date: Wed, 7 Jan 2026 14:09:10 -0800 Subject: [PATCH 3/5] Maintain old import order --- packages/html-to-structured-text/__tests__/all.test.ts | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/packages/html-to-structured-text/__tests__/all.test.ts b/packages/html-to-structured-text/__tests__/all.test.ts index b796483..f0e919f 100644 --- a/packages/html-to-structured-text/__tests__/all.test.ts +++ b/packages/html-to-structured-text/__tests__/all.test.ts @@ -1,11 +1,10 @@ /* eslint-disable @typescript-eslint/ban-ts-comment */ // @ts-nocheck -import { Options, parse5ToStructuredText } from '../src'; +import { parse5ToStructuredText, Options } from '../src'; import parse5 from 'parse5'; import { allowedChildren, Span, validate } from 'datocms-structured-text-utils'; -import { CONTINUE, find, findAll, visit } from 'unist-utils-core'; -import googleDocsPreprocessor from '../src/preprocessors/google-docs'; +import { findAll, find, visit, CONTINUE } from 'unist-utils-core'; /* This is a shim for NodeJS. The actual CMS software usually uses DOMParser. 
Don't call it htmlToStructuredText() because there is an exported function From c5ddac9e9c83c31143bb6b910ce5ae73e34a8b23 Mon Sep 17 00:00:00 2001 From: Roger Tuan Date: Wed, 7 Jan 2026 14:10:21 -0800 Subject: [PATCH 4/5] Add missing import --- packages/html-to-structured-text/__tests__/all.test.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/html-to-structured-text/__tests__/all.test.ts b/packages/html-to-structured-text/__tests__/all.test.ts index f0e919f..d5dd9a9 100644 --- a/packages/html-to-structured-text/__tests__/all.test.ts +++ b/packages/html-to-structured-text/__tests__/all.test.ts @@ -5,6 +5,7 @@ import { parse5ToStructuredText, Options } from '../src'; import parse5 from 'parse5'; import { allowedChildren, Span, validate } from 'datocms-structured-text-utils'; import { findAll, find, visit, CONTINUE } from 'unist-utils-core'; +import googleDocsPreprocessor from '../src/preprocessors/google-docs'; /* This is a shim for NodeJS. The actual CMS software usually uses DOMParser. Don't call it htmlToStructuredText() because there is an exported function From c15779a8bdf217d9971e9e7376c2aad1cf3fdcbd Mon Sep 17 00:00:00 2001 From: Roger Tuan Date: Wed, 7 Jan 2026 14:15:10 -0800 Subject: [PATCH 5/5] Missing an await --- packages/html-to-structured-text/__tests__/all.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/html-to-structured-text/__tests__/all.test.ts b/packages/html-to-structured-text/__tests__/all.test.ts index d5dd9a9..be608b3 100644 --- a/packages/html-to-structured-text/__tests__/all.test.ts +++ b/packages/html-to-structured-text/__tests__/all.test.ts @@ -1732,7 +1732,7 @@ describe('preprocessors', () => { const firefoxHtml = `

    This is a test of rich text.

    `; - assertRichTextPaste(firefoxHtml); + await assertRichTextPaste(firefoxHtml); }); it('when copied from Chrome (Jan 2026)', async () => {