From c401cb84e33a2aa0bb76da39dc4358f134ef2be6 Mon Sep 17 00:00:00 2001 From: Ryan Nystrom Date: Tue, 3 Dec 2024 14:40:19 -0500 Subject: [PATCH 1/2] fix: trim ld+json before parsing --- lib/extract.ts | 2 +- tests/unit/static.spec.ts | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/lib/extract.ts b/lib/extract.ts index 0c6bf52..f50fb74 100644 --- a/lib/extract.ts +++ b/lib/extract.ts @@ -95,7 +95,7 @@ export default function extractMetaTags(body: string, options: OpenGraphScraperO $('script').each((index, script) => { if (script.attribs.type && script.attribs.type === 'application/ld+json') { if (!ogObject.jsonLD) ogObject.jsonLD = []; - let scriptText = $(script).text(); + let scriptText = $(script).text().trim(); if (scriptText) { scriptText = scriptText.replace(/(\r\n|\n|\r)/gm, ''); // remove newlines scriptText = unescapeScriptText(scriptText); diff --git a/tests/unit/static.spec.ts b/tests/unit/static.spec.ts index d7c3ccd..5ac1790 100644 --- a/tests/unit/static.spec.ts +++ b/tests/unit/static.spec.ts @@ -257,6 +257,28 @@ describe('static check meta tags', function () { }); }); + it('jsonLD - empty with whitespace', function () { + const metaHTML = ` + + `; + + mockAgent.get('http://www.test.com') + .intercept({ path: '/' }) + .reply(200, metaHTML); + + return ogs({ url: 'www.test.com' }) + .then(function (data) { + expect(data.result.success).to.be.eql(true); + expect(data.result.requestUrl).to.be.eql('http://www.test.com'); + expect(data.result.jsonLD).to.be.eql([{ foo: ' bar ' }]); + expect(data.html).to.be.eql(metaHTML); + expect(data.response).to.be.a('response'); + }); + }); + it('encoding - utf-8', function () { /* eslint-disable max-len */ const metaHTML = ` From 51cd42d0f98dfc6303f7eb2c68775f898868df8d Mon Sep 17 00:00:00 2001 From: Ryan Nystrom Date: Tue, 3 Dec 2024 15:08:18 -0500 Subject: [PATCH 2/2] fix tests --- tests/unit/static.spec.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/static.spec.ts b/tests/unit/static.spec.ts index 5ac1790..6a29541 100644 --- a/tests/unit/static.spec.ts +++ b/tests/unit/static.spec.ts @@ -273,7 +273,7 @@ describe('static check meta tags', function () { .then(function (data) { expect(data.result.success).to.be.eql(true); expect(data.result.requestUrl).to.be.eql('http://www.test.com'); - expect(data.result.jsonLD).to.be.eql([{ foo: ' bar ' }]); + expect(data.result.jsonLD).to.be.eql([]); expect(data.html).to.be.eql(metaHTML); expect(data.response).to.be.a('response'); });