"use strict";

import { XMLParser } from "../src/fxp.js";

/**
 * Regression suite for tag names that contain "." characters.
 *
 * The parser tracks its position as a dot-joined jPath string, so a dotted
 * tag name must be pushed onto and popped off that path as ONE segment.
 * Each fixture below pairs an XML document with the object the parser is
 * expected to produce for it.
 */
describe("XMLParser with dotted tag names", function () {

    it("should correctly parse tags containing dots", function () {
        const xmlData = `<root>
            <catalog>
                <product>
                    <vendor.name>Acme Corp</vendor.name>
                    <vendor.country>US</vendor.country>
                </product>
            </catalog>
        </root>`;

        const expected = {
            "root": {
                "catalog": {
                    "product": {
                        "vendor.name": "Acme Corp",
                        "vendor.country": "US"
                    }
                }
            }
        };

        const parser = new XMLParser();
        const result = parser.parse(xmlData);
        expect(result).toEqual(expected);
    });

    it("should correctly parse nested dotted tag names", function () {
        const xmlData = `<root><a.b><c.d>val</c.d></a.b></root>`;

        const expected = {
            "root": {
                "a.b": {
                    "c.d": "val"
                }
            }
        };

        const parser = new XMLParser();
        const result = parser.parse(xmlData);
        expect(result).toEqual(expected);
    });

    it("should correctly parse tag names with multiple dots", function () {
        const xmlData = `<root><a.b.c>val</a.b.c></root>`;

        const expected = {
            "root": {
                "a.b.c": "val"
            }
        };

        const parser = new XMLParser();
        const result = parser.parse(xmlData);
        expect(result).toEqual(expected);
    });

    it("should correctly parse self-closing tags with dots", function () {
        const xmlData = `<root><a.b/></root>`;

        const expected = {
            "root": {
                "a.b": ""
            }
        };

        const parser = new XMLParser();
        const result = parser.parse(xmlData);
        expect(result).toEqual(expected);
    });

    it("should correctly parse dotted tags with attributes", function () {
        const xmlData = `<root><a.b id="1">val</a.b></root>`;

        const expected = {
            "root": {
                "a.b": {
                    "#text": "val",
                    "@_id": "1"
                }
            }
        };

        const parser = new XMLParser({ ignoreAttributes: false });
        const result = parser.parse(xmlData);
        expect(result).toEqual(expected);
    });

    it("should correctly parse repeated dotted tags without jPath accumulation", function () {
        const xmlData = '<root>' + '<item><a.b>x</a.b></item>'.repeat(100) + '</root>';

        // The parsed values are the same whether or not the internal jPath
        // leaks segments, so also record the jPath handed to the value
        // processor: every "x" must be reported at exactly the same path.
        const seenPaths = new Set();
        const parser = new XMLParser({
            tagValueProcessor: (tagName, tagValue, jPath) => {
                seenPaths.add(jPath);
                return tagValue;
            }
        });
        const result = parser.parse(xmlData);

        expect(result.root.item.length).toBe(100);
        for (const item of result.root.item) {
            expect(item["a.b"]).toBe("x");
        }
        expect([...seenPaths]).toEqual(["root.item.a.b"]);
    });

    it("should correctly parse dotted tags with preserveOrder", function () {
        const xmlData = `<root><a.b>val</a.b></root>`;

        const parser = new XMLParser({ preserveOrder: true });
        const result = parser.parse(xmlData);

        expect(result[0]["root"][0]["a.b"][0]["#text"]).toBe("val");
    });

    it("should correctly handle dotted tags mixed with normal tags", function () {
        const xmlData = `<root>
            <normal>text</normal>
            <dotted.tag>value</dotted.tag>
            <another>text2</another>
        </root>`;

        const expected = {
            "root": {
                "normal": "text",
                "dotted.tag": "value",
                "another": "text2"
            }
        };

        const parser = new XMLParser();
        const result = parser.parse(xmlData);
        expect(result).toEqual(expected);
    });

    it("should correctly parse dotted tags with unpaired tags", function () {
        const xmlData = `<root>
            <a.b>val</a.b>
            <br>
            <c.d>val2</c.d>
        </root>`;

        const expected = {
            "root": {
                "a.b": "val",
                "br": "",
                "c.d": "val2"
            }
        };

        const parser = new XMLParser({ unpairedTags: ["br"] });
        const result = parser.parse(xmlData);
        expect(result).toEqual(expected);
    });

    it("should correctly parse dotted tags with stopNodes", function () {
        // The stop node path is the jPath of <a.b>, i.e. "root" + "." + "a.b".
        const xmlData = `<root><a.b><p>raw</p></a.b></root>`;

        const expected = {
            "root": {
                "a.b": "<p>raw</p>"
            }
        };

        const parser = new XMLParser({ stopNodes: ["root.a.b"] });
        const result = parser.parse(xmlData);
        expect(result).toEqual(expected);
    });

    it("should not degrade to O(n²) with many dotted tags", function () {
        const n = 5000;
        const xml = '<root>' + '<item><a.b>x</a.b></item>'.repeat(n) + '</root>';

        const parser = new XMLParser();
        const start = performance.now();
        parser.parse(xml);
        const elapsed = performance.now() - start;

        // With the bug, this would take many seconds due to O(n²) jPath growth.
        // With the fix, this should complete well under 2 seconds; the bound
        // is deliberately generous so slow CI machines do not trip it.
        expect(elapsed).toBeLessThan(2000);
    });
});
currentNode.tagname : ""; if (tagName && this.options.unpairedTags.indexOf(tagName) !== -1) { throw new Error(`Unpaired tag can not be used as closing tag: `); } - let propIndex = 0 if (lastTagName && this.options.unpairedTags.indexOf(lastTagName) !== -1) { - propIndex = jPath.lastIndexOf('.', jPath.lastIndexOf('.') - 1) + jPath = popTagFromJPath(jPath, lastTagName); this.tagsNodeStack.pop(); - } else { - propIndex = jPath.lastIndexOf("."); } - jPath = jPath.substring(0, propIndex); + jPath = popTagFromJPath(jPath, tagName); currentNode = this.tagsNodeStack.pop();//avoid recursion, set the parent tag scope textData = ""; @@ -342,7 +339,7 @@ const parseXml = function (xmlData) { const lastTag = currentNode; if (lastTag && this.options.unpairedTags.indexOf(lastTag.tagname) !== -1) { currentNode = this.tagsNodeStack.pop(); - jPath = jPath.substring(0, jPath.lastIndexOf(".")); + jPath = popTagFromJPath(jPath, lastTag.tagname); } if (tagName !== xmlObj.tagname) { jPath += jPath ? "." + tagName : tagName; @@ -384,7 +381,7 @@ const parseXml = function (xmlData) { tagContent = this.parseTextData(tagContent, tagName, jPath, true, attrExpPresent, true, true); } - jPath = jPath.substr(0, jPath.lastIndexOf(".")); + jPath = popTagFromJPath(jPath, tagName); childNode.add(this.options.textNodeName, tagContent); this.addChild(currentNode, childNode, jPath, startIndex); @@ -412,7 +409,7 @@ const parseXml = function (xmlData) { childNode[":@"] = this.buildAttributesMap(tagExp, jPath, tagName); } this.addChild(currentNode, childNode, jPath, startIndex); - jPath = jPath.substr(0, jPath.lastIndexOf(".")); + jPath = popTagFromJPath(jPath, tagName); } else if(this.options.unpairedTags.indexOf(tagName) !== -1){//unpaired tag const childNode = new xmlNode(tagName); @@ -420,7 +417,7 @@ const parseXml = function (xmlData) { childNode[":@"] = this.buildAttributesMap(tagExp, jPath); } this.addChild(currentNode, childNode, jPath, startIndex); - jPath = jPath.substr(0, jPath.lastIndexOf(".")); + 
/**
 * Remove the last tag name from a dot-joined jPath.
 *
 * jPath is built by appending "." + tagName (or just tagName for the first
 * segment), so the inverse strips exactly that suffix instead of searching
 * for the last ".", which would split a tag name that itself contains dots.
 *
 * If jPath does not actually end with the given tag name (empty or missing
 * name, or a closing tag that does not match the open one), the function
 * falls back to cutting at the last "." so the path never ends up with a
 * dangling "." or a partially chopped segment.
 *
 * @param {string} jPath - Current dot-joined path, e.g. "root.item.a.b".
 * @param {string} tagName - Tag name expected to be the last path segment.
 * @returns {string} The parent path, or "" when no parent segment remains.
 */
function popTagFromJPath(jPath, tagName) {
  // Nothing to pop, or the path is exactly the (possibly dotted) root tag.
  if (!jPath || jPath === tagName) {
    return "";
  }

  if (tagName) {
    const suffix = `.${tagName}`;
    if (jPath.endsWith(suffix)) {
      return jPath.slice(0, jPath.length - suffix.length);
    }
  }

  // Path and tag name disagree: drop the last dot-delimited segment.
  const lastDot = jPath.lastIndexOf(".");
  return lastDot === -1 ? "" : jPath.substring(0, lastDot);
}