Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 23 additions & 3 deletions src/document.mts
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ function parse<Input>(
ctxt,
source,
url,
null,
options.encoding ?? null,
xmlOptions,
);
try {
Expand Down Expand Up @@ -281,12 +281,21 @@ export class XmlDocument extends XmlDisposable<XmlDocument> {

/**
 * Parse and create an {@link XmlDocument} from an XML string.
 *
 * Note: Only UTF-8 encoding is supported for string input.
 * For other encodings, use {@link fromBuffer} instead.
 *
 * @param source The XML string
 * @param options Parsing options
 * @throws XmlError when `options.encoding` is set to anything other than
 * 'utf-8' (compared case-insensitively)
 */
static fromString(
  source: string,
  options: ParseOptions = {},
): XmlDocument {
  // Encoding labels are case-insensitive, so 'UTF-8' is as valid as 'utf-8'.
  if (options.encoding && options.encoding.toLowerCase() !== 'utf-8') {
    throw new XmlError('Non-UTF-8 encoding is not supported for string input, use fromBuffer instead');
  }
  return parse(xmlReadString, source, options.url ?? null, options);
}

Expand All @@ -304,13 +313,22 @@ export class XmlDocument extends XmlDisposable<XmlDocument> {

/**
 * Save the XmlDocument to a string
 *
 * By default, it outputs utf-8 encoded bytes,
 * while `ascii` is another allowed option for `options.encoding`,
 * which converts non-ascii characters into numeric character references.
 * The encoding name is matched case-insensitively.
 *
 * When `options` is omitted, the output is formatted.
 *
 * @param options options to adjust the saving behavior
 * @throws XmlError when `options.encoding` is neither utf-8 nor ascii
 * @see {@link save}
 * @see {@link XmlElement#toString}
 */
toString(options?: SaveOptions): string {
  const saveOptions = options ?? { format: true };
  // Encoding labels are case-insensitive, so normalize before validating.
  const requested = saveOptions.encoding?.toLowerCase();
  if (requested && requested !== 'utf-8' && requested !== 'ascii') {
    throw new XmlError('Only utf-8 or ascii is supported in toString(). For other encodings, use save().');
  }
  const handler = new XmlStringOutputBufferHandler();
  // Serialize exactly once. The encoding is resolved after the spread so that an
  // explicit `encoding: undefined` (or '') cannot override the utf-8 default and
  // make save() fall back to the document's original encoding.
  this.save(handler, { ...saveOptions, encoding: saveOptions.encoding || 'utf-8' });

  return handler.result;
}
Expand All @@ -327,13 +345,15 @@ export class XmlDocument extends XmlDisposable<XmlDocument> {
/**
* Save the XmlDocument to a buffer and invoke the callbacks to process.
*
* By default, the output uses the document's original encoding.
*
* @param handler handlers to process the content in the buffer
* @param options options to adjust the saving behavior
* @see {@link toString}
* @see {@link XmlElement#save}
*/
save(handler: XmlOutputBufferHandler, options?: SaveOptions) {
const ctxt = xmlSaveToIO(handler, null, xmlSaveOption(options));
const ctxt = xmlSaveToIO(handler, options?.encoding ?? null, xmlSaveOption(options));
if (options?.indentString) {
if (xmlSaveSetIndentString(ctxt, options.indentString) < 0) {
throw new XmlError('Failed to set indent string');
Expand Down
18 changes: 14 additions & 4 deletions src/libxml2.mts
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,9 @@ export function xmlReadString(
return withStringUTF8(
xmlString,
(xmlBuf, len) => withStrings(
(urlBuf) => libxml2._xmlCtxtReadMemory(ctxt, xmlBuf, len, urlBuf, 0, options),
(urlBuf, enc) => libxml2._xmlCtxtReadMemory(ctxt, xmlBuf, len, urlBuf, enc, options),
url,
encoding,
),
);
}
Expand All @@ -147,8 +148,9 @@ export function xmlReadMemory(
return withCString(
xmlBuffer,
(xmlBuf, len) => withStrings(
(urlBuf) => libxml2._xmlCtxtReadMemory(ctxt, xmlBuf, len, urlBuf, 0, options),
(urlBuf, enc) => libxml2._xmlCtxtReadMemory(ctxt, xmlBuf, len, urlBuf, enc, options),
url,
encoding,
),
);
}
Expand Down Expand Up @@ -518,6 +520,12 @@ export interface SaveOptions {
* @default Two spaces: " "
*/
indentString?: string;
/**
* The encoding to use for the output.
*
* @default The original encoding of the document or utf-8
*/
encoding?: string;
}

export function xmlSaveOption(options?: SaveOptions): number {
Expand Down Expand Up @@ -580,8 +588,10 @@ export function xmlSaveToIO(
format: number,
): XmlSaveCtxtPtr {
const index = outputHandlerStorage.allocate(handler); // will be freed in outputClose
// Support only UTF-8 as of now
return libxml2._xmlSaveToIO(outputWrite, outputClose, index, 0, format);
return withStringUTF8(
encoding,
(encBuf) => libxml2._xmlSaveToIO(outputWrite, outputClose, index, encBuf, format),
);
}

enum XmlParserInputFlags {
Expand Down
15 changes: 13 additions & 2 deletions src/nodes.mts
Original file line number Diff line number Diff line change
Expand Up @@ -801,13 +801,15 @@ export class XmlElement extends XmlTreeNode {
/**
* Save the XmlElement to a buffer and invoke the callbacks to process.
*
* By default, it outputs utf-8 encoded bytes. Use `options.encoding` to change it.
*
* @param handler handlers to process the content in the buffer
* @param options options to adjust the saving behavior
* @see {@link toString}
* @see {@link XmlDocument#save}
*/
save(handler: XmlOutputBufferHandler, options?: SaveOptions) {
const ctxt = xmlSaveToIO(handler, null, xmlSaveOption(options));
const ctxt = xmlSaveToIO(handler, options?.encoding ?? 'utf-8', xmlSaveOption(options));
if (options?.indentString) {
if (xmlSaveSetIndentString(ctxt, options.indentString) < 0) {
throw new XmlError('Failed to set indent string');
Expand All @@ -819,13 +821,22 @@ export class XmlElement extends XmlTreeNode {

/**
* Save the XmlElement to a string
*
* By default, it outputs utf-8 encoded bytes,
* while `ascii` is another allowed option for `options.encoding`,
* which converts non-ascii characters into numeric character references.
*
* @param options options to adjust the saving behavior
* @see {@link save}
* @see {@link XmlDocument#toString}
*/
toString(options?: SaveOptions): string {
const saveOptions = options ?? { format: true };
if (saveOptions.encoding && saveOptions.encoding !== 'utf-8' && saveOptions.encoding !== 'ascii') {
throw new XmlError('Only utf-8 or ascii is supported in toString(). For other encodings, use save().');
}
const handler = new XmlStringOutputBufferHandler();
this.save(handler, options);
this.save(handler, saveOptions);

return handler.result;
}
Expand Down
119 changes: 119 additions & 0 deletions test/backend/encoding.spec.mts
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
import { expect } from 'chai';
import { XmlDocument, XmlElement, XsdValidator } from '@libxml2-wasm/lib/index.mjs';
import * as fs from 'node:fs/promises';

// These tests use ISO-8859-15 input, which must be stored in a file and read with the Node fs module.
describe('encoding', () => {
  let xmlBuffer: Buffer;

  /**
   * Build an output handler that appends every chunk it is given, plus an
   * accessor returning all bytes written so far.
   *
   * Writing each chunk at offset 0 of a fixed-size buffer would lose data when
   * the serializer emits more than one chunk, and would throw a RangeError
   * (instead of a readable assertion failure) when a chunk is larger than the
   * buffer.
   */
  function collectOutput() {
    const chunks: Buffer[] = [];
    return {
      handler: {
        // Buffer.from copies the bytes, so the chunk stays valid after the callback returns.
        write: (buf: Uint8Array) => { chunks.push(Buffer.from(buf)); return buf.byteLength; },
        close: () => true,
      },
      bytes: () => Buffer.concat(chunks),
    };
  }

  before(async () => {
    xmlBuffer = await fs.readFile('test/testfiles/iso8859-15.xml');
  });

  describe('parse', () => {
    it('should support non-utf8 encoding', () => {
      using doc = XmlDocument.fromBuffer(
        xmlBuffer,
        { encoding: 'iso8859-15' },
      );
      expect(doc.get('asdf/@RT')?.content).to.equal('Müller');
    });

    it('should use encoding from xml declaration', () => {
      using doc = XmlDocument.fromBuffer(xmlBuffer);
      expect(doc.get('asdf/@RT')?.content).to.equal('Müller');
    });
  });

  describe('validate', () => {
    it('should validate with xsd', () => {
      using doc = XmlDocument.fromBuffer(xmlBuffer);
      using schema = XmlDocument.fromString(`<?xml version="1.0" encoding="utf-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<xsd:simpleType name="RTType">
<xsd:restriction base="xsd:string">
<xsd:enumeration value="Müller"/>
</xsd:restriction>
</xsd:simpleType>
<xsd:element name="levelone">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="asdf">
<xsd:complexType>
<xsd:attribute name="RT" type="RTType" use="required"/>
</xsd:complexType>
</xsd:element>
</xsd:sequence>
</xsd:complexType>
</xsd:element>
</xsd:schema>`);
      using validator = XsdValidator.fromDoc(schema);
      // validate() is expected to throw on a mismatch, failing the test.
      validator.validate(doc);
    });
  });

  describe('document save', () => {
    it('saves to original encoding by default', () => {
      using doc = XmlDocument.fromBuffer(xmlBuffer);

      const output = collectOutput();
      doc.save(output.handler);
      // Round trip: without an explicit encoding the bytes must match the input file.
      expect(output.bytes()).to.deep.equal(xmlBuffer);
    });

    it('saves to specified encoding', () => {
      using doc = XmlDocument.fromBuffer(xmlBuffer);

      // toString() without options serializes as utf-8, whatever the source encoding was.
      expect(doc.toString()).to.equal(`\
<?xml version="1.0" encoding="utf-8"?>
<levelone>
<asdf RT="Müller"/>
</levelone>
`);
    });

    // NOTE(review): skipped in the original change; the reason is not visible here.
    it.skip('saves to specified encoding without format', () => {
      using doc = XmlDocument.fromBuffer(xmlBuffer);

      expect(doc.toString({ format: false })).to.equal(`\
<?xml version="1.0" encoding="utf-8"?>
<levelone><asdf RT="Müller"/></levelone>
`);
    });
  });

  describe('element save', () => {
    it('save to utf-8 by default', () => {
      using doc = XmlDocument.fromBuffer(xmlBuffer);

      const output = collectOutput();
      (doc.get('/levelone/asdf') as XmlElement).save(output.handler);
      // Buffer.from(string) encodes as utf-8, so a match proves utf-8 output.
      expect(output.bytes().indexOf(Buffer.from('RT="Müller"'))).to.above(0);
    });

    it('save utf-8 xml to other encoding', () => {
      using doc = XmlDocument.fromString(`\
<?xml version="1.0" encoding="utf-8"?>
<levelone>
<asdf RT="Müller"/>
</levelone>`);

      const output = collectOutput();
      (doc.get('/levelone/asdf') as XmlElement).save(output.handler, { encoding: 'iso8859-15' });
      const outputBuffer = output.bytes();
      const posRT = outputBuffer.indexOf(Buffer.from('RT="M'));
      // Fail clearly if the attribute is missing rather than probing unrelated bytes.
      expect(posRT).to.be.at.least(0);
      expect(outputBuffer[posRT + 5]).to.equal(0xfc); // ü
      expect(outputBuffer[posRT + 6]).to.equal(0x6c); // l
      expect(outputBuffer[posRT + 7]).to.equal(0x6c); // l
      expect(outputBuffer[posRT + 8]).to.equal(0x65); // e
    });
  });
});
31 changes: 23 additions & 8 deletions test/crossplatform/document.spec.mts
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ describe('XmlDocument', () => {
const newDoc = XmlDocument.create();
newDoc.createRoot('docs');
expect(newDoc.toString()).to.equal(`\
<?xml version="1.0"?>
<?xml version="1.0" encoding="utf-8"?>
<docs/>
`);
});
Expand All @@ -66,7 +66,7 @@ describe('XmlDocument', () => {
const newDoc = XmlDocument.create();
newDoc.createRoot('docs', 'http://example.com');
expect(newDoc.toString()).to.equal(`\
<?xml version="1.0"?>
<?xml version="1.0" encoding="utf-8"?>
<docs xmlns="http://example.com"/>
`);
});
Expand All @@ -75,7 +75,7 @@ describe('XmlDocument', () => {
const newDoc = XmlDocument.create();
newDoc.createRoot('docs', 'http://example.com', 'ex');
expect(newDoc.toString()).to.equal(`\
<?xml version="1.0"?>
<?xml version="1.0" encoding="utf-8"?>
<ex:docs xmlns:ex="http://example.com"/>
`);
});
Expand Down Expand Up @@ -103,9 +103,18 @@ describe('XmlDocument', () => {
});

describe('toString', () => {
it('allows utf-8 or ascii', () => {
expect(() => doc.toString({ encoding: 'utf-8' })).to.not.throw();
expect(() => doc.toString({ encoding: 'ascii' })).to.not.throw();
expect(() => doc.toString({ encoding: 'iso8859-1' })).to.throw(
XmlError,
'Only utf-8 or ascii is supported in toString(). For other encodings, use save().',
);
});

it('formats output by default', () => {
expect(doc.toString()).to.equal(`\
<?xml version="1.0"?>
<?xml version="1.0" encoding="utf-8"?>
<docs>
<doc/>
</docs>
Expand All @@ -114,14 +123,14 @@ describe('XmlDocument', () => {

it('not format when required', () => {
expect(doc.toString({ format: false })).to.equal(`\
<?xml version="1.0"?>
<?xml version="1.0" encoding="utf-8"?>
<docs><doc/></docs>
`);
});

it('can set indent string', () => {
expect(doc.toString({ format: true, indentString: ' ' })).to.equal(`\
<?xml version="1.0"?>
<?xml version="1.0" encoding="utf-8"?>
<docs>
<doc/>
</docs>
Expand All @@ -138,7 +147,7 @@ describe('XmlDocument', () => {

it('can avoid empty tags', () => {
expect(doc.toString({ format: true, noEmptyTags: true })).to.equal(`\
<?xml version="1.0"?>
<?xml version="1.0" encoding="utf-8"?>
<docs>
<doc></doc>
</docs>
Expand All @@ -164,6 +173,12 @@ describe('XmlDocument', () => {
const text = d.toString();
expect(text).to.contain('Jan Sedloň');
});

it('generates numeric character references', () => {
using d = XmlDocument.fromString('<root><name>Jan Sedloň</name></root>');
const text = d.toString({ encoding: 'ascii' });
expect(text).to.contain('Jan Sedlo&#328;');
});
});

describe('processXInclude', () => {
Expand All @@ -173,7 +188,7 @@ describe('XmlDocument', () => {

it('does nothing w/o XInclude nodes', () => {
expect(doc.processXInclude()).to.equal(0);
expect(doc.toString({ format: false })).to.equal('<?xml version="1.0"?>\n<docs><doc/></docs>\n');
expect(doc.toString({ format: false })).to.equal('<?xml version="1.0" encoding="utf-8"?>\n<docs><doc/></docs>\n');
});

it('processes XInclude nodes', () => {
Expand Down
Loading