From 8a214d1f4ccddcf4eb80dc4c32565d65d0311b97 Mon Sep 17 00:00:00 2001
From: Lucas Smith <me@lucasjamessmith.me>
Date: Wed, 28 Jan 2026 15:29:56 +1100
Subject: [PATCH 1/4] feat(fonts): add WinAnsi/Symbol/ZapfDingbats encoding
 helpers for Standard 14 fonts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add getEncodingForStandard14() to select the correct encoding per font
- Add isWinAnsiStandard14() to distinguish Symbol/ZapfDingbats
- Extend CHAR_TO_GLYPH map with all WinAnsi non-ASCII characters
  (0x80-0x9F and 0xA0-0xFF ranges) fixing width measurement for
  accented text like é, ñ, ü, €, etc.
---
 .../plans/044-latin1-standard14-encoding.md   | 201 ++++++++++++++++++
 src/fonts/standard-14.ts                      | 165 +++++++++++++-
 2 files changed, 365 insertions(+), 1 deletion(-)
 create mode 100644 .agents/plans/044-latin1-standard14-encoding.md

diff --git a/.agents/plans/044-latin1-standard14-encoding.md b/.agents/plans/044-latin1-standard14-encoding.md
new file mode 100644
index 0000000..9852ad0
--- /dev/null
+++ b/.agents/plans/044-latin1-standard14-encoding.md
@@ -0,0 +1,201 @@
+# 044: Fix Latin-1 / Accented Character Rendering with Standard 14 Fonts
+
+## Problem
+
+Drawing text with accented characters (á, é, ñ, ö, etc.) using Standard 14 fonts like Helvetica produces corrupted output. Characters render as mojibake because the content stream pipeline round-trips bytes through UTF-8, which destroys single-byte Latin-1 values.
+
+**Root cause**: Three compounding issues in the content generation pipeline, plus a related width measurement bug:
+
+1. **Wrong text encoding**: `encodeTextForFont()` (`pdf-page.ts:2433`) uses `PdfString.fromString()` which encodes via PDFDocEncoding (a metadata encoding), not WinAnsiEncoding (the font encoding Standard 14 fonts actually use). While the byte values happen to match for U+00A1–U+00FF, they diverge at 0xA0 and throughout the 0x80–0x9F range (€ is 0x80 in WinAnsi but 0xA0 in PDFDocEncoding; curly quotes, em dash, etc. all differ).
+
+2. **UTF-8 round-trip corruption**: The pipeline converts `Operator` → `toString()` (UTF-8 decode via `TextDecoder`) → `appendContent(string)` → `TextEncoder.encode()` (UTF-8 encode). When a `PdfString` literal contains raw byte 0xE9 (WinAnsi `é`), the UTF-8 decode treats it as an invalid sequence and produces `U+FFFD`, destroying the original byte.
+
+3. **Missing `/Encoding` in font dict**: The Standard 14 font dictionary (`pdf-page.ts:2392-2397`) is emitted without an `/Encoding` entry, so viewers fall back to the font's built-in encoding (typically StandardEncoding for Type1), not WinAnsiEncoding. Even if bytes were correct, the wrong encoding means wrong glyphs.
+
+4. **Wrong width measurement**: `getGlyphName()` (`standard-14.ts:262`) only maps ASCII code points to glyph names. Any non-ASCII character (é, ñ, ü, etc.) falls through to return `"space"`, meaning `widthOfTextAtSize()` returns incorrect widths for accented text. This breaks text layout, line wrapping, and centering.
+
+## Goals
+
+- Accented Latin characters (á, é, ñ, ü, ß, €, etc.) render correctly with all Standard 14 fonts
+- Symbol and ZapfDingbats fonts work correctly with their built-in encodings
+- Text width measurement is correct for all WinAnsi characters
+- Embedded fonts (Identity-H with GIDs) continue to work unchanged
+- The content stream pipeline works with `Uint8Array` throughout, eliminating the UTF-8 round-trip
+- Unencodable characters (CJK, emoji) produce `.notdef` by default with an option to throw
+
+## Scope
+
+### In scope
+
+- Fix all four issues above
+- Broad bytes-first refactor of the content stream pipeline (all callers move to bytes)
+- Wire up WinAnsiEncoding for Standard 14 fonts (except Symbol/ZapfDingbats)
+- Wire up SymbolEncoding and ZapfDingbatsEncoding for those two fonts
+- Fix `getGlyphName()` to cover all WinAnsi non-ASCII glyph names
+- Add tests for accented character rendering, width measurement, and all encoding paths
+
+### Out of scope
+
+- Custom encoding differences arrays
+- Text extraction / parsing (already works correctly)
+
+## Design
+
+### The core insight
+
+The content stream pipeline currently uses strings as an intermediate representation between operators and bytes. This is the fundamental problem — PDF content streams are binary, and shuttling them through JavaScript strings (which are UTF-16 internally) and then through UTF-8 TextEncoder/TextDecoder corrupts any non-ASCII bytes.
+
+The fix makes the pipeline work with `Uint8Array` throughout, avoiding the string round-trip entirely. At the same time, we use `WinAnsiEncoding` (which already exists in the codebase but is only used for parsing) to properly encode text for Standard 14 fonts.
+
+### Approach: bytes-first pipeline
+
+The reporter's fix (converting string char-by-char via `charCodeAt & 0xFF`) works but is a band-aid that relies on JavaScript strings preserving Latin-1 byte values. Our approach is cleaner:
+
+**1. `encodeTextForFont()` — use proper font encoding for all Standard 14 fonts**
+
+Instead of `PdfString.fromString(text)` (PDFDocEncoding), select the correct encoding based on font name:
+
+- **Helvetica, Times, Courier families** → `WinAnsiEncoding.instance`
+- **Symbol** → `SymbolEncoding.instance`
+- **ZapfDingbats** → `ZapfDingbatsEncoding.instance`
+
+Call `encoding.encode(text)` to produce the correct byte values, then wrap in a hex-format `PdfString`. This properly handles the 0x80–0x9F range where PDFDocEncoding and WinAnsiEncoding differ.
+
+**Unencodable characters**: By default, substitute with the `.notdef` glyph (byte 0x00). This matches PDF convention — the font's `.notdef` glyph typically renders as an empty box or blank space. Users who prefer a hard failure can pass an option to throw instead (the option's name and shape are not yet specified in this plan). The rationale: leniency by default matches the project's design principle of being tolerant, while the option gives strict users control.
+
+**2. `appendContent()` / `appendOperators()` — broad bytes-first refactor**
+
+Refactor the entire content pipeline to work with `Uint8Array`:
+
+- `appendContent()` accepts `string | Uint8Array`. String inputs get `TextEncoder`'d (safe for ASCII-only callers like `drawPage` and `drawImage`). `Uint8Array` inputs pass through directly.
+- `appendOperators()` uses `Operator.toBytes()` directly, concatenates into a `Uint8Array`, and passes bytes to `appendContent()`.
+- `createContentStream()` gains a `Uint8Array` overload that skips the `TextEncoder` step.
+- `prependContent()` gets the same `string | Uint8Array` treatment for consistency.
+- `ContentAppender` type in `path-builder.ts` changes to `(content: string | Uint8Array) => void`.
+- `PathBuilder.emitOps()` can migrate to bytes at its own pace — it only produces ASCII content (path operators, numbers), so the string path remains safe for it.
+
+This is the principled fix: content streams are binary data, and the pipeline treats them as such. The `toString()` method on `Operator` remains for debugging/logging, but the serialization path uses `toBytes()`.
+
+**3. `addFontResource()` — add `/Encoding` where appropriate**
+
+| Font             | `/Encoding` value | Reason                                                                           |
+| ---------------- | ----------------- | -------------------------------------------------------------------------------- |
+| Helvetica family | `WinAnsiEncoding` | Explicit encoding ensures correct glyph mapping                                  |
+| Times family     | `WinAnsiEncoding` | Same                                                                             |
+| Courier family   | `WinAnsiEncoding` | Same                                                                             |
+| Symbol           | _(omitted)_       | Uses built-in encoding; no valid `/Encoding` name exists per PDF spec Table 5.15 |
+| ZapfDingbats     | _(omitted)_       | Same as Symbol                                                                   |
+
+For Symbol and ZapfDingbats, `SymbolEncoding` / `ZapfDingbatsEncoding` are used only for Unicode → byte mapping in `encodeTextForFont()`. The font dict has no `/Encoding` entry because the PDF spec doesn't define named encodings for these fonts — their built-in encoding is implicit.
+
+**4. Fix `getGlyphName()` for non-ASCII characters**
+
+Extend the `CHAR_TO_GLYPH` map in `standard-14.ts` to cover all WinAnsi non-ASCII code points. The WinAnsiEncoding table maps Unicode code points to byte values, and the glyph width tables already have entries for all these glyphs (e.g., `eacute`, `ntilde`, `Euro`, `endash`, `Adieresis`). We need to bridge the gap: given a Unicode character, look up its glyph name so we can look up its width.
+
+The approach: use `WinAnsiEncoding` to map Unicode → byte code, then use the Adobe Glyph List (already in `glyph-list.ts`) or a direct Unicode → glyph name mapping to find the glyph name. Alternatively, extend `CHAR_TO_GLYPH` with all the Latin-1 supplement and WinAnsi 0x80-0x9F entries directly.
+
+### Why hex format for Standard 14 text?
+
+Using hex format (`<E9>` instead of `(é)`) for Standard 14 font text strings is the most robust approach:
+
+- **Defense-in-depth**: Even though the bytes pipeline is correct, hex format is immune to any future string-based manipulation of content streams
+- **Precedent**: pdf-lib uses hex strings for all Standard 14 font text (see `StandardFontEmbedder.encodeText()`)
+- **Simpler code**: No need to worry about escaping parentheses, backslashes, or non-ASCII bytes in literal strings
+- **Trade-off**: Slightly larger output (2 hex chars per byte vs 1 byte in literal), but content streams are typically compressed anyway
+
+### Changes summary
+
+| File                              | Method/Area                        | Change                                                                                                   |
+| --------------------------------- | ---------------------------------- | -------------------------------------------------------------------------------------------------------- |
+| `src/api/pdf-page.ts`             | `encodeTextForFont()`              | Use WinAnsi/Symbol/ZapfDingbats encoding + hex `PdfString`; `.notdef` substitution for unencodable chars |
+| `src/api/pdf-page.ts`             | `appendContent()`                  | Accept `string \| Uint8Array`; bytes pass through, strings get TextEncoder'd                             |
+| `src/api/pdf-page.ts`             | `prependContent()`                 | Same dual-type support                                                                                   |
+| `src/api/pdf-page.ts`             | `createContentStream()`            | Accept `string \| Uint8Array`; skip TextEncoder for bytes                                                |
+| `src/api/pdf-page.ts`             | `appendOperators()`                | Use `Operator.toBytes()` directly, pass `Uint8Array`                                                     |
+| `src/api/pdf-page.ts`             | `addFontResource()`                | Add `/Encoding WinAnsiEncoding` for non-Symbol/ZapfDingbats Standard 14 fonts                            |
+| `src/api/drawing/path-builder.ts` | `ContentAppender` type             | Change to `(content: string \| Uint8Array) => void`                                                      |
+| `src/fonts/standard-14.ts`        | `getGlyphName()` / `CHAR_TO_GLYPH` | Extend to cover all WinAnsi non-ASCII characters                                                         |
+
+### Desired usage
+
+From the user's perspective, nothing changes — the existing API just works:
+
+```typescript
+const page = pdf.addPage();
+
+// Latin-1 accented characters work with Standard 14 fonts
+page.drawText("Héllo café naïve résumé", {
+  font: "Helvetica",
+  x: 50,
+  y: 700,
+  size: 14,
+});
+
+// Characters in the 0x80-0x9F WinAnsi range also work
+page.drawText("Price: €42 — “special” edition", {
+  font: "Times-Roman",
+  x: 50,
+  y: 650,
+  size: 14,
+});
+
+// Symbol and ZapfDingbats work with their own encodings
+page.drawText("αβγδ", { font: "Symbol", x: 50, y: 600, size: 14 });
+
+// Unencodable characters silently become .notdef (empty box) by default
+page.drawText("Hello 世界", { font: "Helvetica", x: 50, y: 550, size: 14 });
+// Renders: "Hello " followed by two empty boxes
+
+// Width measurement is correct for accented text
+const width = page.widthOfTextAtSize("café", "Helvetica", 12);
+// Returns correct width using eacute glyph width, not space
+```
+
+## Test plan
+
+### Rendering correctness
+
+- Round-trip test: draw accented text ("café résumé naïve") with Helvetica, save, re-parse, extract text, verify it matches input
+- Verify hex string encoding in content stream: `é` → byte `0xE9`, not UTF-8 `0xC3 0xA9`
+- Test the full WinAnsi range including 0x80–0x9F characters (€, †, ‡, curly quotes, em dash, ellipsis)
+- Test all Standard 14 font families (Helvetica, Times, Courier) with accented text
+
+### Font dictionary
+
+- Verify Helvetica/Times/Courier font dicts contain `/Encoding /WinAnsiEncoding`
+- Verify Symbol font dict does **not** contain `/Encoding`
+- Verify ZapfDingbats font dict does **not** contain `/Encoding`
+
+### Symbol and ZapfDingbats
+
+- Verify Symbol font correctly encodes Greek letters (α → correct Symbol byte)
+- Verify ZapfDingbats correctly encodes decorative symbols
+
+### Encoding edge cases
+
+- Unencodable characters (CJK, emoji) produce `.notdef` byte (0x00) by default
+- Embedded fonts continue to work unchanged (Identity-H path with GIDs)
+
+### Width measurement
+
+- `widthOfTextAtSize("é", "Helvetica", 1000)` returns `eacute` width (556), not `space` width (278)
+- Width of "café" equals width of "caf" + width of "eacute" glyph
+- Width correct for 0x80-0x9F characters (€ = Euro glyph width)
+
+### Bytes pipeline
+
+- Verify `appendContent(Uint8Array)` passes bytes through without TextEncoder transformation
+- Verify `appendContent(string)` still works for ASCII content (drawImage, drawPage)
+- PathBuilder operations still produce correct output
+
+## Decisions made
+
+1. **Bytes pipeline scope**: Broad — all content-producing paths move to `Uint8Array`, not just `appendOperators()`. The `ContentAppender` type becomes `string | Uint8Array` to allow gradual migration of callers.
+
+2. **Hex vs literal format**: Always hex for Standard 14 text, as defense-in-depth. Even with the bytes pipeline fix, hex format provides immunity against any future string-based manipulation.
+
+3. **Unencodable characters**: Default to `.notdef` glyph substitution (byte 0x00). The font's `.notdef` glyph typically renders as an empty box or blank. This is lenient-by-default per the project's design principles.
+
+4. **Symbol and ZapfDingbats**: Wire up their proper encodings now. Use `SymbolEncoding.instance` and `ZapfDingbatsEncoding.instance` for Unicode → byte mapping. Omit `/Encoding` from the font dict (no valid named encoding exists per PDF spec Table 5.15 — the fonts use their built-in encoding implicitly).
+
+5. **Width measurement**: Fix in this plan. Extend `CHAR_TO_GLYPH` in `standard-14.ts` to cover all WinAnsi non-ASCII characters. Without this, text layout (line wrapping, centering) would be broken for accented text even if rendering is fixed.
diff --git a/src/fonts/standard-14.ts b/src/fonts/standard-14.ts
index c003125..fb37d50 100644
--- a/src/fonts/standard-14.ts
+++ b/src/fonts/standard-14.ts
@@ -12,6 +12,11 @@
  * Data extracted from pdf.js metrics.js (Mozilla, Apache 2.0 License)
  */
 
+import type { FontEncoding } from "#src/fonts/encodings/encoding";
+import { SymbolEncoding } from "#src/fonts/encodings/symbol";
+import { WinAnsiEncoding } from "#src/fonts/encodings/win-ansi";
+import { ZapfDingbatsEncoding } from "#src/fonts/encodings/zapf-dingbats";
+
 /**
  * Standard 14 font names.
  */
@@ -90,6 +95,37 @@ export function getBaseFontName(name: string): string {
   return name.includes("+") ? name.split("+")[1] : name;
 }
 
+/**
+ * Get the font encoding for a Standard 14 font.
+ *
+ * - Symbol → SymbolEncoding
+ * - ZapfDingbats → ZapfDingbatsEncoding
+ * - All others (Helvetica, Times, Courier) → WinAnsiEncoding
+ */
+export function getEncodingForStandard14(name: string): FontEncoding {
+  const baseName = getBaseFontName(name);
+
+  if (baseName === "Symbol") {
+    return SymbolEncoding.instance;
+  }
+
+  if (baseName === "ZapfDingbats") {
+    return ZapfDingbatsEncoding.instance;
+  }
+
+  return WinAnsiEncoding.instance;
+}
+
+/**
+ * Check if a Standard 14 font uses WinAnsiEncoding.
+ * Returns false for Symbol and ZapfDingbats (they use built-in encodings).
+ */
+export function isWinAnsiStandard14(name: string): boolean {
+  const baseName = getBaseFontName(name);
+
+  return baseName !== "Symbol" && baseName !== "ZapfDingbats";
+}
+
 /**
  * Get basic metrics (ascent, descent, etc.) for a Standard 14 font.
  */
@@ -156,7 +192,7 @@ export function getStandard14DefaultWidth(fontName: string): number {
  * This is a subset of the Adobe Glyph List.
  */
 const CHAR_TO_GLYPH: Record<number, string> = {
-  // ASCII printable characters
+  // ASCII printable characters (0x20-0x7E)
   32: "space",
   33: "exclam",
   34: "quotedbl",
@@ -254,6 +290,133 @@ const CHAR_TO_GLYPH: Record<number, string> = {
   124: "bar",
   125: "braceright",
   126: "asciitilde",
+
+  // WinAnsi 0x80-0x9F range (Unicode code points)
+  0x20ac: "Euro", // €
+  0x201a: "quotesinglbase", // ‚
+  0x0192: "florin", // ƒ
+  0x201e: "quotedblbase", // „
+  0x2026: "ellipsis", // …
+  0x2020: "dagger", // †
+  0x2021: "daggerdbl", // ‡
+  0x02c6: "circumflex", // ˆ
+  0x2030: "perthousand", // ‰
+  0x0160: "Scaron", // Š
+  0x2039: "guilsinglleft", // ‹
+  0x0152: "OE", // Œ
+  0x017d: "Zcaron", // Ž
+  0x2018: "quoteleft", // ‘
+  0x2019: "quoteright", // ’
+  0x201c: "quotedblleft", // “
+  0x201d: "quotedblright", // ”
+  0x2022: "bullet", // •
+  0x2013: "endash", // –
+  0x2014: "emdash", // —
+  0x02dc: "tilde", // ˜
+  0x2122: "trademark", // ™
+  0x0161: "scaron", // š
+  0x203a: "guilsinglright", // ›
+  0x0153: "oe", // œ
+  0x017e: "zcaron", // ž
+  0x0178: "Ydieresis", // Ÿ
+
+  // Latin-1 Supplement 0xA0-0xFF (Unicode code point = WinAnsi byte value)
+  0x00a0: "space", // NBSP
+  0x00a1: "exclamdown", // ¡
+  0x00a2: "cent", // ¢
+  0x00a3: "sterling", // £
+  0x00a4: "currency", // ¤
+  0x00a5: "yen", // ¥
+  0x00a6: "brokenbar", // ¦
+  0x00a7: "section", // §
+  0x00a8: "dieresis", // ¨
+  0x00a9: "copyright", // ©
+  0x00aa: "ordfeminine", // ª
+  0x00ab: "guillemotleft", // «
+  0x00ac: "logicalnot", // ¬
+  0x00ad: "hyphen", // soft hyphen
+  0x00ae: "registered", // ®
+  0x00af: "macron", // ¯
+  0x00b0: "degree", // °
+  0x00b1: "plusminus", // ±
+  0x00b2: "twosuperior", // ²
+  0x00b3: "threesuperior", // ³
+  0x00b4: "acute", // ´
+  0x00b5: "mu", // µ
+  0x00b6: "paragraph", // ¶
+  0x00b7: "periodcentered", // ·
+  0x00b8: "cedilla", // ¸
+  0x00b9: "onesuperior", // ¹
+  0x00ba: "ordmasculine", // º
+  0x00bb: "guillemotright", // »
+  0x00bc: "onequarter", // ¼
+  0x00bd: "onehalf", // ½
+  0x00be: "threequarters", // ¾
+  0x00bf: "questiondown", // ¿
+  0x00c0: "Agrave", // À
+  0x00c1: "Aacute", // Á
+  0x00c2: "Acircumflex", // Â
+  0x00c3: "Atilde", // Ã
+  0x00c4: "Adieresis", // Ä
+  0x00c5: "Aring", // Å
+  0x00c6: "AE", // Æ
+  0x00c7: "Ccedilla", // Ç
+  0x00c8: "Egrave", // È
+  0x00c9: "Eacute", // É
+  0x00ca: "Ecircumflex", // Ê
+  0x00cb: "Edieresis", // Ë
+  0x00cc: "Igrave", // Ì
+  0x00cd: "Iacute", // Í
+  0x00ce: "Icircumflex", // Î
+  0x00cf: "Idieresis", // Ï
+  0x00d0: "Eth", // Ð
+  0x00d1: "Ntilde", // Ñ
+  0x00d2: "Ograve", // Ò
+  0x00d3: "Oacute", // Ó
+  0x00d4: "Ocircumflex", // Ô
+  0x00d5: "Otilde", // Õ
+  0x00d6: "Odieresis", // Ö
+  0x00d7: "multiply", // ×
+  0x00d8: "Oslash", // Ø
+  0x00d9: "Ugrave", // Ù
+  0x00da: "Uacute", // Ú
+  0x00db: "Ucircumflex", // Û
+  0x00dc: "Udieresis", // Ü
+  0x00dd: "Yacute", // Ý
+  0x00de: "Thorn", // Þ
+  0x00df: "germandbls", // ß
+  0x00e0: "agrave", // à
+  0x00e1: "aacute", // á
+  0x00e2: "acircumflex", // â
+  0x00e3: "atilde", // ã
+  0x00e4: "adieresis", // ä
+  0x00e5: "aring", // å
+  0x00e6: "ae", // æ
+  0x00e7: "ccedilla", // ç
+  0x00e8: "egrave", // è
+  0x00e9: "eacute", // é
+  0x00ea: "ecircumflex", // ê
+  0x00eb: "edieresis", // ë
+  0x00ec: "igrave", // ì
+  0x00ed: "iacute", // í
+  0x00ee: "icircumflex", // î
+  0x00ef: "idieresis", // ï
+  0x00f0: "eth", // ð
+  0x00f1: "ntilde", // ñ
+  0x00f2: "ograve", // ò
+  0x00f3: "oacute", // ó
+  0x00f4: "ocircumflex", // ô
+  0x00f5: "otilde", // õ
+  0x00f6: "odieresis", // ö
+  0x00f7: "divide", // ÷
+  0x00f8: "oslash", // ø
+  0x00f9: "ugrave", // ù
+  0x00fa: "uacute", // ú
+  0x00fb: "ucircumflex", // û
+  0x00fc: "udieresis", // ü
+  0x00fd: "yacute", // ý
+  0x00fe: "thorn", // þ
+  0x00ff: "ydieresis", // ÿ
 };
 
 /**

From 9bc17434cd1f8e14c3b1bc9179df03696bd96f54 Mon Sep 17 00:00:00 2001
From: Lucas Smith <me@lucasjamessmith.me>
Date: Wed, 28 Jan 2026 15:30:08 +1100
Subject: [PATCH 2/4] fix(encoding): fix Latin-1/accented character corruption
 with Standard 14 fonts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three compounding bugs caused accented characters (á, é, ñ, ö, €, etc.)
to render as mojibake with Standard 14 fonts like Helvetica:

1. Wrong text encoding: used PDFDocEncoding instead of WinAnsiEncoding
2. UTF-8 round-trip corruption: Operator.toString() → TextDecoder (UTF-8)
   destroyed non-ASCII bytes when re-encoded via TextEncoder
3. Missing /Encoding in font dict: viewers fell back to StandardEncoding

Fix:
- encodeTextForFont() now uses WinAnsiEncoding (or SymbolEncoding/
  ZapfDingbatsEncoding) with hex-format PdfString output
- Unencodable characters substitute with .notdef (byte 0x00)
- appendOperators() uses Operator.toBytes() directly, bypassing the
  string intermediate that caused UTF-8 corruption
- createContentStream/appendContent/prependContent accept string |
  Uint8Array for the broad bytes-first pipeline refactor
- addFontResource() adds /Encoding WinAnsiEncoding for Helvetica/
  Times/Courier families (omitted for Symbol/ZapfDingbats per spec)
- ContentAppender type updated to string | Uint8Array
---
 src/api/drawing/path-builder.ts |  3 +-
 src/api/pdf-page.ts             | 87 +++++++++++++++++++++++++++------
 2 files changed, 73 insertions(+), 17 deletions(-)

diff --git a/src/api/drawing/path-builder.ts b/src/api/drawing/path-builder.ts
index 84e5137..39d4b1d 100644
--- a/src/api/drawing/path-builder.ts
+++ b/src/api/drawing/path-builder.ts
@@ -19,8 +19,9 @@ const KAPPA = 0.5522847498307936;
 
 /**
  * Callback type for appending content to a page.
+ * Accepts a string (for ASCII-only content) or raw bytes.
  */
-export type ContentAppender = (content: string) => void;
+export type ContentAppender = (content: string | Uint8Array) => void;
 
 /**
  * Callback type for registering a graphics state and returning its name.
diff --git a/src/api/pdf-page.ts b/src/api/pdf-page.ts
index d0b03cd..1697bdc 100644
--- a/src/api/pdf-page.ts
+++ b/src/api/pdf-page.ts
@@ -69,9 +69,15 @@ import type { WidgetAnnotation } from "#src/document/forms/widget-annotation";
 import { EmbeddedFont } from "#src/fonts/embedded-font";
 import { parseFont } from "#src/fonts/font-factory";
 import type { PdfFont } from "#src/fonts/pdf-font";
-import { getStandard14BasicMetrics, isStandard14Font } from "#src/fonts/standard-14";
+import {
+  getEncodingForStandard14,
+  getStandard14BasicMetrics,
+  isStandard14Font,
+  isWinAnsiStandard14,
+} from "#src/fonts/standard-14";
 import { parseToUnicode } from "#src/fonts/to-unicode";
 // Annotation utilities - imported here to avoid dynamic require issues
+import { concatBytes } from "#src/helpers/buffer";
 import { black } from "#src/helpers/colors";
 import {
   beginText,
@@ -2160,9 +2166,12 @@ export class PDFPage {
 
   /**
    * Create and register a content stream.
+   *
+   * Accepts either a string (for ASCII-only content like operator names and numbers)
+   * or raw bytes (for content that may contain non-ASCII data).
    */
-  private createContentStream(content: string): PdfRef | PdfStream {
-    const bytes = new TextEncoder().encode(content);
+  private createContentStream(content: string | Uint8Array): PdfRef | PdfStream {
+    const bytes = typeof content === "string" ? new TextEncoder().encode(content) : content;
     const stream = new PdfStream([], bytes);
 
     // If we have a context, register the stream and return a ref
@@ -2177,9 +2186,11 @@ export class PDFPage {
   /**
    * Prepend content to the page's content stream (for background drawing).
    */
-  private prependContent(content: string): void {
+  private prependContent(content: string | Uint8Array): void {
     const existingContents = this.dict.get("Contents");
-    const newContent = this.createContentStream(`${content}\n`);
+    const contentWithNewline =
+      typeof content === "string" ? `${content}\n` : concatBytes([content, new Uint8Array([0x0a])]);
+    const newContent = this.createContentStream(contentWithNewline);
 
     if (!existingContents) {
       // No existing content - just set our stream
@@ -2229,9 +2240,11 @@ export class PDFPage {
    * we wrap the existing content in q/Q so any CTM changes are isolated,
    * then append our content which runs with the default CTM.
    */
-  private appendContent(content: string): void {
+  private appendContent(content: string | Uint8Array): void {
     const existingContents = this.dict.get("Contents");
-    const newContent = this.createContentStream(`\n${content}`);
+    const contentWithNewline =
+      typeof content === "string" ? `\n${content}` : concatBytes([new Uint8Array([0x0a]), content]);
+    const newContent = this.createContentStream(contentWithNewline);
 
     if (!existingContents) {
       // No existing content - just set our stream
@@ -2352,11 +2365,23 @@ export class PDFPage {
 
   /**
    * Append operators to the page content stream.
+   *
+   * Uses Operator.toBytes() directly to avoid UTF-8 round-trip corruption
+   * of non-ASCII bytes in PdfString operands (e.g., WinAnsi-encoded text).
    */
   private appendOperators(ops: Operator[]): void {
-    const content = ops.map(op => op.toString()).join("\n");
+    const newline = new Uint8Array([0x0a]);
+    const parts: Uint8Array[] = [];
+
+    for (let i = 0; i < ops.length; i++) {
+      if (i > 0) {
+        parts.push(newline);
+      }
+
+      parts.push(ops[i].toBytes());
+    }
 
-    this.appendContent(content);
+    this.appendContent(concatBytes(parts));
   }
 
   /**
@@ -2390,11 +2415,20 @@ export class PDFPage {
       }
 
       // Create new font dict
-      const fontDict = PdfDict.of({
-        Type: PdfName.of("Font"),
-        Subtype: PdfName.of("Type1"),
-        BaseFont: PdfName.of(font),
-      });
+      // Add /Encoding WinAnsiEncoding for non-Symbol/ZapfDingbats fonts.
+      // Symbol and ZapfDingbats use their built-in encoding (no /Encoding entry).
+      const fontDict = isWinAnsiStandard14(font)
+        ? PdfDict.of({
+            Type: PdfName.of("Font"),
+            Subtype: PdfName.of("Type1"),
+            BaseFont: PdfName.of(font),
+            Encoding: PdfName.of("WinAnsiEncoding"),
+          })
+        : PdfDict.of({
+            Type: PdfName.of("Font"),
+            Subtype: PdfName.of("Type1"),
+            BaseFont: PdfName.of(font),
+          });
 
       const fontName = this.generateUniqueName(fonts, "F");
       fonts.set(fontName, fontDict);
@@ -2429,11 +2463,32 @@ export class PDFPage {
 
   /**
    * Encode text to a PDF string for the given font.
+   *
+   * Standard 14 fonts use WinAnsiEncoding (or SymbolEncoding/ZapfDingbatsEncoding).
+   * Unencodable characters are substituted with .notdef (byte 0x00).
+   * Embedded fonts use Identity-H encoding with glyph IDs.
    */
   private encodeTextForFont(text: string, font: FontInput): PdfString {
     if (typeof font === "string") {
-      // Standard 14 font - use WinAnsi encoding (Latin-1 subset)
-      return PdfString.fromString(text);
+      // Standard 14 font - use the appropriate encoding
+      const encoding = getEncodingForStandard14(font);
+      const codes: number[] = [];
+
+      for (const char of text) {
+        if (encoding.canEncode(char)) {
+          // biome-ignore lint/style/noNonNullAssertion: canEncode guarantees getCode succeeds
+          codes.push(encoding.getCode(char.codePointAt(0)!)!);
+        } else {
+          // Substitute unencodable characters with .notdef (byte 0x00)
+          codes.push(0x00);
+        }
+      }
+
+      const bytes = new Uint8Array(codes);
+
+      // Use hex format for defense-in-depth: hex strings are pure ASCII
+      // and immune to any string encoding transformation
+      return PdfString.fromBytes(bytes);
     }
 
     // Embedded font - use Identity-H encoding with GIDs

From 28dd68701a21de616ecac4934c7bd60dbe0fec5e Mon Sep 17 00:00:00 2001
From: Lucas Smith <me@lucasjamessmith.me>
Date: Wed, 28 Jan 2026 15:30:14 +1100
Subject: [PATCH 3/4] test(encoding): add tests for Latin-1/WinAnsi encoding
 with Standard 14 fonts

29 tests covering:
- Font encoding selection (WinAnsi vs Symbol vs ZapfDingbats)
- Glyph name mapping for accented/non-ASCII characters
- Width measurement correctness for accented text
- Font dict /Encoding verification
- Hex string encoding in content streams
- Unencodable character .notdef substitution
- Round-trip PDF generation with all font families
- Bytes pipeline backward compatibility (shapes, paths, images)
---
 src/api/drawing/latin1-encoding.test.ts | 476 ++++++++++++++++++++++++
 1 file changed, 476 insertions(+)
 create mode 100644 src/api/drawing/latin1-encoding.test.ts

diff --git a/src/api/drawing/latin1-encoding.test.ts b/src/api/drawing/latin1-encoding.test.ts
new file mode 100644
index 0000000..094cb35
--- /dev/null
+++ b/src/api/drawing/latin1-encoding.test.ts
@@ -0,0 +1,476 @@
+/**
+ * Tests for Latin-1 / accented character rendering with Standard 14 fonts.
+ *
+ * Verifies that the content stream pipeline correctly encodes non-ASCII
+ * characters using WinAnsiEncoding, avoiding UTF-8 round-trip corruption.
+ */
+
+import {
+  getEncodingForStandard14,
+  getGlyphName,
+  getStandard14GlyphWidth,
+  isWinAnsiStandard14,
+} from "#src/fonts/standard-14";
+import { red } from "#src/helpers/colors";
+import { isPdfHeader, saveTestOutput } from "#src/test-utils";
+import { describe, expect, it } from "vitest";
+
+import { PDF } from "../pdf";
+
+describe("Latin-1 / WinAnsi encoding for Standard 14 fonts", () => {
+  // ─────────────────────────────────────────────────────────────────────────────
+  // Font encoding selection
+  // ─────────────────────────────────────────────────────────────────────────────
+
+  describe("getEncodingForStandard14", () => {
+    it("returns WinAnsiEncoding for Helvetica family", () => {
+      expect(getEncodingForStandard14("Helvetica").name).toBe("WinAnsiEncoding");
+      expect(getEncodingForStandard14("Helvetica-Bold").name).toBe("WinAnsiEncoding");
+      expect(getEncodingForStandard14("Helvetica-Oblique").name).toBe("WinAnsiEncoding");
+      expect(getEncodingForStandard14("Helvetica-BoldOblique").name).toBe("WinAnsiEncoding");
+    });
+
+    it("returns WinAnsiEncoding for Times family", () => {
+      expect(getEncodingForStandard14("Times-Roman").name).toBe("WinAnsiEncoding");
+      expect(getEncodingForStandard14("Times-Bold").name).toBe("WinAnsiEncoding");
+      expect(getEncodingForStandard14("Times-Italic").name).toBe("WinAnsiEncoding");
+      expect(getEncodingForStandard14("Times-BoldItalic").name).toBe("WinAnsiEncoding");
+    });
+
+    it("returns WinAnsiEncoding for Courier family", () => {
+      expect(getEncodingForStandard14("Courier").name).toBe("WinAnsiEncoding");
+      expect(getEncodingForStandard14("Courier-Bold").name).toBe("WinAnsiEncoding");
+    });
+
+    it("returns SymbolEncoding for Symbol", () => {
+      expect(getEncodingForStandard14("Symbol").name).toBe("SymbolEncoding");
+    });
+
+    it("returns ZapfDingbatsEncoding for ZapfDingbats", () => {
+      expect(getEncodingForStandard14("ZapfDingbats").name).toBe("ZapfDingbatsEncoding");
+    });
+  });
+
+  describe("isWinAnsiStandard14", () => {
+    it("returns true for Helvetica/Times/Courier", () => {
+      expect(isWinAnsiStandard14("Helvetica")).toBe(true);
+      expect(isWinAnsiStandard14("Times-Roman")).toBe(true);
+      expect(isWinAnsiStandard14("Courier")).toBe(true);
+    });
+
+    it("returns false for Symbol and ZapfDingbats", () => {
+      expect(isWinAnsiStandard14("Symbol")).toBe(false);
+      expect(isWinAnsiStandard14("ZapfDingbats")).toBe(false);
+    });
+  });
+
+  // ─────────────────────────────────────────────────────────────────────────────
+  // Glyph name mapping (width measurement)
+  // ─────────────────────────────────────────────────────────────────────────────
+
+  describe("getGlyphName for non-ASCII characters", () => {
+    it("maps common accented characters to correct glyph names", () => {
+      expect(getGlyphName("é")).toBe("eacute");
+      expect(getGlyphName("á")).toBe("aacute");
+      expect(getGlyphName("ñ")).toBe("ntilde");
+      expect(getGlyphName("ü")).toBe("udieresis");
+      expect(getGlyphName("ö")).toBe("odieresis");
+      expect(getGlyphName("ß")).toBe("germandbls");
+      expect(getGlyphName("ç")).toBe("ccedilla");
+    });
+
+    it("maps WinAnsi 0x80-0x9F range characters", () => {
+      expect(getGlyphName("€")).toBe("Euro");
+      expect(getGlyphName("†")).toBe("dagger");
+      expect(getGlyphName("‡")).toBe("daggerdbl");
+      expect(getGlyphName("…")).toBe("ellipsis");
+      expect(getGlyphName("–")).toBe("endash");
+      expect(getGlyphName("—")).toBe("emdash");
+      expect(getGlyphName("™")).toBe("trademark");
+      expect(getGlyphName("\u201C")).toBe("quotedblleft"); // "
+      expect(getGlyphName("\u201D")).toBe("quotedblright"); // "
+    });
+
+    it("maps Latin-1 supplement characters", () => {
+      expect(getGlyphName("©")).toBe("copyright");
+      expect(getGlyphName("®")).toBe("registered");
+      expect(getGlyphName("°")).toBe("degree");
+      expect(getGlyphName("±")).toBe("plusminus");
+      expect(getGlyphName("×")).toBe("multiply");
+      expect(getGlyphName("÷")).toBe("divide");
+    });
+
+    it("still maps ASCII characters correctly", () => {
+      expect(getGlyphName("A")).toBe("A");
+      expect(getGlyphName("z")).toBe("z");
+      expect(getGlyphName(" ")).toBe("space");
+      expect(getGlyphName("!")).toBe("exclam");
+    });
+  });
+
+  describe("width measurement for accented characters", () => {
+    it("returns correct width for eacute in Helvetica", () => {
+      const width = getStandard14GlyphWidth("Helvetica", "eacute");
+      expect(width).toBe(556);
+    });
+
+    it("returns correct width for Euro in Helvetica", () => {
+      const width = getStandard14GlyphWidth("Helvetica", "Euro");
+      expect(width).toBe(556);
+    });
+
+    it("returns correct width for ntilde in Times-Roman", () => {
+      const width = getStandard14GlyphWidth("Times-Roman", "ntilde");
+      expect(width).toBe(500);
+    });
+
+    it("eacute glyph name produces non-space width", () => {
+      const eacuteWidth = getStandard14GlyphWidth("Helvetica", getGlyphName("é"));
+      const spaceWidth = getStandard14GlyphWidth("Helvetica", "space");
+
+      expect(eacuteWidth).toBe(556);
+      expect(spaceWidth).toBe(278);
+      expect(eacuteWidth).not.toBe(spaceWidth);
+    });
+  });
+
+  // ─────────────────────────────────────────────────────────────────────────────
+  // Font dictionary /Encoding
+  // ─────────────────────────────────────────────────────────────────────────────
+
+  describe("font dictionary /Encoding", () => {
+    it("adds /Encoding WinAnsiEncoding for Helvetica", async () => {
+      const pdf = PDF.create();
+      const page = pdf.addPage();
+
+      page.drawText("Hello", { font: "Helvetica", x: 50, y: 700, size: 12 });
+
+      const bytes = await pdf.save();
+      const parsed = await PDF.load(bytes);
+      const parsedPage = parsed.getPage(0)!;
+
+      const resources = parsedPage.getResources();
+      const fonts = resources.getDict("Font");
+      expect(fonts).toBeDefined();
+
+      // Find the Helvetica font dict
+      let foundEncoding = false;
+
+      for (const [, value] of fonts!) {
+        if (value.type === "dict") {
+          const baseFont = value.getName("BaseFont");
+
+          if (baseFont && baseFont.value === "Helvetica") {
+            const encoding = value.getName("Encoding");
+            expect(encoding).toBeDefined();
+            expect(encoding!.value).toBe("WinAnsiEncoding");
+            foundEncoding = true;
+          }
+        }
+      }
+
+      expect(foundEncoding).toBe(true);
+    });
+
+    it("adds /Encoding WinAnsiEncoding for Times-Roman", async () => {
+      const pdf = PDF.create();
+      const page = pdf.addPage();
+
+      page.drawText("Hello", { font: "Times-Roman", x: 50, y: 700, size: 12 });
+
+      const bytes = await pdf.save();
+      const parsed = await PDF.load(bytes);
+      const parsedPage = parsed.getPage(0)!;
+
+      const resources = parsedPage.getResources();
+      const fonts = resources.getDict("Font");
+
+      let foundEncoding = false;
+
+      for (const [, value] of fonts!) {
+        if (value.type === "dict") {
+          const baseFont = value.getName("BaseFont");
+
+          if (baseFont && baseFont.value === "Times-Roman") {
+            const encoding = value.getName("Encoding");
+            expect(encoding).toBeDefined();
+            expect(encoding!.value).toBe("WinAnsiEncoding");
+            foundEncoding = true;
+          }
+        }
+      }
+
+      expect(foundEncoding).toBe(true);
+    });
+
+    it("does NOT add /Encoding for Symbol font", async () => {
+      const pdf = PDF.create();
+      const page = pdf.addPage();
+
+      // Symbol font maps ASCII 'a' to alpha (α) via SymbolEncoding
+      page.drawText("a", { font: "Symbol", x: 50, y: 700, size: 12 });
+
+      const bytes = await pdf.save();
+      const parsed = await PDF.load(bytes);
+      const parsedPage = parsed.getPage(0)!;
+      const fonts = parsedPage.getResources().getDict("Font");
+
+      // Track the match so a missing Symbol font dict fails instead of passing vacuously
+      let foundFont = false;
+      for (const [, value] of fonts!) {
+        if (value.type === "dict") {
+          const baseFont = value.getName("BaseFont");
+          if (baseFont && baseFont.value === "Symbol") {
+            expect(value.getName("Encoding")).toBeUndefined();
+            foundFont = true;
+          }
+        }
+      }
+      expect(foundFont).toBe(true);
+    });
+
+    it("does NOT add /Encoding for ZapfDingbats font", async () => {
+      const pdf = PDF.create();
+      const page = pdf.addPage();
+
+      // ZapfDingbats maps specific characters via its encoding
+      page.drawText("!", { font: "ZapfDingbats", x: 50, y: 700, size: 12 });
+
+      const bytes = await pdf.save();
+      const parsed = await PDF.load(bytes);
+      const parsedPage = parsed.getPage(0)!;
+      const fonts = parsedPage.getResources().getDict("Font");
+
+      // Track the match so a missing ZapfDingbats font dict fails instead of passing vacuously
+      let foundFont = false;
+      for (const [, value] of fonts!) {
+        if (value.type === "dict") {
+          const baseFont = value.getName("BaseFont");
+          if (baseFont && baseFont.value === "ZapfDingbats") {
+            expect(value.getName("Encoding")).toBeUndefined();
+            foundFont = true;
+          }
+        }
+      }
+      expect(foundFont).toBe(true);
+    });
+  });
+
+  // ─────────────────────────────────────────────────────────────────────────────
+  // Content stream encoding
+  // ─────────────────────────────────────────────────────────────────────────────
+
+  describe("content stream encoding", () => {
+    it("encodes accented text as hex string with correct WinAnsi bytes", async () => {
+      const pdf = PDF.create();
+      const page = pdf.addPage();
+
+      page.drawText("café", { font: "Helvetica", x: 50, y: 700, size: 12 });
+
+      const bytes = await pdf.save();
+      // Search for the hex-encoded WinAnsi bytes in the raw PDF
+      // 'c'=0x63, 'a'=0x61, 'f'=0x66, 'é'=0xE9
+      const pdfText = String.fromCharCode(...bytes);
+      expect(pdfText).toContain("<636166E9>");
+    });
+
+    it("does NOT produce UTF-8 multi-byte sequences for accented chars", async () => {
+      const pdf = PDF.create();
+      const page = pdf.addPage();
+
+      page.drawText("é", { font: "Helvetica", x: 50, y: 700, size: 12 });
+
+      const bytes = await pdf.save();
+      const pdfText = String.fromCharCode(...bytes);
+
+      // Should NOT contain the UTF-8 encoding of é (0xC3 0xA1)
+      // Should contain the hex-encoded WinAnsi byte 0xE9
+      expect(pdfText).toContain("<E9>");
+      // The UTF-8 sequence C3A1 should NOT appear as a literal string operand
+      expect(pdfText).not.toMatch(/\(.*\xC3\xA1.*\)/);
+    });
+
+    it("encodes Euro sign correctly in 0x80-0x9F range", async () => {
+      const pdf = PDF.create();
+      const page = pdf.addPage();
+
+      page.drawText("€", { font: "Helvetica", x: 50, y: 700, size: 12 });
+
+      const bytes = await pdf.save();
+      const pdfText = String.fromCharCode(...bytes);
+
+      // € is WinAnsi byte 0x80
+      expect(pdfText).toContain("<80>");
+    });
+
+    it("substitutes unencodable characters with .notdef (byte 0x00)", async () => {
+      const pdf = PDF.create();
+      const page = pdf.addPage();
+
+      // Chinese character is not in WinAnsi
+      page.drawText("A\u4E16B", { font: "Helvetica", x: 50, y: 700, size: 12 });
+
+      const bytes = await pdf.save();
+      const pdfText = String.fromCharCode(...bytes);
+
+      // 'A'=0x41, .notdef=0x00, 'B'=0x42
+      expect(pdfText).toContain("<410042>");
+    });
+  });
+
+  // ─────────────────────────────────────────────────────────────────────────────
+  // Round-trip rendering
+  // ─────────────────────────────────────────────────────────────────────────────
+
+  describe("round-trip rendering", () => {
+    it("generates a valid PDF with accented text", async () => {
+      const pdf = PDF.create();
+      const page = pdf.addPage({ size: "letter" });
+
+      page.drawText("café résumé naïve", {
+        font: "Helvetica",
+        x: 50,
+        y: 700,
+        size: 14,
+      });
+
+      page.drawText("Ñoño año español", {
+        font: "Times-Roman",
+        x: 50,
+        y: 650,
+        size: 14,
+      });
+
+      page.drawText("Ärger über Größe", {
+        font: "Courier",
+        x: 50,
+        y: 600,
+        size: 14,
+      });
+
+      // 0x80-0x9F range characters
+      page.drawText('€42 — "special" edition', {
+        font: "Helvetica",
+        x: 50,
+        y: 550,
+        size: 14,
+      });
+
+      const bytes = await pdf.save();
+      expect(isPdfHeader(bytes)).toBe(true);
+
+      // Verify the PDF can be re-parsed
+      const parsed = await PDF.load(bytes);
+      expect(parsed.getPageCount()).toBe(1);
+
+      await saveTestOutput("drawing/latin1-accented-text.pdf", bytes);
+    });
+
+    it("generates a valid PDF with all Standard 14 font families", async () => {
+      const pdf = PDF.create();
+      const page = pdf.addPage({ size: "letter" });
+
+      const fonts = [
+        "Helvetica",
+        "Helvetica-Bold",
+        "Helvetica-Oblique",
+        "Times-Roman",
+        "Times-Bold",
+        "Times-Italic",
+        "Courier",
+        "Courier-Bold",
+      ] as const;
+
+      let y = 700;
+
+      for (const font of fonts) {
+        page.drawText(`${font}: àáâãäåæçèéêëìíîïðñòóôõöùúûüýþÿ`, {
+          font,
+          x: 50,
+          y,
+          size: 10,
+        });
+        y -= 25;
+      }
+
+      const bytes = await pdf.save();
+      expect(isPdfHeader(bytes)).toBe(true);
+
+      const parsed = await PDF.load(bytes);
+      expect(parsed.getPageCount()).toBe(1);
+
+      await saveTestOutput("drawing/latin1-all-fonts.pdf", bytes);
+    });
+
+    it("Standard 14 text mixing ASCII and accented characters saves and re-parses", async () => {
+      const pdf = PDF.create();
+      const page = pdf.addPage();
+
+      // Standard 14 font only (new encoding path); no embedded font is exercised here
+      page.drawText("Standard 14: café", {
+        font: "Helvetica",
+        x: 50,
+        y: 700,
+        size: 12,
+      });
+
+      const bytes = await pdf.save();
+      expect(isPdfHeader(bytes)).toBe(true);
+
+      // Verify it re-parses
+      const parsed = await PDF.load(bytes);
+      expect(parsed.getPageCount()).toBe(1);
+    });
+  });
+
+  // ─────────────────────────────────────────────────────────────────────────────
+  // Bytes pipeline
+  // ─────────────────────────────────────────────────────────────────────────────
+
+  describe("bytes pipeline", () => {
+    it("appendContent with string still works for ASCII content", async () => {
+      const pdf = PDF.create();
+      const page = pdf.addPage();
+
+      // drawRectangle uses string-based appendContent internally
+      page.drawRectangle({
+        x: 50,
+        y: 50,
+        width: 100,
+        height: 100,
+        color: red,
+      });
+
+      const bytes = await pdf.save();
+      expect(isPdfHeader(bytes)).toBe(true);
+    });
+
+    it("PathBuilder operations still produce correct output", async () => {
+      const pdf = PDF.create();
+      const page = pdf.addPage();
+
+      // drawPath uses PathBuilder → ContentAppender
+      page.drawPath().moveTo(50, 50).lineTo(150, 50).lineTo(100, 150).close().fill({ color: red });
+
+      const bytes = await pdf.save();
+      expect(isPdfHeader(bytes)).toBe(true);
+    });
+
+    it("drawText on a fresh page still produces a valid PDF", async () => {
+      // NOTE: no image is drawn here; only the Standard 14 text path is exercised.
+      // Covering drawImage itself would need an image fixture.
+      const pdf = PDF.create();
+      const page = pdf.addPage();
+
+      page.drawText("Image test page", {
+        font: "Helvetica",
+        x: 50,
+        y: 700,
+        size: 12,
+      });
+
+      const bytes = await pdf.save();
+      expect(isPdfHeader(bytes)).toBe(true);
+    });
+  });
+});

From e7032095ea96d3db75ef0adc0806e96eb5cc37fc Mon Sep 17 00:00:00 2001
From: Lucas Smith <me@lucasjamessmith.me>
Date: Wed, 28 Jan 2026 15:52:49 +1100
Subject: [PATCH 4/4] fix(test): correct UTF-8 byte sequence for é (C3 A9) in latin1 test

---
 src/api/drawing/latin1-encoding.test.ts | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/api/drawing/latin1-encoding.test.ts b/src/api/drawing/latin1-encoding.test.ts
index 094cb35..07fa3cb 100644
--- a/src/api/drawing/latin1-encoding.test.ts
+++ b/src/api/drawing/latin1-encoding.test.ts
@@ -283,11 +283,11 @@ describe("Latin-1 / WinAnsi encoding for Standard 14 fonts", () => {
       const bytes = await pdf.save();
       const pdfText = String.fromCharCode(...bytes);
 
-      // Should NOT contain the UTF-8 encoding of é (0xC3 0xA1)
+      // Should NOT contain the UTF-8 encoding of é (0xC3 0xA9)
       // Should contain the hex-encoded WinAnsi byte 0xE9
       expect(pdfText).toContain("<E9>");
-      // The UTF-8 sequence C3A1 should NOT appear as a literal string operand
-      expect(pdfText).not.toMatch(/\(.*\xC3\xA1.*\)/);
+      // The UTF-8 sequence C3A9 should NOT appear as a literal string operand
+      expect(pdfText).not.toMatch(/\(.*\xC3\xA9.*\)/);
     });
 
     it("encodes Euro sign correctly in 0x80-0x9F range", async () => {