From 4df1f4c2b807d21515f4f19d3524a53cf504112c Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Tue, 17 Feb 2026 15:42:58 -0500
Subject: [PATCH 01/26] Start boxing character tables

---
 mathics_scanner/data/.gitignore              |   1 +
 mathics_scanner/data/boxing-characters.yml   |  95 ++++++++++++++++
 mathics_scanner/generate/build_box_tables.py | 107 +++++++++++++++++++
 mathics_scanner/generate/build_tables.py     |   2 +
 4 files changed, 205 insertions(+)
 create mode 100644 mathics_scanner/data/boxing-characters.yml
 create mode 100644 mathics_scanner/generate/build_box_tables.py

diff --git a/mathics_scanner/data/.gitignore b/mathics_scanner/data/.gitignore
index 183700b..c433af0 100644
--- a/mathics_scanner/data/.gitignore
+++ b/mathics_scanner/data/.gitignore
@@ -1 +1,2 @@
 /.python-version
+/box-character-tables.json
diff --git a/mathics_scanner/data/boxing-characters.yml b/mathics_scanner/data/boxing-characters.yml
new file mode 100644
index 0000000..1a19cf5
--- /dev/null
+++ b/mathics_scanner/data/boxing-characters.yml
@@ -0,0 +1,95 @@
+# Information about Wolfram Language boxing characters used in the
+# string representation of Boxing Expressions.
+#
+#
+# All of the key names *except* \! and \* are associated with
+# some box operator.
+#
+#
+# Fields
+# ======
+#
+#
+# ASCII (string)
+# --------------
+#
+# The character representation in ASCII.
+#
+# Operators
+# -----------
+#
+# When the string is part of a Boxing operator, the Boxing
+# operator name(s) are given.
+#
+# Unicode
+# -------
+#
+# The representation in Unicode. All Unicode characters fall into the
+# Private Use Area of Unicode that Wolfram uses for its own internal
+# system markers, specifically the range 0xf7c0 to 0xf7cd.
+#
+
+# Coding note: below we use single quotes, not double quotes so that we do
+# not have to escape backslash characters.
+
+LinearSyntaxAmp:
+  ASCII: '\&'
+  Operators: []
+  Unicode: '\uf7c7'
+
+LinearSyntaxAt:
+  ASCII: '\@'
+  Operators: [RadicalBox, SqrtBox]
+  Unicode: '\uf7c1'
+
+LinearSyntaxBacktick:
+  ASCII: '\`'
+  Operators: [FormBox]
+  Unicode: '\uf7cd'
+
+LinearSyntaxBang:
+  ASCII: '\!'
+  Operators:
+  Unicode: '\uf7c1'
+
+LinearSyntaxCaret:
+  ASCII: '\^'
+  Operators: SuperscriptBox
+  Unicode: '\uf7c6'
+
+# Note: this name does not appear in CodeParse
+LinearSyntaxCloseParen:
+  ASCII: '\)'
+  Operators: [RowBox]
+  Unicode: '\uf7c0'
+
+# Note: this name does not appear in CodeParse
+LinearSyntaxOpenParen:
+  ASCII: '\('
+  Operators: [RowBox]
+  Unicode: '\uf7cd'
+
+LinearSyntaxPercent:
+  ASCII: '\%'
+  Operators: [RadicalBox, SuperscriptBox, UnderOverscriptBox]
+  Unicode: '\uf7c5'
+
+LinearSyntaxPlus:
+  ASCII: '\+'
+  Operators: [UnderscriptBox, UnderOverscriptBox]
+  Unicode: '\uf7cb'
+
+LinearSyntaxStar:
+  ASCII: '\*'
+  Operators: []
+  Unicode: '\uf7c8'
+
+LinearSyntaxSlash:
+  ASCII: '\/'
+  Operators: [FractionBox]
+  Unicode: '\uf7cc'
+
+LinearSyntaxUnder:
+  ASCII: '\_'
+  Operators: [SubscriptBox, SubsuperscriptBox]
+  Unicode: '\uf7ca'
diff --git a/mathics_scanner/generate/build_box_tables.py b/mathics_scanner/generate/build_box_tables.py
new file mode 100644
index 0000000..f8533a7
--- /dev/null
+++ b/mathics_scanner/generate/build_box_tables.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python
+# This script reads the data from boxing-characters.yml and converts it to the
+# JSON format used by the library internally
+
+import json
+import os.path as osp
+import sys
+from pathlib import Path
+
+import click
+import yaml
+
+try:
+    from mathics_scanner.version import __version__
+except ImportError:
+    # When using build isolation
+    __version__ = "unknown"
+
+
+def get_srcdir() -> str:
+    filename = osp.normcase(osp.dirname(osp.abspath(__file__)))
+    return osp.realpath(filename)
+
+
+def read(*rnames) -> str:
+    return open(osp.join(get_srcdir(), *rnames)).read()
+
+
+def compile_tables(data: dict) -> dict:
+    """
+    Compiles the general table into the tables used internally by the library.
+    This facilitates fast access of this information by clients needing this
+    information.
+    """
+
+    # Multiple entries in the YAML table are redundant in the following sense:
+    # when a character has a plain-text equivalent but the plain-text
+    # equivalent is equal to its WL unicode representation (i.e. the
+    # "wl-unicode" field is the same as the "unicode-equivalent" field) then it
+    # is considered redundant for us, since no conversion is needed.
+    #
+    # As an optimization, we explicitly remove any redundant characters from all
+    # JSON tables. This makes the tables smaller (therefore easier to load), as
+    # well as the corresponding regex patterns. This implies that not all
+    # characters that have a unicode equivalent are included in `wl_to_ascii`
+    # or `wl_to_unicode_dict`. Furthermore, this implies that not all
+    # characters that have a unicode inverse are included in
+    # `unicode_to_wl_dict`
+
+    # Map each ASCII escape sequence to its "Unicode" field; the inverse map follows.
+    ascii_to_unicode = {v["ASCII"]: v["Unicode"] for v in data.values()}
+
+    unicode_to_ascii = {v["Unicode"]: v["ASCII"] for v in data.values()}
+
+    return {
+        "ascii-to-unicode": ascii_to_unicode,
+        "unicode-to-ascii": unicode_to_ascii,
+    }
+
+
+DEFAULT_DATA_DIR = Path(osp.normpath(osp.dirname(__file__)), "..", "data")
+
+ALL_FIELDS = [
+    "unicode-to-ascii",
+    "ascii-to-unicode",
+]
+
+
+@click.command()
+@click.version_option(version=__version__)  # NOQA
+@click.option(
+    "--field",
+    "-f",
+    multiple=True,
+    required=False,
+    help="Select which fields to include in JSON.",
+    show_default=True,
+    type=click.Choice(ALL_FIELDS),
+    default=ALL_FIELDS,
+)
+@click.option(
+    "--output",
+    "-o",
+    show_default=True,
+    type=click.Path(writable=True),
+    default=DEFAULT_DATA_DIR / "box-character-tables.json",
+)
+@click.argument(
+    "data_dir", type=click.Path(readable=True), default=DEFAULT_DATA_DIR, required=False
+)
+def main(field, output, data_dir):
+    with (
+        open(Path(data_dir) / "boxing-characters.yml", "r", encoding="utf8") as i,
+        open(output, "w", encoding="utf8") as o,
+    ):
+        # Load the YAML data. data_dir is a str when given on the command line.
+        data = yaml.load(i, Loader=yaml.FullLoader)
+
+        # Precompile the tables, keeping only the ones selected with --field.
+        data = {k: v for k, v in compile_tables(data).items() if k in field}
+
+        # Dump the preprocessed dictionaries to disk as JSON.
+        json.dump(data, o)
+
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
diff --git a/mathics_scanner/generate/build_tables.py b/mathics_scanner/generate/build_tables.py
index 6d1da59..1b2ce10 100755
--- a/mathics_scanner/generate/build_tables.py
+++ b/mathics_scanner/generate/build_tables.py
@@ -249,6 +249,7 @@ def compile_tables(data: dict) -> dict:
         "ascii-operator-to-character-symbol": ascii_operator_to_character_symbol,
         "ascii-operator-to-unicode": ascii_operator_to_unicode,
         "ascii-operator-to-wl-unicode": ascii_operator_to_wl_unicode,
+        "box-characters": box_characters,
         "builtin-constants": builtin_constants,
         "latex-named-characters": latex_named_characters,
         "letterlikes": letterlikes,
@@ -281,6 +282,7 @@ def compile_tables(data: dict) -> dict:
     "ascii-operator-to-symbol",
     "ascii-operator-to-unicode",
     "ascii-operator-to-wl-unicode",
+    "box-characters",
     # "builtin-constants",  # not used yet
     "latex-named-characters",
     "letterlikes",

From f15581ddb55119cc9aac5872326efc7e548f06fc Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Tue, 17 Feb 2026 18:16:44 -0500
Subject: [PATCH 02/26] Lots of build code cleanup

* Rename/regularize JSON and YAML naming
* DRY tests
* remove egg stuff from pyproject.toml
---
 Makefile                                      | 21 ++++++------
 admin-tools/make-JSON-tables.sh               |  8 ++---
 mathics_scanner/characters.py                 |  2 +-
 ...ild_box_tables.py => boxing_characters.py} |  2 +-
 .../{build_tables.py => named_characters.py}  |  4 +--
 ...{build_operator_tables.py => operators.py} |  0
 mathics_scanner/load.py                       |  8 ++---
 pyproject.toml                                |  9 +++--
 setup.py                                      | 33 ++++++++-----------
 test/helper.py                                |  7 ++++
 test/test_ascii.py                            |  8 +----
 test/test_character_table_consistency.py      |  9 ++---
 test/test_general_yaml_sanity.py              |  5 +--
 test/test_has_unicode_inverse_sanity.py       |  8 ++---
 test/test_letterlikes_sanity.py               |  8 +----
 test/test_unicode.py                          |  8 +----
 test/test_urls.py                             |  7 ++--
 test/test_wl_to_ascii.py                      |  5 ++-
 18 files changed, 63 insertions(+), 89 deletions(-)
 rename mathics_scanner/generate/{build_box_tables.py => boxing_characters.py} (98%)
 rename mathics_scanner/generate/{build_tables.py => named_characters.py} (99%)
 rename mathics_scanner/generate/{build_operator_tables.py => operators.py} (100%)
 create mode 100644 test/helper.py

diff --git a/Makefile b/Makefile
index b374254..57e6fa8 100644
--- a/Makefile
+++ b/Makefile
@@ -22,20 +22,21 @@ PIP_INSTALL_OPTS ?=
 #: Default target - same as "develop"
 all: develop
 
-mathics_scanner/data/character-tables.json: mathics_scanner/data/named-characters.yml
-	$(PIP) install -r requirements-dev.txt
-	$(PYTHON) mathics_scanner/generate/build_tables.py
+mathics_scanner/data/boxing-characters.json: mathics_scanner/data/boxing-characters.yml
+	$(PYTHON) mathics_scanner/generate/boxing_characters.py
+
+mathics_scanner/data/named-characters.json: mathics_scanner/data/named-characters.yml
+	$(PYTHON) mathics_scanner/generate/named_characters.py
 
 mathics_scanner/data/operators.json: mathics_scanner/data/operators.yml
-	$(PIP) install -r requirements-dev.txt
-	$(PYTHON) mathics_scanner/generate/build_operator_tables.py
+	$(PYTHON) mathics_scanner/generate/operators.py
 
 #: build everything needed to install
-build: mathics_scanner/data/characters.json mathics_scanner/data/operators.json
+build: mathics_scanner/data/boxing-characters.json mathics_scanner/data/named-characters.json mathics_scanner/data/operators.json
 	$(PYTHON) ./setup.py build
 
 #: Set up to run from the source tree
-develop: mathics_scanner/data/character-tables.json mathics_scanner/data/operators.json
+develop: mathics_scanner/data/boxing-characters.json mathics_scanner/data/named-characters.json mathics_scanner/data/operators.json
 	$(PIP) install -e .$(PIP_INSTALL_OPTS)
 
 #: Build distribution
@@ -56,16 +57,16 @@ check: pytest
 test: check
 
 #: Build Sphinx HTML documentation
-doc:  mathics_scanner/data/character-tables.json
+doc:  mathics_scanner/data/named-characters.json
 	make -C docs html
 
 #: Remove derived files
 clean:
 	@find . -name *.pyc -type f -delete; \
-	$(RM) -f mathics_scanner/data/character-tables.json mathics_scanner/data/operators.json || true
+	$(RM) -f mathics_scanner/data/*.json || true
 
 #: Run py.test tests. Use environment variable "o" for pytest options
-pytest: mathics_scanner/data/character-tables.json
+pytest: mathics_scanner/data/named-characters.json
 	$(PYTHON) -m pytest test $o
 
 #: Print to stdout a GNU Readline inputrc without Unicode
diff --git a/admin-tools/make-JSON-tables.sh b/admin-tools/make-JSON-tables.sh
index b020579..9307397 100755
--- a/admin-tools/make-JSON-tables.sh
+++ b/admin-tools/make-JSON-tables.sh
@@ -1,10 +1,10 @@
 #!/bin/bash
-# Create a complete set of tables.
-# This just runs build_tables.py in this distribution
+# Create a complete set of JSON tables.
 bs=${BASH_SOURCE[0]}
 mydir=$(dirname $bs)
 PYTHON=${PYTHON:-python}
 
 cd $mydir/../mathics_scanner/data
-$PYTHON ../generate/build_tables.py -o character-tables.json
-$PYTHON ../generate/build_operator_tables.py -o operators.json
+$PYTHON ../generate/boxing_characters.py -o boxing-characters.json
+$PYTHON ../generate/named_characters.py -o named-characters.json
+$PYTHON ../generate/operators.py -o operators.json
diff --git a/mathics_scanner/characters.py b/mathics_scanner/characters.py
index 057dc9c..64a5556 100644
--- a/mathics_scanner/characters.py
+++ b/mathics_scanner/characters.py
@@ -23,7 +23,7 @@ def get_srcdir() -> str:
 ROOT_DIR = get_srcdir()
 
 # Load the conversion tables from disk
-characters_path = osp.join(ROOT_DIR, "data", "character-tables.json")
+characters_path = osp.join(ROOT_DIR, "data", "named-characters.json")
 if osp.exists(characters_path):
     with open(characters_path, "r") as f:
         _data = ujson.load(f)
diff --git a/mathics_scanner/generate/build_box_tables.py b/mathics_scanner/generate/boxing_characters.py
similarity index 98%
rename from mathics_scanner/generate/build_box_tables.py
rename to mathics_scanner/generate/boxing_characters.py
index f8533a7..935fdf8 100644
--- a/mathics_scanner/generate/build_box_tables.py
+++ b/mathics_scanner/generate/boxing_characters.py
@@ -83,7 +83,7 @@ def compile_tables(data: dict) -> dict:
     "-o",
     show_default=True,
     type=click.Path(writable=True),
-    default=DEFAULT_DATA_DIR / "box-character-tables.json",
+    default=DEFAULT_DATA_DIR / "boxing-characters.json",
 )
 @click.argument(
     "data_dir", type=click.Path(readable=True), default=DEFAULT_DATA_DIR, required=False
diff --git a/mathics_scanner/generate/build_tables.py b/mathics_scanner/generate/named_characters.py
similarity index 99%
rename from mathics_scanner/generate/build_tables.py
rename to mathics_scanner/generate/named_characters.py
index 1b2ce10..65e4f5e 100755
--- a/mathics_scanner/generate/build_tables.py
+++ b/mathics_scanner/generate/named_characters.py
@@ -249,7 +249,6 @@ def compile_tables(data: dict) -> dict:
         "ascii-operator-to-character-symbol": ascii_operator_to_character_symbol,
         "ascii-operator-to-unicode": ascii_operator_to_unicode,
         "ascii-operator-to-wl-unicode": ascii_operator_to_wl_unicode,
-        "box-characters": box_characters,
         "builtin-constants": builtin_constants,
         "latex-named-characters": latex_named_characters,
         "letterlikes": letterlikes,
@@ -282,7 +281,6 @@ def compile_tables(data: dict) -> dict:
     "ascii-operator-to-symbol",
     "ascii-operator-to-unicode",
     "ascii-operator-to-wl-unicode",
-    "box-characters",
     # "builtin-constants",  # not used yet
     "latex-named-characters",
     "letterlikes",
@@ -323,7 +321,7 @@ def compile_tables(data: dict) -> dict:
     "-o",
     show_default=True,
     type=click.Path(writable=True),
-    default=DEFAULT_DATA_DIR / "character-tables.json",
+    default=DEFAULT_DATA_DIR / "named-characters.json",
 )
 @click.argument(
     "data_dir", type=click.Path(readable=True), default=DEFAULT_DATA_DIR, required=False
diff --git a/mathics_scanner/generate/build_operator_tables.py b/mathics_scanner/generate/operators.py
similarity index 100%
rename from mathics_scanner/generate/build_operator_tables.py
rename to mathics_scanner/generate/operators.py
diff --git a/mathics_scanner/load.py b/mathics_scanner/load.py
index 66ef912..1826e79 100644
--- a/mathics_scanner/load.py
+++ b/mathics_scanner/load.py
@@ -4,16 +4,16 @@
 
 import yaml
 
-from mathics_scanner.generate.build_tables import DEFAULT_DATA_DIR
+from mathics_scanner.generate.named_characters import DEFAULT_DATA_DIR
 
 
-def load_mathics_character_yaml():
+def load_mathics3_named_characters_yaml():
     with open(DEFAULT_DATA_DIR / "named-characters.yml", "r") as yaml_file:
         yaml_data = yaml.load(yaml_file, Loader=yaml.FullLoader)
     return yaml_data
 
 
-def load_mathics_character_json():
-    with open(DEFAULT_DATA_DIR / "character-tables.json", "r") as json_file:
+def load_mathics3_named_characters_json():
+    with open(DEFAULT_DATA_DIR / "named-characters.json", "r") as json_file:
         json_data = json.load(json_file)
     return json_data
diff --git a/pyproject.toml b/pyproject.toml
index f78e461..35b58e7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,8 +51,9 @@ full = [
 ]
 
 [project.scripts]
-mathics3-generate-json-table = "mathics_scanner.generate.build_tables:main"
-mathics3-generate-operator-json-table = "mathics_scanner.generate.build_operator_tables:main"
+mathics3-make-boxing-character-json = "mathics_scanner.generate.boxing_characters:main"
+mathics3-make-named-character-json = "mathics_scanner.generate.named_characters:main"
+mathics3-make-operator-json = "mathics_scanner.generate.operators:main"
 mathics3-tokens = "mathics_scanner.mathics3_tokens:main"
 
 [tool.setuptools]
@@ -64,11 +65,13 @@ packages = [
 
 [tool.setuptools.package-data]
 "mathics_scanner" = [
+    "data/boxing-characters.json",
+    "data/boxing-characters.yml",
+    "data/named-characters.json",
     "data/named-characters.yml",
     "data/operators.yml",
     "data/operators.json",
     "data/*.csv",
-    "data/character-tables.json",  # List this explicitly since it is needed
     "data/*.json",
     "data/ExampleData/*",
 ]
diff --git a/setup.py b/setup.py
index 3b48ace..82c59b7 100644
--- a/setup.py
+++ b/setup.py
@@ -25,39 +25,34 @@
 mathics-users@googlegroups.com and ask for help.
 """
 
+import os
 import os.path as osp
-import subprocess
-import sys
 
 from setuptools import setup
-from setuptools.command.egg_info import egg_info
+from setuptools.command.build_py import build_py as setuptools_build_py
 
 
 def get_srcdir():
-    """Return the directory of the location if this code"""
+    """return the directory of the location if this code"""
     filename = osp.normcase(osp.dirname(osp.abspath(__file__)))
     return osp.realpath(filename)
 
 
-class table_building_egg_info(egg_info):
-    """This runs as part of building an sdist"""
+class build_py(setuptools_build_py):
+    def run(self):  # Generate any missing JSON tables, then build as usual.
+        for table_type in ("boxing-character", "named-character", "operator"):
+            json_data_file = osp.join("data", f"{table_type}s.json")  # plural names
+            json_path = osp.join(get_srcdir(), "mathics_scanner", json_data_file)
+            if not osp.exists(json_path):
+                os.system(f'mathics3-make-{table_type}-json -o "{json_path}"')
+            self.distribution.package_data["mathics_scanner"].append(json_data_file)
+        setuptools_build_py.run(self)
 
-    def finalize_options(self):
-        """Run program to create JSON tables"""
-        build_tables_program = osp.join(
-            get_srcdir(), "mathics_scanner", "generate", "build_tables.py"
-        )
-        print(f"Building JSON tables via {build_tables_program}")
-        result = subprocess.run([sys.executable, build_tables_program], check=False)
-        if result.returncode:
-            raise RuntimeError(
-                f"Running {build_tables_program} exited with code {result.returncode}"
-            )
-        super().finalize_options()
 
+CMDCLASS = {"build_py": build_py}
 
 setup(
-    cmdclass={"egg_info": table_building_egg_info},
+    cmdclass=CMDCLASS,
     # don't pack Mathics in egg because of media files, etc.
     zip_safe=False,
 )
diff --git a/test/helper.py b/test/helper.py
new file mode 100644
index 0000000..307ab5f
--- /dev/null
+++ b/test/helper.py
@@ -0,0 +1,7 @@
+from mathics_scanner.load import (
+    load_mathics3_named_characters_json,
+    load_mathics3_named_characters_yaml,
+)
+
+yaml_data = load_mathics3_named_characters_yaml()
+json_data = load_mathics3_named_characters_json()
diff --git a/test/test_ascii.py b/test/test_ascii.py
index 0eb9c5a..99a4023 100644
--- a/test/test_ascii.py
+++ b/test/test_ascii.py
@@ -1,12 +1,6 @@
 # -*- coding: utf-8 -*-
 
-from mathics_scanner.load import (
-    load_mathics_character_json,
-    load_mathics_character_yaml,
-)
-
-yaml_data = load_mathics_character_yaml()
-json_data = load_mathics_character_json()
+from test.helper import json_data
 
 
 def test_ascii():
diff --git a/test/test_character_table_consistency.py b/test/test_character_table_consistency.py
index 35b8b40..c6c652a 100644
--- a/test/test_character_table_consistency.py
+++ b/test/test_character_table_consistency.py
@@ -1,14 +1,9 @@
 # -*- coding: utf-8 -*-
 
+from test.helper import json_data, yaml_data
+
 from mathics_scanner.characters import replace_unicode_with_wl as unicode_to_wl
 from mathics_scanner.characters import replace_wl_with_plain_text as wl_to_unicode
-from mathics_scanner.load import (
-    load_mathics_character_json,
-    load_mathics_character_yaml,
-)
-
-yaml_data = load_mathics_character_yaml()
-json_data = load_mathics_character_json()
 
 
 def test_ascii_fields_in_json():
diff --git a/test/test_general_yaml_sanity.py b/test/test_general_yaml_sanity.py
index e419877..e7e8940 100644
--- a/test/test_general_yaml_sanity.py
+++ b/test/test_general_yaml_sanity.py
@@ -2,10 +2,7 @@
 
 import re
 import unicodedata
-
-from mathics_scanner.load import load_mathics_character_yaml
-
-yaml_data = load_mathics_character_yaml()
+from test.helper import yaml_data
 
 
 def check_attr_is_invertible(attr: str):
diff --git a/test/test_has_unicode_inverse_sanity.py b/test/test_has_unicode_inverse_sanity.py
index f71a7e2..7949788 100644
--- a/test/test_has_unicode_inverse_sanity.py
+++ b/test/test_has_unicode_inverse_sanity.py
@@ -1,12 +1,12 @@
 # -*- coding: utf-8 -*-
 
 from mathics_scanner.load import (
-    load_mathics_character_yaml,
-    load_mathics_character_json,
+    load_mathics3_named_characters_json,
+    load_mathics3_named_characters_yaml,
 )
 
-yaml_data = load_mathics_character_yaml()
-json_data = load_mathics_character_json()
+yaml_data = load_mathics3_named_characters_yaml()
+json_data = load_mathics3_named_characters_json()
 
 
 def test_has_unicode_inverse_sanity():
diff --git a/test/test_letterlikes_sanity.py b/test/test_letterlikes_sanity.py
index b2dc381..01533a2 100644
--- a/test/test_letterlikes_sanity.py
+++ b/test/test_letterlikes_sanity.py
@@ -1,12 +1,6 @@
 # -*- coding: utf-8 -*-
 
-from mathics_scanner.load import (
-    load_mathics_character_yaml,
-    load_mathics_character_json,
-)
-
-yaml_data = load_mathics_character_yaml()
-json_data = load_mathics_character_json()
+# from test.helper import json_data, yaml_data
 
 
 def test_letterlikes_sanity():
diff --git a/test/test_unicode.py b/test/test_unicode.py
index 43fb296..f4b27a7 100644
--- a/test/test_unicode.py
+++ b/test/test_unicode.py
@@ -1,12 +1,6 @@
 # -*- coding: utf-8 -*-
 
-from mathics_scanner.load import (
-    load_mathics_character_json,
-    load_mathics_character_yaml,
-)
-
-yaml_data = load_mathics_character_yaml()
-json_data = load_mathics_character_json()
+from test.helper import yaml_data
 
 
 def test_has_unicode():
diff --git a/test/test_urls.py b/test/test_urls.py
index 6c4e1a1..a592501 100644
--- a/test/test_urls.py
+++ b/test/test_urls.py
@@ -1,19 +1,16 @@
 # -*- coding: utf-8 -*-
 import os
+from test.helper import yaml_data
 
 # from urllib.error import HTTPError, URLError
 from urllib.request import urlopen
 
 import pytest
 
-from mathics_scanner.load import load_mathics_character_yaml
-
-yaml_data = load_mathics_character_yaml()
-
 
 # This test is slow, so do only on request!
 @pytest.mark.skipif(
-    not os.environ.get("MATHICS_LINT"), reason="Lint checking done only when specified"
+    not os.environ.get("MATHICS3_LINT"), reason="Lint checking done only when specified"
 )
 def test_yaml_urls():
     for k, v in yaml_data.items():
diff --git a/test/test_wl_to_ascii.py b/test/test_wl_to_ascii.py
index b6444d4..f8bf92b 100644
--- a/test/test_wl_to_ascii.py
+++ b/test/test_wl_to_ascii.py
@@ -1,9 +1,8 @@
 # -*- coding: utf-8 -*-
 
-from mathics_scanner.characters import replace_wl_with_plain_text
-from mathics_scanner.load import load_mathics_character_yaml
+from test.helper import yaml_data
 
-yaml_data = load_mathics_character_yaml()
+from mathics_scanner.characters import replace_wl_with_plain_text
 
 
 def wl_to_ascii(wl_input: str) -> str:

From 1a5efa38b60a0c2a53125ce10422829c84842078 Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Tue, 17 Feb 2026 19:55:06 -0500
Subject: [PATCH 03/26] Go over CI.

Remove unused code.
---
 .github/workflows/osx.yml                     |  5 ++++-
 .github/workflows/ubuntu.yml                  |  5 ++++-
 .github/workflows/windows.yml                 |  7 ++++---
 mathics_scanner/generate/boxing_characters.py | 15 +--------------
 mathics_scanner/generate/named_characters.py  | 15 +--------------
 mathics_scanner/generate/operators.py         | 18 ++----------------
 6 files changed, 16 insertions(+), 49 deletions(-)

diff --git a/.github/workflows/osx.yml b/.github/workflows/osx.yml
index c740b23..d2c29f8 100644
--- a/.github/workflows/osx.yml
+++ b/.github/workflows/osx.yml
@@ -30,5 +30,8 @@ jobs:
       run: |
         pip install -r requirements-dev.txt
         pip install -r requirements-full.txt
-        python -m mathics_scanner.generate.build_tables
+        # I don't think I need this anymore:
+        # python -m mathics_scanner.generate.boxing_characters
+        # python -m mathics_scanner.generate.named_characters
+        # python -m mathics_scanner.generate.operators
         make check
diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml
index 05d8de2..34e873c 100644
--- a/.github/workflows/ubuntu.yml
+++ b/.github/workflows/ubuntu.yml
@@ -29,5 +29,8 @@ jobs:
       run: |
         pip install -r requirements-dev.txt
         pip install -r requirements-full.txt
-        python -m mathics_scanner.generate.build_tables
+        # Don't think I need this anymore
+        # python -m mathics_scanner.generate.boxing_characters
+        # python -m mathics_scanner.generate.named_characters
+        # python -m mathics_scanner.generate.operators
         make check
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index 768802b..8f88834 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -28,8 +28,9 @@ jobs:
         pip install -e .
     - name: Test Mathics3
       run: |
-        # Ideally we should not have to do this.
-        python mathics_scanner/generate/build_tables.py
-        python mathics_scanner/generate/build_operator_tables.py
+        # I don't think I need this anymore:
+        # python -m mathics_scanner.generate.boxing_characters
+        # python -m mathics_scanner.generate.named_characters
+        # python -m mathics_scanner.generate.operators
         pip install -e .[dev,full]
         py.test test
diff --git a/mathics_scanner/generate/boxing_characters.py b/mathics_scanner/generate/boxing_characters.py
index 935fdf8..bdf7660 100644
--- a/mathics_scanner/generate/boxing_characters.py
+++ b/mathics_scanner/generate/boxing_characters.py
@@ -10,20 +10,7 @@
 import click
 import yaml
 
-try:
-    from mathics_scanner.version import __version__
-except ImportError:
-    # When using build isolation
-    __version__ = "unknown"
-
-
-def get_srcdir() -> str:
-    filename = osp.normcase(osp.dirname(osp.abspath(__file__)))
-    return osp.realpath(filename)
-
-
-def read(*rnames) -> str:
-    return open(osp.join(get_srcdir(), *rnames)).read()
+from mathics_scanner.version import __version__
 
 
 def compile_tables(data: dict) -> dict:
diff --git a/mathics_scanner/generate/named_characters.py b/mathics_scanner/generate/named_characters.py
index 65e4f5e..d787f5f 100755
--- a/mathics_scanner/generate/named_characters.py
+++ b/mathics_scanner/generate/named_characters.py
@@ -11,20 +11,7 @@
 import click
 import yaml
 
-try:
-    from mathics_scanner.version import __version__
-except ImportError:
-    # When using build isolation
-    __version__ = "unknown"
-
-
-def get_srcdir() -> str:
-    filename = osp.normcase(osp.dirname(osp.abspath(__file__)))
-    return osp.realpath(filename)
-
-
-def read(*rnames) -> str:
-    return open(osp.join(get_srcdir(), *rnames)).read()
+from mathics_scanner import __version__
 
 
 def re_from_keys(d: dict) -> str:
diff --git a/mathics_scanner/generate/operators.py b/mathics_scanner/generate/operators.py
index ce5fb96..a204875 100755
--- a/mathics_scanner/generate/operators.py
+++ b/mathics_scanner/generate/operators.py
@@ -12,6 +12,8 @@
 import click
 import yaml
 
+from mathics_scanner.version import __version__
+
 OPERATOR_FIELDS = [
     "actual-precedence",
     "Precedence",
@@ -25,22 +27,6 @@
 ]
 
 
-try:
-    from mathics_scanner.version import __version__
-except ImportError:
-    # When using build isolation
-    __version__ = "unknown"
-
-
-def get_srcdir() -> str:
-    filename = osp.normcase(osp.dirname(osp.abspath(__file__)))
-    return osp.realpath(filename)
-
-
-def read(*rnames) -> str:
-    return open(osp.join(get_srcdir(), *rnames)).read()
-
-
 def compile_tables(
     operator_data: Dict[str, dict], character_data: Dict[str, dict]
 ) -> Dict[str, dict]:

From 3f6acb53de768ee8a5fd23996b0858a919938d48 Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Tue, 17 Feb 2026 20:26:00 -0500
Subject: [PATCH 04/26] mathics3-tokens - add In/Out number history

---
 mathics_scanner/mathics3_tokens.py | 36 +++++++++++++-----------------
 1 file changed, 15 insertions(+), 21 deletions(-)

diff --git a/mathics_scanner/mathics3_tokens.py b/mathics_scanner/mathics3_tokens.py
index d37335d..aea94d4 100644
--- a/mathics_scanner/mathics3_tokens.py
+++ b/mathics_scanner/mathics3_tokens.py
@@ -99,29 +99,21 @@ def get_last_line_number(self):
 
     def get_in_prompt(self):
         next_line_number = self.get_last_line_number() + 1
-        self.lineno = next_line_number
-        return "{1}{0}[{2}{3}]:= {4}".format(self.in_prefix, *self.incolors)
+        return "{2}{0}[{3}{1}{4}]:= {5}".format(
+            self.in_prefix, next_line_number, *self.incolors
+        )
 
-    def get_out_prompt(self, form=None):
+    def get_out_prompt(self):
         line_number = self.get_last_line_number()
-        if form:
-            return "{2}{0}[{3}{4}]//{1}= {5}".format(
-                self.out_prefix, line_number, form, *self.outcolors
-            )
-        return "{1}{0}[{2}{3}]= {4}".format(
+        return "{2}{0}[{3}{1}{4}]= {5}".format(
             self.out_prefix, line_number, *self.outcolors
         )
 
-    def to_output(self, text, form=None):
+    def to_output(self, text):
         line_number = self.get_last_line_number()
         newline = "\n" + " " * len("Out[{0}]= ".format(line_number))
-        if form:
-            newline += (len(form) + 2) * " "
         return newline.join(text.splitlines())
 
-    def out_callback(self, out, fmt=None):
-        print(self.to_output(str(out), fmt))
-
     def read_line(self, prompt):
         if self.using_readline:
             return self.rl_read_line(prompt)
@@ -163,7 +155,7 @@ def interactive_eval_loop(shell: TerminalShell, code_tokenize_format: bool):
     while True:
         try:
             source_text = shell.feed()
-            tokens(source_text, code_tokenize_format)
+            tokens(shell, source_text, code_tokenize_format)
         except NamedCharacterSyntaxError:
             shell.errmsg(
                 "Syntax",
@@ -197,11 +189,11 @@ def interactive_eval_loop(shell: TerminalShell, code_tokenize_format: bool):
             print("\n\nGoodbye!\n")
             # raise to pass the error code on, e.g. Quit[1]
             raise
-        finally:
-            shell.reset_lineno()
+        # finally:
+        #     shell.reset_lineno()
 
 
-def tokens(source_text: str, code_tokenize_format: bool):
+def tokens(shell: TerminalShell, source_text: str, code_tokenize_format: bool):
     tokeniser = Tokeniser(
         SingleLineFeeder(source_text, "<Mathics3-tokens>", ContainerKind.STRING)
     )
@@ -217,9 +209,11 @@ def tokens(source_text: str, code_tokenize_format: bool):
         if token.tag == "END":
             break
         elif code_tokenize_format:
-            print(token.code_tokenize_format)
+            shell.to_output(token.code_tokenize_format)
+            # print(token.code_tokenize_format)
         else:
-            print(token)
+            mess = shell.get_out_prompt()
+            print(mess + str(token) + "\n")
 
 
 def main():
@@ -297,7 +291,7 @@ def main():
 
     if args.FILE is not None:
         feeder = FileLineFeeder(args.FILE)
-        tokenizer_loop(feeder, args.CodeTokenize)
+        tokenizer_loop(feeder, shell, args.CodeTokenize)
 
     else:
         interactive_eval_loop(shell, args.CodeTokenize)

From 8430dc8d919653cda9977e53974eedd3d169e1f4 Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Tue, 17 Feb 2026 20:26:58 -0500
Subject: [PATCH 05/26] More ignore

---
 mathics_scanner/data/.gitignore | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mathics_scanner/data/.gitignore b/mathics_scanner/data/.gitignore
index c433af0..2442410 100644
--- a/mathics_scanner/data/.gitignore
+++ b/mathics_scanner/data/.gitignore
@@ -1,2 +1,4 @@
 /.python-version
 /box-character-tables.json
+/boxing-characters.json
+/named-characters.json

From 0cf50763b65de675efc20b4c5b3b1bcbd910307d Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Wed, 18 Feb 2026 04:55:32 -0500
Subject: [PATCH 06/26] Start translating from Unicode to ASCII on output

---
 mathics_scanner/characters.py              | 28 ++++++++++++++++++---
 mathics_scanner/data/boxing-characters.yml | 29 +++++++++++-----------
 mathics_scanner/escape_sequences.py        | 14 ++++++++---
 mathics_scanner/mathics3_tokens.py         |  8 ++++--
 4 files changed, 56 insertions(+), 23 deletions(-)

diff --git a/mathics_scanner/characters.py b/mathics_scanner/characters.py
index 64a5556..7418a3c 100644
--- a/mathics_scanner/characters.py
+++ b/mathics_scanner/characters.py
@@ -23,13 +23,35 @@ def get_srcdir() -> str:
 ROOT_DIR = get_srcdir()
 
 # Load the conversion tables from disk
-characters_path = osp.join(ROOT_DIR, "data", "named-characters.json")
-if osp.exists(characters_path):
-    with open(characters_path, "r") as f:
+named_characters_path = osp.join(ROOT_DIR, "data", "named-characters.json")
+if osp.exists(named_characters_path):
+    with open(named_characters_path, "r") as f:
         _data = ujson.load(f)
 else:
     _data = {}
 
+boxing_characters_path = osp.join(ROOT_DIR, "data", "boxing-characters.json")
+if osp.exists(boxing_characters_path):
+    with open(boxing_characters_path, "r") as f:
+        boxing_character_data = ujson.load(f)
+else:
+    boxing_characters_data = {}
+
+boxing_unicode_to_ascii = boxing_character_data.get("unicode-to-ascii", {})
+boxing_ascii_to_unicode = boxing_character_data.get("ascii-to-unicode", {})
+
+replace_to_ascii_re = re.compile(
+    "|".join(
+        re.escape(unicode_character)
+        for unicode_character in boxing_unicode_to_ascii.keys()
+    )
+)
+
+
+def replace_box_unicode_with_ascii(s: str) -> str:
+    return replace_to_ascii_re.sub(lambda m: s[m.group(0)], s)
+
+
 # Character ranges of letters
 _letters = "a-zA-Z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u0103\u0106\u0107\
 \u010c-\u010f\u0112-\u0115\u011a-\u012d\u0131\u0141\u0142\u0147\u0148\
diff --git a/mathics_scanner/data/boxing-characters.yml b/mathics_scanner/data/boxing-characters.yml
index 1a19cf5..eb45ea8 100644
--- a/mathics_scanner/data/boxing-characters.yml
+++ b/mathics_scanner/data/boxing-characters.yml
@@ -29,67 +29,68 @@
 # system markers, specifically the range 0xf7c0 to 0xf7cd.
 #
 
-# Coding note: below we use single quotes, not double quotes so that we do
-# not have to escape backslash characters.
+# Coding note: mind the quoting. Use single quotes where backslashes must stay
+# literal, e.g. in the ASCII field, and double quotes where YAML must interpret
+# the backslash escape, e.g. in the Unicode field.
 
 LinearSyntaxAmp:
   ASCII: '\&'
   Operators: []
-  Unicode: '\uf7c7'
+  Unicode: "\uf7c7"
 
 LinearSyntaxAt:
   ASCII: '\@'
   Operators: [RadicalBox, SqrtBox]
-  Unicode: '\uf7c1'
+  Unicode: "\uf7c1"
 
 LinearSyntaxBacktick:
   ASCII: '\`'
   Operators: [FormBox]
-  Unicode: '\uf7cd'
+  Unicode: "\uf7cd"
 
 LinearSyntaxBang:
   ASCII: '\!'
   Operators:
-  Unicode: '\uf7c1'
+  Unicode: "\uf7c1"
 
 LinearSyntaxCaret:
   ASCII: '\^'
   Operators: SuperscriptBox
-  Unicode: '\uf7c6'
+  Unicode: "\uf7c6"
 
 # Note: this name does not appear in CodeParse
 LinearSyntaxCloseParen:
   ASCII: '\)'
   Operators: [RowBox]
-  Unicode: '\uf7c0'
+  Unicode: "\uf7c0"
 
 # Note: this name does not appear in CodeParse
 LinearSyntaxOpenParen:
   ASCII: '\('
   Operators: [RowBox]
-  Unicode: '\uf7cd'
+  Unicode: "\uf7cd"
 
 LinearSyntaxPercent:
   ASCII: '\%'
   Operators: [RadicalBox, SuperscriptBox, UnderOverscriptBox]
-  Unicode: '\uf7c5'
+  Unicode: "\uf7c5"
 
 LinearSyntaxPlus:
   ASCII: '\+'
   Operators: [UnderscriptBox, UnderOverscriptBox]
-  Unicode: '\uf7cb'
+  Unicode: "\uf7cb"
 
 LinearSyntaxStar:
   ASCII: '\*'
   Operators: []
-  Unicode: '\uf7c8'
+  Unicode: "\uf7c8"
 
 LinearSyntaxSlash:
   ASCII: '\/'
   Operators: [FractionBox]
-  Unicode: '\uf7cc'
+  Unicode: "\uf7cc"
 
 LinearSyntaxUnder:
   ASCII: '\_'
   Operators: [SubscriptBox, SubsuperscriptBox]
-  Unicode: '\uf7ca'
+  Unicode: "\uf7ca"
diff --git a/mathics_scanner/escape_sequences.py b/mathics_scanner/escape_sequences.py
index 27d8ad2..ae6cb43 100644
--- a/mathics_scanner/escape_sequences.py
+++ b/mathics_scanner/escape_sequences.py
@@ -2,15 +2,17 @@
 Helper Module for tokenizing character escape sequences.
 """
 
-from typing import Optional, Tuple
+from typing import Final, Optional, Tuple
 
-from mathics_scanner.characters import named_characters
+from mathics_scanner.characters import boxing_ascii_to_unicode, named_characters
 from mathics_scanner.errors import (
     EscapeSyntaxError,
     NamedCharacterSyntaxError,
     SyntaxError,
 )
 
+BOX_OPERATOR: Final[str] = "&@`!^)(%*/_"
+
 
 def parse_base(source_text: str, start_shift: int, end_shift: int, base: int) -> str:
     r"""
@@ -140,8 +142,12 @@ def parse_escape_sequence(source_text: str, pos: int) -> Tuple[str, int]:
             assert c == "r"
             result += "\r"
         pos += 1
-    elif c in '!"':
-        result += c
+    elif c in BOX_OPERATOR:
+        if (boxed_character := boxing_ascii_to_unicode.get("\\" + c)) is not None:
+            # Replace \ in result with Unicode representing the two ASCII characters.
+            result = result[:-1] + boxed_character
+        else:
+            raise EscapeSyntaxError("stresc", rf"\{c}")
         pos += 1
     else:
         raise EscapeSyntaxError("stresc", rf"\{c}")
diff --git a/mathics_scanner/mathics3_tokens.py b/mathics_scanner/mathics3_tokens.py
index aea94d4..aef026d 100644
--- a/mathics_scanner/mathics3_tokens.py
+++ b/mathics_scanner/mathics3_tokens.py
@@ -8,6 +8,7 @@
 import re
 import sys
 
+from mathics_scanner.characters import replace_box_unicode_with_ascii
 from mathics_scanner.errors import (
     EscapeSyntaxError,
     NamedCharacterSyntaxError,
@@ -209,10 +210,13 @@ def tokens(shell: TerminalShell, source_text: str, code_tokenize_format: bool):
         if token.tag == "END":
             break
         elif code_tokenize_format:
-            shell.to_output(token.code_tokenize_format)
-            # print(token.code_tokenize_format)
+            mess = shell.get_out_prompt()
+            print(
+                mess + replace_box_unicode_with_ascii(token.code_tokenize_format) + "\n"
+            )
         else:
             mess = shell.get_out_prompt()
+            token.text = replace_box_unicode_with_ascii(token.text)
             print(mess + str(token) + "\n")
 
 

From 964e87f2185f523bd74669c738d06d47a14a54d3 Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Wed, 18 Feb 2026 05:44:53 -0500
Subject: [PATCH 07/26] Code cleanup.

---
 mathics_scanner/__init__.py          |  4 +-
 mathics_scanner/characters.py        | 92 +++++++++++++++++-----------
 mathics_scanner/escape_sequences.py  | 21 +++++--
 mathics_scanner/tokeniser.py         | 76 +++++++++++------------
 test/test_string_tokens.py           |  3 +-
 test/test_translation_regressions.py |  4 +-
 6 files changed, 115 insertions(+), 85 deletions(-)

diff --git a/mathics_scanner/__init__.py b/mathics_scanner/__init__.py
index ecfec27..bbd49f8 100644
--- a/mathics_scanner/__init__.py
+++ b/mathics_scanner/__init__.py
@@ -7,8 +7,8 @@
 """
 
 from mathics_scanner.characters import (
+    NAMED_CHARACTERS,
     aliased_characters,
-    named_characters,
     replace_unicode_with_wl,
     replace_wl_with_plain_text,
 )
@@ -34,6 +34,7 @@
     "InvalidSyntaxError",
     "LineFeeder",
     "MultiLineFeeder",
+    "NAMED_CHARACTERS",
     "SyntaxError",
     "SingleLineFeeder",
     # "Token",
@@ -41,7 +42,6 @@
     "__version__",
     "aliased_characters",
     # "is_symbol_name",
-    "named_characters",
     "replace_unicode_with_wl",
     "replace_wl_with_plain_text",
 ]
diff --git a/mathics_scanner/characters.py b/mathics_scanner/characters.py
index 7418a3c..64f2803 100644
--- a/mathics_scanner/characters.py
+++ b/mathics_scanner/characters.py
@@ -1,13 +1,16 @@
 # -*- coding: utf-8 -*-
-"""
-The ``mathics_scanner.characters`` module consists mostly of translation tables
-between Wolfram's internal representation of `named characters
+"""This module consists mostly of translation tables between Wolfram's
+internal representation of `named characters
 <https://reference.wolfram.com/language/tutorial/InputAndOutputInNotebooks.html#4718>`_
 and Unicode/ASCII.
+
+It also contains Unicode translation tables for the syntax used in
+Boxing operators and Boxing expressions.
 """
 
 import os.path as osp
 import re
+from typing import Dict, Final
 
 try:
     import ujson
@@ -16,44 +19,56 @@
 
 
 def get_srcdir() -> str:
-    filename = osp.normcase(osp.dirname(osp.abspath(__file__)))
-    return osp.realpath(filename)
+    """Return the OS normalized real directory path for where this
+    code currently resides on disk."""
+    directory_path = osp.normcase(osp.dirname(osp.abspath(__file__)))
+    return osp.realpath(directory_path)
+
 
+ROOT_DIR: Final[str] = get_srcdir()
 
-ROOT_DIR = get_srcdir()
 
 # Load the conversion tables from disk
-named_characters_path = osp.join(ROOT_DIR, "data", "named-characters.json")
-if osp.exists(named_characters_path):
-    with open(named_characters_path, "r") as f:
-        _data = ujson.load(f)
+
+NAMED_CHARACTERS_PATH: Final[str] = osp.join(ROOT_DIR, "data", "named-characters.json")
+if osp.exists(NAMED_CHARACTERS_PATH):
+    with open(NAMED_CHARACTERS_PATH, "r") as f:
+        NAMED_CHARACTERS_COLLECTION = ujson.load(f)
 else:
-    _data = {}
+    NAMED_CHARACTERS_COLLECTION = {}
+
+BOXING_CHARACTERS_PATH: Final[str] = osp.join(
+    ROOT_DIR, "data", "boxing-characters.json"
+)
 
-boxing_characters_path = osp.join(ROOT_DIR, "data", "boxing-characters.json")
-if osp.exists(boxing_characters_path):
-    with open(boxing_characters_path, "r") as f:
+if osp.exists(BOXING_CHARACTERS_PATH):
+    with open(BOXING_CHARACTERS_PATH, "r") as f:
         boxing_character_data = ujson.load(f)
 else:
     boxing_characters_data = {}
 
-boxing_unicode_to_ascii = boxing_character_data.get("unicode-to-ascii", {})
-boxing_ascii_to_unicode = boxing_character_data.get("ascii-to-unicode", {})
+BOXING_UNICODE_TO_ASCII: Final[Dict[str, str]] = boxing_character_data.get(
+    "unicode-to-ascii", {}
+)
+BOXING_ASCII_TO_UNICODE: Final[Dict[str, str]] = boxing_character_data.get(
+    "ascii-to-unicode", {}
+)
 
 replace_to_ascii_re = re.compile(
     "|".join(
         re.escape(unicode_character)
-        for unicode_character in boxing_unicode_to_ascii.keys()
+        for unicode_character in BOXING_UNICODE_TO_ASCII.keys()
     )
 )
 
 
-def replace_box_unicode_with_ascii(s: str) -> str:
-    return replace_to_ascii_re.sub(lambda m: s[m.group(0)], s)
+def replace_box_unicode_with_ascii(input_string):
+    return "".join(BOXING_UNICODE_TO_ASCII.get(char, char) for char in input_string)
 
 
 # Character ranges of letters
-_letters = "a-zA-Z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u0103\u0106\u0107\
+_letters: Final[str] = (
+    "a-zA-Z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u0103\u0106\u0107\
 \u010c-\u010f\u0112-\u0115\u011a-\u012d\u0131\u0141\u0142\u0147\u0148\
 \u0150-\u0153\u0158-\u0161\u0164\u0165\u016e-\u0171\u017d\u017e\
 \u0391-\u03a1\u03a3-\u03a9\u03b1-\u03c9\u03d1\u03d2\u03d5\u03d6\
@@ -62,37 +77,44 @@ def replace_box_unicode_with_ascii(s: str) -> str:
 \uf6ba-\uf6bc\uf6be\uf6bf\uf6c1-\uf700\uf730\uf731\uf770\uf772\uf773\
 \uf776\uf779\uf77a\uf77d-\uf780\uf782-\uf78b\uf78d-\uf78f\uf790\
 \uf793-\uf79a\uf79c-\uf7a2\uf7a4-\uf7bd\uf800-\uf833\ufb01\ufb02"
+)
 
 # Character ranges of letterlikes
-_letterlikes = _data.get("letterlikes", {})
+_letterlikes: Final[Dict[str, str]] = NAMED_CHARACTERS_COLLECTION.get("letterlikes", {})
 
 # Conversion from WL to the fully qualified names
-_wl_to_ascii = _data.get("wl-to-ascii-dict", {})
-_wl_to_ascii_re = re.compile(_data.get("wl-to-ascii-re", ""))
+_wl_to_ascii: Final[Dict[str, str]] = NAMED_CHARACTERS_COLLECTION.get(
+    "wl-to-ascii-dict", {}
+)
+_wl_to_ascii_re = re.compile(NAMED_CHARACTERS_COLLECTION.get("wl-to-ascii-re", ""))
 
 # AMS LaTeX replacements
-_wl_to_amstex = _data.get("wl-to-amstex", None)
+_wl_to_amstex = NAMED_CHARACTERS_COLLECTION.get("wl-to-amstex", None)
 
 # Conversion from WL to unicode
-_wl_to_unicode = _data.get("wl-to-unicode-dict", _data.get("wl_to_ascii"))
-_wl_to_unicode_re = re.compile(_data.get("wl-to-unicode-re", ""))
+_wl_to_unicode = NAMED_CHARACTERS_COLLECTION.get(
+    "wl-to-unicode-dict", NAMED_CHARACTERS_COLLECTION.get("wl_to_ascii")
+)
+_wl_to_unicode_re = re.compile(NAMED_CHARACTERS_COLLECTION.get("wl-to-unicode-re", ""))
 
 # Conversion from unicode to WL
-_unicode_to_wl = _data.get("unicode-to-wl-dict", {})
-_unicode_to_wl_re = re.compile(_data.get("unicode-to-wl-re", ""))
+_unicode_to_wl = NAMED_CHARACTERS_COLLECTION.get("unicode-to-wl-dict", {})
+_unicode_to_wl_re = re.compile(NAMED_CHARACTERS_COLLECTION.get("unicode-to-wl-re", ""))
 
 # All supported named characters
-named_characters = _data.get("named-characters", {})
+NAMED_CHARACTERS: Final[Dict[str, str]] = NAMED_CHARACTERS_COLLECTION.get(
+    "named-characters", {}
+)
 
 # ESC sequence aliases
-aliased_characters = _data.get("aliased-characters", {})
+aliased_characters = NAMED_CHARACTERS_COLLECTION.get("aliased-characters", {})
 
 
 # Deprecated
 def replace_wl_with_plain_text(wl_input: str, use_unicode=True) -> str:
     """
     The Wolfram Language uses specific Unicode characters to represent Wolfram
-    Language named characters. This functions replaces all occurrences of such
+    Language named characters. This function replaces all occurrences of such
     characters with their corresponding Unicode/ASCII equivalents.
 
     :param wl_input: The string whose characters will be replaced.
@@ -101,7 +123,7 @@ def replace_wl_with_plain_text(wl_input: str, use_unicode=True) -> str:
 
     Note that the occurrences of named characters in ``wl_input`` are expect to
     be represented by Wolfram's internal scheme. For more information Wolfram's
-    representation scheme and on our own conversion scheme please see `Listing
+    representation scheme and on our own conversion scheme, please see `Listing
     of Named Characters
     <https://reference.wolfram.com/language/guide/ListingOfNamedCharacters.html>`_
     and ``implementation.rst`` respectively.
@@ -109,7 +131,7 @@ def replace_wl_with_plain_text(wl_input: str, use_unicode=True) -> str:
     r = _wl_to_unicode_re if use_unicode else _wl_to_ascii_re
     d = _wl_to_unicode if use_unicode else _wl_to_ascii
 
-    # The below on when use_unicode is False will sometime test on "ascii" twice.
+    # The below, when use_unicode is False, will sometimes test on "ascii" twice.
     # But this routine should be deprecated.
     return r.sub(lambda m: d.get(m.group(0), _wl_to_ascii.get(m.group(0))), wl_input)
 
@@ -118,7 +140,7 @@ def replace_wl_with_plain_text(wl_input: str, use_unicode=True) -> str:
 def replace_unicode_with_wl(unicode_input: str) -> str:
     """
     The Wolfram Language uses specific Unicode characters to represent Wolfram
-    Language named characters. This functions replaces all occurrences of the
+    Language named characters. This function replaces all occurrences of the
     corresponding Unicode equivalents of such characters with the characters
     themselves.
 
@@ -127,7 +149,7 @@ def replace_unicode_with_wl(unicode_input: str) -> str:
     Note that the occurrences of named characters in the output of
     ``replace_unicode_with_wl`` are represented using Wolfram's internal
     scheme. For more information Wolfram's representation scheme and on our own
-    conversion scheme please see `Listing of Named Characters
+    conversion scheme, please see `Listing of Named Characters
     <https://reference.wolfram.com/language/guide/ListingOfNamedCharacters.html>`_
     and ``implementation.rst`` respectively.
     """
diff --git a/mathics_scanner/escape_sequences.py b/mathics_scanner/escape_sequences.py
index ae6cb43..1bb4638 100644
--- a/mathics_scanner/escape_sequences.py
+++ b/mathics_scanner/escape_sequences.py
@@ -4,15 +4,24 @@
 
 from typing import Final, Optional, Tuple
 
-from mathics_scanner.characters import boxing_ascii_to_unicode, named_characters
+from mathics_scanner.characters import BOXING_ASCII_TO_UNICODE, NAMED_CHARACTERS
 from mathics_scanner.errors import (
     EscapeSyntaxError,
     NamedCharacterSyntaxError,
     SyntaxError,
 )
 
+# The second character, or character after backslash ("\") using
+# Boxing expression syntax.
 BOX_OPERATOR: Final[str] = "&@`!^)(%*/_"
 
+# The characters that may follow a backslash ("\") in a valid
+# Mathics3 escaped character.
+ESCAPE_CODES: Final[str] = "ntbfr $\n"
+
+# Valid digits in an Octal string
+OCTAL_DIGITS: Final[str] = "01234567"
+
 
 def parse_base(source_text: str, start_shift: int, end_shift: int, base: int) -> str:
     r"""
@@ -57,13 +66,13 @@ def parse_named_character(source_text: str, start: int, finish: int) -> Optional
 
     Match this string with the known named characters,
     e.g. "Theta".  If we can match this, then we return the unicode equivalent from the
-    `named_characters` map (which is read in from JSON but stored in a YAML file).
+    `NAMED_CHARACTERS` map (which is read in from JSON but stored in a YAML file).
 
     If we can't find the named character, raise NamedCharacterSyntaxError.
     """
     named_character = source_text[start:finish]
     if named_character.isalpha():
-        char = named_characters.get(named_character)
+        char = NAMED_CHARACTERS.get(named_character)
         if char is None:
             raise NamedCharacterSyntaxError("sntufn", named_character, source_text)
         else:
@@ -114,7 +123,7 @@ def parse_escape_sequence(source_text: str, pos: int) -> Tuple[str, int]:
 
         result += named_character
         pos = i + 1
-    elif c in "01234567":
+    elif c in OCTAL_DIGITS:
         # See if we have a 3-digit octal number.
         # For example \065 = "5"
         result += parse_base(source_text, pos, pos + 3, 8)
@@ -124,7 +133,7 @@ def parse_escape_sequence(source_text: str, pos: int) -> Tuple[str, int]:
     # Note that these are a similer to Python, but are different.
     # In particular, Python defines "\a" to be ^G (control G),
     # but in WMA, this is invalid.
-    elif c in "ntbfr $\n":
+    elif c in ESCAPE_CODES:
         if c in "n\n":
             result += "\n"
         elif c == " ":
@@ -143,7 +152,7 @@ def parse_escape_sequence(source_text: str, pos: int) -> Tuple[str, int]:
             result += "\r"
         pos += 1
     elif c in BOX_OPERATOR:
-        if (boxed_character := boxing_ascii_to_unicode.get("\\" + c)) is not None:
+        if (boxed_character := BOXING_ASCII_TO_UNICODE.get("\\" + c)) is not None:
             # Replace \ in result with Unicode representing the two ASCII characters.
             result = result[:-1] + boxed_character
         else:
diff --git a/mathics_scanner/tokeniser.py b/mathics_scanner/tokeniser.py
index 8034575..c3e3714 100644
--- a/mathics_scanner/tokeniser.py
+++ b/mathics_scanner/tokeniser.py
@@ -9,9 +9,9 @@
 import os.path as osp
 import re
 import string
-from typing import Dict, List, Optional, Set, Tuple
+from typing import Dict, Final, List, Optional, Set, Tuple
 
-from mathics_scanner.characters import _letterlikes, _letters, named_characters
+from mathics_scanner.characters import NAMED_CHARACTERS, _letterlikes, _letters
 from mathics_scanner.errors import (
     EscapeSyntaxError,
     IncompleteSyntaxError,
@@ -30,10 +30,10 @@
 ROOT_DIR = osp.dirname(__file__)
 OPERATORS_TABLE_PATH = osp.join(ROOT_DIR, "data", "operators.json")
 
-##############################################
+################################################
 # The below get initialized in by init_module()
 # from operator data
-##############################################
+################################################
 OPERATOR_DATA = {}
 NO_MEANING_OPERATORS = {}
 
@@ -41,7 +41,7 @@
 # This is used in t_String for escape-sequence handling.
 # The below is roughly correct, but we overwrite this
 # from operators.json data in init_module()
-BOXING_CONSTRUCT_SUFFIXES: Set[str] = {
+BOXING_CONSTRUCT_SUFFIXES: Final[Set[str]] = {
     "%",
     "/",
     "@",
@@ -120,7 +120,7 @@
 # FIXME incorportate the below table in to Function/Operators YAML
 # Table of correspondneces between a Mathics3 token name (or "tag")
 # and WMA CodeTokenize name
-MATHICS3_TAG_TO_CODETOKENIZE: Dict[str, str] = {
+MATHICS3_TAG_TO_CODETOKENIZE: Final[Dict[str, str]] = {
     "AddTo": "PlusEqual",
     "Alternatives": "Bar",
     "And": "AmpAmp",
@@ -263,59 +263,59 @@ def init_module():
         #
         ("AddTo", r" \+\= "),
         ("Alternatives", r" \| "),
-        ("And", rf" (\&\&) | {named_characters['And']} "),
+        ("And", rf" (\&\&) | {NAMED_CHARACTERS['And']} "),
         ("Apply", r" \@\@ "),
         ("ApplyList", r" \@\@\@ "),
         ("Composition", r" \@\* "),
         ("Condition", r" \/\; "),
-        ("Conjugate", rf" {named_characters['Conjugate']} "),
+        ("Conjugate", rf" {NAMED_CHARACTERS['Conjugate']} "),
         ("ConjugateTranspose", r" \uf3c9 "),
-        ("Cross", rf" \uf4a0 | {named_characters['Cross']} "),
+        ("Cross", rf" \uf4a0 | {NAMED_CHARACTERS['Cross']} "),
         ("Decrement", r" \-\- "),
-        ("Del", rf" {named_characters['Del']} "),
+        ("Del", rf" {NAMED_CHARACTERS['Del']} "),
         ("Derivative", r" \' "),
         # ('DifferenceDelta', r' \u2206 '),
         # https://reference.wolfram.com/language/ref/character/DirectedEdge.html
-        ("DirectedEdge", rf" -> | \uf3d5 | {named_characters['DirectedEdge']} "),
+        ("DirectedEdge", rf" -> | \uf3d5 | {NAMED_CHARACTERS['DirectedEdge']} "),
         # ('DiscreteRatio', r' \uf4a4 '),
         # ('DiscreteShift', r' \uf4a3 '),
-        ("Conjugate", rf" {named_characters['Conjugate']} "),
+        ("Conjugate", rf" {NAMED_CHARACTERS['Conjugate']} "),
         ("ConjugateTranspose", r" \uf3c9 "),
-        ("DifferentialD", rf" \uf74c | {named_characters['DifferentialD']} "),
-        ("Divide", rf" \/| {named_characters['Divide']} "),
+        ("DifferentialD", rf" \uf74c | {NAMED_CHARACTERS['DifferentialD']} "),
+        ("Divide", rf" \/| {NAMED_CHARACTERS['Divide']} "),
         ("DivideBy", r" \/\=  "),
         ("Dot", r" \. "),
-        ("Element", r" {named_characters['Element']} "),
-        ("Equal", rf" (\=\=) | \uf431 | {named_characters['Equal']} | \uf7d9 "),
-        ("Equivalent", r" {named_characters['Equivalent']} "),
-        ("Exists", r" {named_characters['Exists']} "),
+        ("Element", r" {NAMED_CHARACTERS['Element']} "),
+        ("Equal", rf" (\=\=) | \uf431 | {NAMED_CHARACTERS['Equal']} | \uf7d9 "),
+        ("Equivalent", r" {NAMED_CHARACTERS['Equivalent']} "),
+        ("Exists", r" {NAMED_CHARACTERS['Exists']} "),
         ("Factorial", r" \! "),
         ("Factorial2", r" \!\! "),
-        ("ForAll", r" {named_characters['ForAll']} "),
-        ("Function", rf" \& | \uF4A1 | {named_characters['Function']} | \|-> "),
+        ("ForAll", r" {NAMED_CHARACTERS['ForAll']} "),
+        ("Function", rf" \& | \uF4A1 | {NAMED_CHARACTERS['Function']} | \|-> "),
         ("Greater", r" \> "),
-        ("GreaterEqual", rf" (\>\=) | {named_characters['GreaterEqual']} "),
+        ("GreaterEqual", rf" (\>\=) | {NAMED_CHARACTERS['GreaterEqual']} "),
         ("HermitianConjugate", r" \uf3ce "),
         ("Implies", r" \uF523 "),
         ("Increment", r" \+\+ "),
         ("Infix", r" \~ "),
         ("Information", r"\?\?"),
         ("Integral", r" \u222b "),
-        ("Intersection", rf" {named_characters['Intersection']} "),
+        ("Intersection", rf" {NAMED_CHARACTERS['Intersection']} "),
         ("Less", r" \< "),
-        ("LessEqual", rf" (\<\=) | {named_characters['LessEqual']} "),
+        ("LessEqual", rf" (\<\=) | {NAMED_CHARACTERS['LessEqual']} "),
         ("Map", r" \/\@ "),
         ("MapAll", r" \/\/\@ "),
-        # FIXME: can't use named_characters in Minus because the ASCII minus
+        # FIXME: can't use NAMED_CHARACTERS in Minus because the ASCII minus
         # causes the unicode not to appear in tables.
         ("Minus", r" \-| \u2122 "),
-        ("Nand", rf" {named_characters['Nand']} "),
+        ("Nand", rf" {NAMED_CHARACTERS['Nand']} "),
         ("NonCommutativeMultiply", r" \*\* "),
-        ("Nor", rf" {named_characters['Nor']} "),
-        ("Not", r" {named_characters['Not']} "),
-        ("NotElement", r" {named_characters['NotElement']} "),
-        ("NotExists", r" {named_characters['NotExists']} "),
-        ("Or", rf" (\|\|) | {named_characters['Or']} "),
+        ("Nor", rf" {NAMED_CHARACTERS['Nor']} "),
+        ("Not", r" {NAMED_CHARACTERS['Not']} "),
+        ("NotElement", r" {NAMED_CHARACTERS['NotElement']} "),
+        ("NotExists", r" {NAMED_CHARACTERS['NotExists']} "),
+        ("Or", rf" (\|\|) | {NAMED_CHARACTERS['Or']} "),
         # ('PartialD', r' \u2202 '),
         ("PatternTest", r" \? "),
         ("Plus", r" \+ "),
@@ -330,31 +330,31 @@ def init_module():
         ("ReplaceAll", r" \/\. "),
         ("ReplaceRepeated", r" \/\/\. "),
         ("RightComposition", r" \/\* "),
-        ("Rule", r" (\-\>)| \uF522 | {named_characters['Rule']} "),
+        ("Rule", r" (\-\>)| \uF522 | {NAMED_CHARACTERS['Rule']} "),
         ("RuleDelayed", r" (\:\>)|\uF51F "),
         ("SameQ", r" \=\=\= "),
         ("Semicolon", r" \; "),
         ("Set", r" \= "),
         ("SetDelayed", r" \:\= "),
-        ("Square", rf" \uf520 | {named_characters['Square']}"),
+        ("Square", rf" \uf520 | {NAMED_CHARACTERS['Square']}"),
         ("StringExpression", r" \~\~ "),
         ("StringJoin", r" \<\> "),
         ("SubtractFrom", r" \-\=  "),
         # ('Sum', r' \u2211 '),
         ("TagSet", r" \/\: "),
-        ("Times", rf" \*|{named_characters['Times']} "),
+        ("Times", rf" \*|{NAMED_CHARACTERS['Times']} "),
         ("TimesBy", r" \*\= "),
-        ("Transpose", rf" \uf3c7 | {named_characters['Transpose']} "),
-        ("Unequal", rf" (\!\= ) | {named_characters['NotEqual']} "),
-        ("Union", rf" {named_characters['Union']} "),
+        ("Transpose", rf" \uf3c7 | {NAMED_CHARACTERS['Transpose']} "),
+        ("Unequal", rf" (\!\= ) | {NAMED_CHARACTERS['NotEqual']} "),
+        ("Union", rf" {NAMED_CHARACTERS['Union']} "),
         ("UnsameQ", r" \=\!\= "),
         ("Xnor", r" \uF4A2 "),
-        ("Xor", rf" {named_characters['Xor']} "),
+        ("Xor", rf" {NAMED_CHARACTERS['Xor']} "),
         # https://reference.wolfram.com/language/ref/character/UndirectedEdge.html
         # The official Unicode value is \u2194
         (
             "UndirectedEdge",
-            rf" (\<\-\>)|\u29DF | {named_characters['UndirectedEdge']} ",
+            rf" (\<\-\>)|\u29DF | {NAMED_CHARACTERS['UndirectedEdge']} ",
         ),
         # allow whitespace but avoid e.g. x=.01
         ("Unset", r" \=\s*\.(?!\d|\.) "),
diff --git a/test/test_string_tokens.py b/test/test_string_tokens.py
index 61cdc76..4fa36d9 100644
--- a/test/test_string_tokens.py
+++ b/test/test_string_tokens.py
@@ -96,12 +96,11 @@ def test_string():
 
     check_string(
         r'"\(a \+\)"',
-        r'"\(a \+\)"',
+        r'"a \+"',
         "Do not interpret, but preserve boxing inside a string",
     )
 
     incomplete_error(r'"abc', "String does not have terminating quote")
-    incomplete_error(r'"\"', "Unterminated escape sequence")
 
     escape_scan_error(r'"a\g"', "Unknown string escape \\g")
     escape_scan_error(r'"a\X"', '"X" is not a valid escape character')
diff --git a/test/test_translation_regressions.py b/test/test_translation_regressions.py
index d0d4c13..72069ee 100644
--- a/test/test_translation_regressions.py
+++ b/test/test_translation_regressions.py
@@ -1,10 +1,10 @@
 # -*- coding: utf-8 -*-
 
-from mathics_scanner.characters import replace_wl_with_plain_text, named_characters
+from mathics_scanner.characters import NAMED_CHARACTERS, replace_wl_with_plain_text
 
 
 def check_translation_regression(c: str, expected_translation: str):
-    translation = replace_wl_with_plain_text(named_characters[c])
+    translation = replace_wl_with_plain_text(NAMED_CHARACTERS[c])
     assert (
         translation == expected_translation
     ), f"REGRESSION {c} is translated to {translation} but it should translate to {expected_translation}"

From 28f05ba6187f52b09fc04f8d3adcba3db07298be Mon Sep 17 00:00:00 2001
From: "R. Bernstein" <rocky@users.noreply.github.com>
Date: Wed, 18 Feb 2026 05:46:54 -0500
Subject: [PATCH 08/26] Fix comments for consistency in characters.py

---
 mathics_scanner/characters.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mathics_scanner/characters.py b/mathics_scanner/characters.py
index 64f2803..eef79f6 100644
--- a/mathics_scanner/characters.py
+++ b/mathics_scanner/characters.py
@@ -91,13 +91,13 @@ def replace_box_unicode_with_ascii(input_string):
 # AMS LaTeX replacements
 _wl_to_amstex = NAMED_CHARACTERS_COLLECTION.get("wl-to-amstex", None)
 
-# Conversion from WL to unicode
+# Conversion from WL to Unicode
 _wl_to_unicode = NAMED_CHARACTERS_COLLECTION.get(
     "wl-to-unicode-dict", NAMED_CHARACTERS_COLLECTION.get("wl_to_ascii")
 )
 _wl_to_unicode_re = re.compile(NAMED_CHARACTERS_COLLECTION.get("wl-to-unicode-re", ""))
 
-# Conversion from unicode to WL
+# Conversion from Unicode to WL
 _unicode_to_wl = NAMED_CHARACTERS_COLLECTION.get("unicode-to-wl-dict", {})
 _unicode_to_wl_re = re.compile(NAMED_CHARACTERS_COLLECTION.get("unicode-to-wl-re", ""))
 
@@ -122,7 +122,7 @@ def replace_wl_with_plain_text(wl_input: str, use_unicode=True) -> str:
                         for the conversion.
 
     Note that the occurrences of named characters in ``wl_input`` are expect to
-    be represented by Wolfram's internal scheme. For more information Wolfram's
+    be represented by Wolfram's internal scheme. For more information on Wolfram's
     representation scheme and on our own conversion scheme, please see `Listing
     of Named Characters
     <https://reference.wolfram.com/language/guide/ListingOfNamedCharacters.html>`_
@@ -148,7 +148,7 @@ def replace_unicode_with_wl(unicode_input: str) -> str:
 
     Note that the occurrences of named characters in the output of
     ``replace_unicode_with_wl`` are represented using Wolfram's internal
-    scheme. For more information Wolfram's representation scheme and on our own
+    scheme. For more information on Wolfram's representation scheme and on our own
     conversion scheme, please see `Listing of Named Characters
     <https://reference.wolfram.com/language/guide/ListingOfNamedCharacters.html>`_
     and ``implementation.rst`` respectively.

From 9ba677532a462748daeea8d5ce0ee3168f7cc3ae Mon Sep 17 00:00:00 2001
From: "R. Bernstein" <rocky@users.noreply.github.com>
Date: Wed, 18 Feb 2026 05:48:02 -0500
Subject: [PATCH 09/26] Fix spelling errors in escape_sequences.py

Corrected spelling errors in comments and docstrings.
---
 mathics_scanner/escape_sequences.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/mathics_scanner/escape_sequences.py b/mathics_scanner/escape_sequences.py
index 1bb4638..95499de 100644
--- a/mathics_scanner/escape_sequences.py
+++ b/mathics_scanner/escape_sequences.py
@@ -28,7 +28,7 @@ def parse_base(source_text: str, start_shift: int, end_shift: int, base: int) ->
     See if characters start_shift .. end shift
     can be converted to an integer in base  ``base``.
 
-    If so, chr(integer value converted from base) is returnd.
+    If so, chr(integer value converted from base) is returned.
 
     However, if the conversion fails, SyntaxError is raised.
     """
@@ -58,14 +58,14 @@ def parse_base(source_text: str, start_shift: int, end_shift: int, base: int) ->
 
 def parse_named_character(source_text: str, start: int, finish: int) -> Optional[str]:
     r"""
-    Find the unicode-equivalent symbol for a string named character.
+    Find the Unicode equivalent symbol for a string named character.
 
-    Before calling we have matched the text between "\["  and "]" of the input.
+    Before calling, we have matched the text between "\["  and "]" of the input.
 
     The name character is thus in source_text[start:finish].
 
     Match this string with the known named characters,
-    e.g. "Theta".  If we can match this, then we return the unicode equivalent from the
+    e.g., "Theta".  If we can match this, then we return the Unicode equivalent from the
     `NAMED_CHARACTERS` map (which is read in from JSON but stored in a YAML file).
 
     If we can't find the named character, raise NamedCharacterSyntaxError.
@@ -130,7 +130,7 @@ def parse_escape_sequence(source_text: str, pos: int) -> Tuple[str, int]:
         pos += 3
 
     # WMA escape characters \n, \t, \b, \r.
-    # Note that these are a similer to Python, but are different.
+    # Note that these are similar to Python, but are different.
     # In particular, Python defines "\a" to be ^G (control G),
     # but in WMA, this is invalid.
     elif c in ESCAPE_CODES:

From 74d2513184e99d4b92f8f672f81f7a6b9464c831 Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Wed, 18 Feb 2026 06:01:33 -0500
Subject: [PATCH 10/26] Correct a module name

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 35b58e7..5be33b7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,7 +51,7 @@ full = [
 ]
 
 [project.scripts]
-mathics3-make-boxing-character-json = "mathics_scanner.generate.box_characters:main"
+mathics3-make-boxing-character-json = "mathics_scanner.generate.boxing_characters:main"
 mathics3-make-named-character-json = "mathics_scanner.generate.named_characters:main"
 mathics3-make-operator-json = "mathics_scanner.generate.operators:main"
 mathics3-tokens = "mathics_scanner.mathics3_tokens:main"

From 01d63d9ea78dec2ea87152cc726ecf268594c518 Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Wed, 18 Feb 2026 06:24:18 -0500
Subject: [PATCH 11/26] Reinstate \" escape.

It was accidentally deleted when ESCAPE_CODES was created.
---
 mathics_scanner/escape_sequences.py | 2 +-
 test/test_string_tokens.py          | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/mathics_scanner/escape_sequences.py b/mathics_scanner/escape_sequences.py
index 95499de..67c08bf 100644
--- a/mathics_scanner/escape_sequences.py
+++ b/mathics_scanner/escape_sequences.py
@@ -17,7 +17,7 @@
 
 # The second character, or character after backslash ("\") that
 # are valid in a Mathics3 escaped character.
-ESCAPE_CODES: Final[str] = "ntbfr $\n"
+ESCAPE_CODES: Final[str] = 'ntbfr" $\n'
 
 # Valid digits in an Octal string
 OCTAL_DIGITS: Final[str] = "01234567"
diff --git a/test/test_string_tokens.py b/test/test_string_tokens.py
index 4fa36d9..d7fb2ed 100644
--- a/test/test_string_tokens.py
+++ b/test/test_string_tokens.py
@@ -101,6 +101,7 @@ def test_string():
     )
 
     incomplete_error(r'"abc', "String does not have terminating quote")
+    incomplete_error(r'"\"', "Unterminated escape sequence")
 
     escape_scan_error(r'"a\g"', "Unknown string escape \\g")
     escape_scan_error(r'"a\X"', '"X" is not a valid escape character')

From 40a54b302307ac03dc0ac305ee589770f045492a Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Thu, 19 Feb 2026 10:40:51 -0500
Subject: [PATCH 12/26] Do Unicode translation only if inside a Box expr

---
 mathics_scanner/escape_sequences.py | 11 +++++++++--
 mathics_scanner/tokeniser.py        | 14 +++++++++-----
 test/test_escape_sequences.py       |  9 ++++++---
 test/test_string_tokens.py          |  2 +-
 4 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/mathics_scanner/escape_sequences.py b/mathics_scanner/escape_sequences.py
index 67c08bf..f448e71 100644
--- a/mathics_scanner/escape_sequences.py
+++ b/mathics_scanner/escape_sequences.py
@@ -79,7 +79,9 @@ def parse_named_character(source_text: str, start: int, finish: int) -> Optional
             return char
 
 
-def parse_escape_sequence(source_text: str, pos: int) -> Tuple[str, int]:
+def parse_escape_sequence(
+    source_text: str, pos: int, is_inside_box: bool
+) -> Tuple[str, int]:
     """Given some source text in `source_text` starting at offset
     `pos`, return the escape-sequence value for this text and the
     follow-on offset position.
@@ -136,6 +138,8 @@ def parse_escape_sequence(source_text: str, pos: int) -> Tuple[str, int]:
     elif c in ESCAPE_CODES:
         if c in "n\n":
             result += "\n"
+        elif c == '"':
+            result += '"'
         elif c == " ":
             result += " "
         elif c == "t":
@@ -151,13 +155,16 @@ def parse_escape_sequence(source_text: str, pos: int) -> Tuple[str, int]:
             assert c == "r"
             result += "\r"
         pos += 1
-    elif c in BOX_OPERATOR:
+    elif is_inside_box and c in BOX_OPERATOR:
         if (boxed_character := BOXING_ASCII_TO_UNICODE.get("\\" + c)) is not None:
             # Replace \ in result with Unicode representing the two ASCII characters.
             result = result[:-1] + boxed_character
         else:
             raise EscapeSyntaxError("stresc", rf"\{c}")
         pos += 1
+    elif c in '!"':
+        result += c
+        pos += 1
     else:
         raise EscapeSyntaxError("stresc", rf"\{c}")
     return result, pos
diff --git a/mathics_scanner/tokeniser.py b/mathics_scanner/tokeniser.py
index c3e3714..04a8fee 100644
--- a/mathics_scanner/tokeniser.py
+++ b/mathics_scanner/tokeniser.py
@@ -566,7 +566,7 @@ def __init__(self, feeder):
 
         # Set to True when inside box parsing.
         # This has an effect on which escape operators are allowed.
-        self._is_inside_box: bool = False
+        self.is_inside_box: bool = False
 
         self._change_token_scanning_mode("expr")
 
@@ -702,7 +702,7 @@ def next(self) -> Token:
 
                 try:
                     escape_str, next_pos = parse_escape_sequence(
-                        self.source_text, self.pos + 1
+                        self.source_text, self.pos + 1, self.is_inside_box
                     )
                 except (EscapeSyntaxError, NamedCharacterSyntaxError) as escape_error:
                     if self.is_inside_box:
@@ -809,7 +809,9 @@ def t_RawBackslash(self, pattern_match: Optional[re.Match]) -> Token:
             source_text += self.source_text
 
         try:
-            escape_str, self.pos = parse_escape_sequence(source_text, start_pos)
+            escape_str, self.pos = parse_escape_sequence(
+                source_text, start_pos, self.is_inside_box
+            )
             if source_text[start_pos] == "[" and source_text[self.pos - 1] == "]":
                 named_character = source_text[start_pos + 1 : self.pos - 1]
         except (EscapeSyntaxError, NamedCharacterSyntaxError) as escape_error:
@@ -877,7 +879,7 @@ def t_RawBackslash(self, pattern_match: Optional[re.Match]) -> Token:
 
                 try:
                     escape_str, next_pos = parse_escape_sequence(
-                        self.source_text, self.pos + 1
+                        self.source_text, self.pos + 1, self.is_inside_box
                     )
                 except (EscapeSyntaxError, NamedCharacterSyntaxError) as escape_error:
                     if self.is_inside_box:
@@ -940,7 +942,9 @@ def t_String(self, _: Optional[re.Match]) -> Token:
                     self.get_more_input()
                 self.pos += 1
                 try:
-                    escape_str, self.pos = parse_escape_sequence(source_text, self.pos)
+                    escape_str, self.pos = parse_escape_sequence(
+                        source_text, self.pos, self.is_inside_box
+                    )
                 except NamedCharacterSyntaxError as escape_error:
                     self.feeder.message(
                         escape_error.name, escape_error.tag, *escape_error.args
diff --git a/test/test_escape_sequences.py b/test/test_escape_sequences.py
index 30af4c3..5143a3a 100644
--- a/test/test_escape_sequences.py
+++ b/test/test_escape_sequences.py
@@ -45,13 +45,16 @@ def test_escape_sequences():
         (r"z \[Conjugate]", 3, 14, "\uf3c8", "Named character; at end"),
         ("[Integral]", 0, 10, "\u222b", "Another full-string named-character"),
     ):
-        assert parse_escape_sequence(text, pos) == (expect_str, expect_pos), fail_msg
+        assert parse_escape_sequence(text, pos, is_inside_box=False) == (
+            expect_str,
+            expect_pos,
+        ), fail_msg
 
 
 def test_invalid_named_character_sequences():
     for text in (r"\[", r"\[Theta", r"\[Fake]", r"\[abc]"):
         with pytest.raises(NamedCharacterSyntaxError):
-            parse_escape_sequence(text, 1)
+            parse_escape_sequence(text, 1, is_inside_box=False)
 
 
 def test_invalid_number_encoding():
@@ -75,4 +78,4 @@ def test_invalid_number_encoding():
         ":01-2",
     ):
         with pytest.raises(SyntaxError):
-            parse_escape_sequence(text, 0)
+            parse_escape_sequence(text, 0, is_inside_box=False)
diff --git a/test/test_string_tokens.py b/test/test_string_tokens.py
index d7fb2ed..61cdc76 100644
--- a/test/test_string_tokens.py
+++ b/test/test_string_tokens.py
@@ -96,7 +96,7 @@ def test_string():
 
     check_string(
         r'"\(a \+\)"',
-        r'"a \+"',
+        r'"\(a \+\)"',
         "Do not interpret, but preserve boxing inside a string",
     )
 

From a985ec6bc8da9e6ad0298bd4e208e370d5f3713a Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Thu, 19 Feb 2026 11:48:49 -0500
Subject: [PATCH 13/26] Test against the corresponding mathics-core branch

---
 .github/workflows/mathics.yml | 2 +-
 mathics_scanner/characters.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/mathics.yml b/.github/workflows/mathics.yml
index c431390..ee2ecab 100644
--- a/.github/workflows/mathics.yml
+++ b/.github/workflows/mathics.yml
@@ -33,7 +33,7 @@ jobs:
         git clone --depth 1 https://github.com/Mathics3/mathics-scanner.git
         (cd mathics-scanner && pip install -e .)
         # Until next Mathics3/mathics-core release is out...
-        git clone --depth 1 https://github.com/Mathics3/mathics-core.git
+        git clone -b add-unicode-box-characters --depth 1 https://github.com/Mathics3/mathics-core.git
         cd mathics-core/
         make PIP_INSTALL_OPTS='[full]'
         # pip install Mathics3[full]
diff --git a/mathics_scanner/characters.py b/mathics_scanner/characters.py
index eef79f6..e62b500 100644
--- a/mathics_scanner/characters.py
+++ b/mathics_scanner/characters.py
@@ -45,7 +45,7 @@ def get_srcdir() -> str:
     with open(BOXING_CHARACTERS_PATH, "r") as f:
         boxing_character_data = ujson.load(f)
 else:
-    boxing_characters_data = {}
+    boxing_character_data = {}
 
 BOXING_UNICODE_TO_ASCII: Final[Dict[str, str]] = boxing_character_data.get(
     "unicode-to-ascii", {}

From 5a3da9d93a7f3a39a84e2cc92cc88c1fcd87d519 Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Thu, 19 Feb 2026 12:13:43 -0500
Subject: [PATCH 14/26] See if this improves Windows CI

---
 .../{mathics.yml => mathics3-doctest.yml}     |  4 +--
 mathics_scanner/generate/named_characters.py  |  2 +-
 setup.py                                      | 34 +++++++++++--------
 3 files changed, 23 insertions(+), 17 deletions(-)
 rename .github/workflows/{mathics.yml => mathics3-doctest.yml} (93%)

diff --git a/.github/workflows/mathics.yml b/.github/workflows/mathics3-doctest.yml
similarity index 93%
rename from .github/workflows/mathics.yml
rename to .github/workflows/mathics3-doctest.yml
index ee2ecab..9cf3d5c 100644
--- a/.github/workflows/mathics.yml
+++ b/.github/workflows/mathics3-doctest.yml
@@ -1,4 +1,4 @@
-name: Mathics_Script (Mathics doctest)
+name: Mathics3_Doctest (Mathics3 doctest)
 
 on:
   push:
@@ -28,7 +28,7 @@ jobs:
     - name: Install Mathics_Scanner
       run: |
         make
-    - name: Test Mathics3
+    - name: Run Mathics3 Doctests
       run: |
         git clone --depth 1 https://github.com/Mathics3/mathics-scanner.git
         (cd mathics-scanner && pip install -e .)
diff --git a/mathics_scanner/generate/named_characters.py b/mathics_scanner/generate/named_characters.py
index d787f5f..8cd9a22 100755
--- a/mathics_scanner/generate/named_characters.py
+++ b/mathics_scanner/generate/named_characters.py
@@ -258,7 +258,7 @@ def compile_tables(data: dict) -> dict:
     }
 
 
-DEFAULT_DATA_DIR = Path(osp.normpath(osp.dirname(__file__)), "..", "data")
+DEFAULT_DATA_DIR = Path(__file__).parent.parent / "data"
 
 ALL_FIELDS = [
     "aliased-characters",
diff --git a/setup.py b/setup.py
index 82c59b7..e94970b 100644
--- a/setup.py
+++ b/setup.py
@@ -25,34 +25,40 @@
 mathics-users@googlegroups.com and ask for help.
 """
 
-import os
 import os.path as osp
+import subprocess
+import sys
 
 from setuptools import setup
-from setuptools.command.build_py import build_py as setuptools_build_py
+from setuptools.command.egg_info import egg_info
 
 
 def get_srcdir():
-    """return the directory of the location if this code"""
+    """Return the directory of the location if this code"""
     filename = osp.normcase(osp.dirname(osp.abspath(__file__)))
     return osp.realpath(filename)
 
 
-class build_py(setuptools_build_py):
-    def run(self):
-        for table_type in ("boxing-character", "named-character", "operator"):
-            json_data_file = osp.join("data", f"{table_type}.json")
-            json_path = osp.join("mathics-scanner", json_data_file)
-            if not osp.exists(json_path):
-                os.system(f"mathics3-make-{table_type}-json" " -o {json-path}")
-            self.distribution.package_data["Mathics-Scanner"].append(json_data_file)
-        setuptools_build_py.run(self)
+class table_building_egg_info(egg_info):
+    """This runs as part of building an sdist"""
 
+    def finalize_options(self):
+        """Run program to create JSON tables"""
+        for table_program in ("boxing-character", "named-character", "operator"):
+            build_tables_program = osp.join(
+                get_srcdir(), "mathics_scanner", "generate", f"{table_program}.py"
+            )
+            print(f"Building JSON tables via {build_tables_program}")
+            result = subprocess.run([sys.executable, build_tables_program], check=False)
+            if result.returncode:
+                raise RuntimeError(
+                    f"Running {build_tables_program} exited with code {result.returncode}"
+                )
+            super().finalize_options()
 
-CMDCLASS = {"build_py": build_py}
 
 setup(
-    cmdclass=CMDCLASS,
+    cmdclass={"egg_info": table_building_egg_info},
     # don't pack Mathics in egg because of media files, etc.
     zip_safe=False,
 )

From 985922802d27d83678c2e7e1ded3a2a3e06dd93f Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Thu, 19 Feb 2026 12:39:18 -0500
Subject: [PATCH 15/26] Go back to older form of setup on MSWindows.

---
 .github/workflows/windows.yml | 8 ++++----
 setup.py                      | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index 8f88834..38d8f5a 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -28,9 +28,9 @@ jobs:
         pip install -e .
     - name: Test Mathics3
       run: |
-        # I don't think I need this anymore:
-        # python -m mathics_scanner.generate.boxing_characters
-        # python -m mathics_scanner.generate.named_characters
-        # python -m mathics_scanner.generate.operators
+        # This seems to be needed on Windows.
+        python -m mathics_scanner.generate.boxing_characters
+        python -m mathics_scanner.generate.named_characters
+        python -m mathics_scanner.generate.operators
         pip install -e .[dev,full]
         py.test test
diff --git a/setup.py b/setup.py
index e94970b..5ea8694 100644
--- a/setup.py
+++ b/setup.py
@@ -54,7 +54,7 @@ def finalize_options(self):
                 raise RuntimeError(
                     f"Running {build_tables_program} exited with code {result.returncode}"
                 )
-            super().finalize_options()
+        super().finalize_options()
 
 
 setup(

From 59879a4c2a29fa0fc7a683597f8a34d61e32b3f2 Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Thu, 19 Feb 2026 12:49:50 -0500
Subject: [PATCH 16/26] Another attempt to get setup working

---
 Makefile |  2 +-
 setup.py | 37 ++++++++++++++++++++++++++++---------
 2 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/Makefile b/Makefile
index 57e6fa8..3de0759 100644
--- a/Makefile
+++ b/Makefile
@@ -37,7 +37,7 @@ build: mathics_scanner/data/characters.json mathics_scanner/data/named_character
 
 #: Set up to run from the source tree
 develop: mathics_scanner/data/boxing-characters.json mathics_scanner/data/named-characters.json mathics_scanner/data/operators.json
-	$(PIP) install -e .$(PIP_INSTALL_OPTS)
+	$(PIP) install --no-build-isolation -e . $(PIP_INSTALL_OPTS)
 
 #: Build distribution
 dist: admin-tools/make-dist.sh
diff --git a/setup.py b/setup.py
index 5ea8694..99801f7 100644
--- a/setup.py
+++ b/setup.py
@@ -44,16 +44,35 @@ class table_building_egg_info(egg_info):
 
     def finalize_options(self):
         """Run program to create JSON tables"""
-        for table_program in ("boxing-character", "named-character", "operator"):
-            build_tables_program = osp.join(
-                get_srcdir(), "mathics_scanner", "generate", f"{table_program}.py"
+        build_tables_program = osp.join(
+            get_srcdir(), "mathics_scanner", "generate", "named_characters.py"
+        )
+        print(f"Building JSON tables via {build_tables_program}")
+        result = subprocess.run([sys.executable, build_tables_program], check=False)
+        if result.returncode:
+            raise RuntimeError(
+                f"Running {build_tables_program} exited with code {result.returncode}"
+            )
+        super().finalize_options()
+        build_tables_program = osp.join(
+            get_srcdir(), "mathics_scanner", "generate", "operators.py"
+        )
+        print(f"Building JSON tables via {build_tables_program}")
+        result = subprocess.run([sys.executable, build_tables_program], check=False)
+        if result.returncode:
+            raise RuntimeError(
+                f"Running {build_tables_program} exited with code {result.returncode}"
+            )
+        super().finalize_options()
+        build_tables_program = osp.join(
+            get_srcdir(), "mathics_scanner", "generate", "boxing_characters.py"
+        )
+        print(f"Building JSON tables via {build_tables_program}")
+        result = subprocess.run([sys.executable, build_tables_program], check=False)
+        if result.returncode:
+            raise RuntimeError(
+                f"Running {build_tables_program} exited with code {result.returncode}"
             )
-            print(f"Building JSON tables via {build_tables_program}")
-            result = subprocess.run([sys.executable, build_tables_program], check=False)
-            if result.returncode:
-                raise RuntimeError(
-                    f"Running {build_tables_program} exited with code {result.returncode}"
-                )
         super().finalize_options()
 
 

From 7772a58796075d4daeddfb4ad1e6810d8073074b Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Thu, 19 Feb 2026 12:51:43 -0500
Subject: [PATCH 17/26] Comment out fancy setup - it's not working.

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 99801f7..043f1b3 100644
--- a/setup.py
+++ b/setup.py
@@ -77,7 +77,7 @@ def finalize_options(self):
 
 
 setup(
-    cmdclass={"egg_info": table_building_egg_info},
+    # cmdclass={"egg_info": table_building_egg_info},
     # don't pack Mathics in egg because of media files, etc.
     zip_safe=False,
 )

From 270d2e58e72f84c626e75c8289d604c26a7f0579 Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Thu, 19 Feb 2026 12:53:59 -0500
Subject: [PATCH 18/26] Another CI attempt

---
 setup.py | 22 +---------------------
 1 file changed, 1 insertion(+), 21 deletions(-)

diff --git a/setup.py b/setup.py
index 043f1b3..9a0a56d 100644
--- a/setup.py
+++ b/setup.py
@@ -54,30 +54,10 @@ def finalize_options(self):
                 f"Running {build_tables_program} exited with code {result.returncode}"
             )
         super().finalize_options()
-        build_tables_program = osp.join(
-            get_srcdir(), "mathics_scanner", "generate", "operators.py"
-        )
-        print(f"Building JSON tables via {build_tables_program}")
-        result = subprocess.run([sys.executable, build_tables_program], check=False)
-        if result.returncode:
-            raise RuntimeError(
-                f"Running {build_tables_program} exited with code {result.returncode}"
-            )
-        super().finalize_options()
-        build_tables_program = osp.join(
-            get_srcdir(), "mathics_scanner", "generate", "boxing_characters.py"
-        )
-        print(f"Building JSON tables via {build_tables_program}")
-        result = subprocess.run([sys.executable, build_tables_program], check=False)
-        if result.returncode:
-            raise RuntimeError(
-                f"Running {build_tables_program} exited with code {result.returncode}"
-            )
-        super().finalize_options()
 
 
 setup(
-    # cmdclass={"egg_info": table_building_egg_info},
+    cmdclass={"egg_info": table_building_egg_info},
     # don't pack Mathics in egg because of media files, etc.
     zip_safe=False,
 )

From aa29f8f2c27af5304e527caf033fa9b5c14d5498 Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Thu, 19 Feb 2026 13:06:31 -0500
Subject: [PATCH 19/26] Yet another CI try

---
 .github/workflows/ubuntu.yml | 11 ++++-------
 setup.py                     | 31 +++++++++++++------------------
 2 files changed, 17 insertions(+), 25 deletions(-)

diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml
index 34e873c..431787e 100644
--- a/.github/workflows/ubuntu.yml
+++ b/.github/workflows/ubuntu.yml
@@ -20,17 +20,14 @@ jobs:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
-        python -m pip install --upgrade pip
         pip install -e .
-    - name: Install Mathics_Scanner
+    - name: Install JSON dependencies
       run: |
-        make
+        python -m mathics_scanner.generate.boxing_characters
+        python -m mathics_scanner.generate.named_characters
+        python -m mathics_scanner.generate.operators
     - name: Test Mathics3 Scanner
       run: |
         pip install -r requirements-dev.txt
         pip install -r requirements-full.txt
-        # Don't think I need this anymore
-        # python -m mathics_scanner.generate.boxing_characters
-        # python -m mathics_scanner.generate.named_characters
-        # python -m mathics_scanner.generate.operators
         make check
diff --git a/setup.py b/setup.py
index 9a0a56d..fa3dfee 100644
--- a/setup.py
+++ b/setup.py
@@ -26,11 +26,9 @@
 """
 
 import os.path as osp
-import subprocess
-import sys
 
 from setuptools import setup
-from setuptools.command.egg_info import egg_info
+from setuptools.command.build_py import build_py as setuptools_build_py
 
 
 def get_srcdir():
@@ -39,25 +37,22 @@ def get_srcdir():
     return osp.realpath(filename)
 
 
-class table_building_egg_info(egg_info):
-    """This runs as part of building an sdist"""
+class build_py(setuptools_build_py):
+    def run(self):
+        for table_type in ("boxing-character", "named-character", "operator"):
+            json_data_file = osp.join("data", f"{table_type}.json")
+            json_path = osp.join("mathics-scanner", json_data_file)
+            if not osp.exists(json_path):
+                os.system(f"mathics3-make-{table_type}-json" " -o {json-path}")
+            self.distribution.package_data["Mathics-Scanner"].append(json_data_file)
+        setuptools_build_py.run(self)
 
-    def finalize_options(self):
-        """Run program to create JSON tables"""
-        build_tables_program = osp.join(
-            get_srcdir(), "mathics_scanner", "generate", "named_characters.py"
-        )
-        print(f"Building JSON tables via {build_tables_program}")
-        result = subprocess.run([sys.executable, build_tables_program], check=False)
-        if result.returncode:
-            raise RuntimeError(
-                f"Running {build_tables_program} exited with code {result.returncode}"
-            )
-        super().finalize_options()
+
+CMDCLASS = {"build_py": build_py}
 
 
 setup(
-    cmdclass={"egg_info": table_building_egg_info},
+    cmdclass=CMDCLASS,
     # don't pack Mathics in egg because of media files, etc.
     zip_safe=False,
 )

From 5c212f9bde8823ec7c3a38d6fe8f7ed7bcc2a9cf Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Thu, 19 Feb 2026 13:11:10 -0500
Subject: [PATCH 20/26] Another CI try

---
 .github/workflows/mathics3-doctest.yml | 11 ++++++++---
 .github/workflows/ubuntu.yml           |  7 ++++---
 setup.py                               |  1 +
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/mathics3-doctest.yml b/.github/workflows/mathics3-doctest.yml
index 9cf3d5c..8f40010 100644
--- a/.github/workflows/mathics3-doctest.yml
+++ b/.github/workflows/mathics3-doctest.yml
@@ -1,4 +1,4 @@
-name: Mathics3_Doctest (Mathics3 doctest)
+name: Mathics3 Doctest (Mathics3 doctest)
 
 on:
   push:
@@ -21,11 +21,16 @@ jobs:
     - name: Install OS dependencies
       run: |
         sudo apt-get update -qq && sudo apt-get install -qq liblapack-dev llvm-dev tesseract-ocr
-    - name: Install dependencies
+    - name: Install Mathics3 scanner without JSON
       run: |
         python -m pip install --upgrade pip
         pip install -e .
-    - name: Install Mathics_Scanner
+    - name: Install JSON files
+      run: |
+        python -m mathics_scanner.generate.boxing_characters
+        python -m mathics_scanner.generate.named_characters
+        python -m mathics_scanner.generate.operators
+    - name: Install Mathics Scanner
       run: |
         make
     - name: Run Mathics3 Doctests
diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml
index 431787e..205a4f2 100644
--- a/.github/workflows/ubuntu.yml
+++ b/.github/workflows/ubuntu.yml
@@ -1,4 +1,4 @@
-name: Mathics_Script (ubuntu)
+name: Mathics3 Scanner (ubuntu)
 
 on:
   push:
@@ -18,10 +18,11 @@ jobs:
       uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
-    - name: Install dependencies
+    - name: Install Mathics3 scanner without JSON fiels
       run: |
+        python -m pip install --upgrade pip
         pip install -e .
-    - name: Install JSON dependencies
+    - name: Install JSON files
       run: |
         python -m mathics_scanner.generate.boxing_characters
         python -m mathics_scanner.generate.named_characters
diff --git a/setup.py b/setup.py
index fa3dfee..46561ad 100644
--- a/setup.py
+++ b/setup.py
@@ -25,6 +25,7 @@
 mathics-users@googlegroups.com and ask for help.
 """
 
+import os
 import os.path as osp
 
 from setuptools import setup

From ae5cdf9d0d9a9f1aa3e3ea9e1efa195dce150041 Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Thu, 19 Feb 2026 13:17:02 -0500
Subject: [PATCH 21/26] Another CI try

---
 .github/workflows/osx.yml     | 11 ++++-------
 .github/workflows/ubuntu.yml  |  2 +-
 .github/workflows/windows.yml | 15 +++++++++------
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/osx.yml b/.github/workflows/osx.yml
index d2c29f8..1d666dd 100644
--- a/.github/workflows/osx.yml
+++ b/.github/workflows/osx.yml
@@ -1,4 +1,4 @@
-name: Mathics_Script (OSX)
+name: Mathics3 Scanner (OSX)
 
 on:
   push:
@@ -23,15 +23,12 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install -e .
-    - name: Install Mathics Scanner
+    - name: Install Mathics3 scanner without JSON files
       run: |
-        make
+        python -m pip install --upgrade pip
+        pip install -e .
     - name: Test Mathics3 Scanner
       run: |
         pip install -r requirements-dev.txt
         pip install -r requirements-full.txt
-        # I don't think I need this anymore:
-        # python -m mathics_scanner.generate.boxing_characters
-        # python -m mathics_scanner.generate.named_characters
-        # python -m mathics_scanner.generate.operators
         make check
diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml
index 205a4f2..f9470e1 100644
--- a/.github/workflows/ubuntu.yml
+++ b/.github/workflows/ubuntu.yml
@@ -18,7 +18,7 @@ jobs:
       uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
-    - name: Install Mathics3 scanner without JSON fiels
+    - name: Install Mathics3 scanner without JSON files
       run: |
         python -m pip install --upgrade pip
         pip install -e .
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index 38d8f5a..00706c7 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -1,4 +1,4 @@
-name: Mathics (Windows)
+name: Mathics3 Scanner (Windows)
 
 on:
   push:
@@ -23,14 +23,17 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install -e .
-    - name: Install Mathics Scanner
+    - name: Install Mathics3 scanner without JSON files
       run: |
+        python -m pip install --upgrade pip
         pip install -e .
-    - name: Test Mathics3
+    - name: Install JSON files
       run: |
-        # This seems to be needed on Windows.
         python -m mathics_scanner.generate.boxing_characters
         python -m mathics_scanner.generate.named_characters
         python -m mathics_scanner.generate.operators
-        pip install -e .[dev,full]
-        py.test test
+    - name: Test Mathics3 Scanner
+      run: |
+        pip install -r requirements-dev.txt
+        pip install -r requirements-full.txt
+        make check

From 0d43e4609894b543826f6190629481c9f08a79b5 Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Thu, 19 Feb 2026 13:22:16 -0500
Subject: [PATCH 22/26] Another CI attempt

---
 .github/workflows/mathics3-doctest.yml | 5 ++---
 .github/workflows/osx.yml              | 5 +++++
 .github/workflows/pyodide.yml          | 2 +-
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/mathics3-doctest.yml b/.github/workflows/mathics3-doctest.yml
index 8f40010..44092c4 100644
--- a/.github/workflows/mathics3-doctest.yml
+++ b/.github/workflows/mathics3-doctest.yml
@@ -32,11 +32,10 @@ jobs:
         python -m mathics_scanner.generate.operators
     - name: Install Mathics Scanner
       run: |
-        make
+        pip install -r requirements-dev.txt
+        pip install -r requirements-full.txt
     - name: Run Mathics3 Doctests
       run: |
-        git clone --depth 1 https://github.com/Mathics3/mathics-scanner.git
-        (cd mathics-scanner && pip install -e .)
         # Until next Mathics3/mathics-core release is out...
         git clone -b add-unicode-box-characters --depth 1 https://github.com/Mathics3/mathics-core.git
         cd mathics-core/
diff --git a/.github/workflows/osx.yml b/.github/workflows/osx.yml
index 1d666dd..c51369a 100644
--- a/.github/workflows/osx.yml
+++ b/.github/workflows/osx.yml
@@ -27,6 +27,11 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install -e .
+    - name: Install JSON files
+      run: |
+        python -m mathics_scanner.generate.boxing_characters
+        python -m mathics_scanner.generate.named_characters
+        python -m mathics_scanner.generate.operators
     - name: Test Mathics3 Scanner
       run: |
         pip install -r requirements-dev.txt
diff --git a/.github/workflows/pyodide.yml b/.github/workflows/pyodide.yml
index f1adb0a..0a5f6e4 100644
--- a/.github/workflows/pyodide.yml
+++ b/.github/workflows/pyodide.yml
@@ -43,7 +43,7 @@ jobs:
         with:
           node-version: ${{ env.NODE_VERSION }}
 
-      - name: Set up Pyodide virtual environment and run tests
+      - name: Set up Pyodide virtual environment
         run: |
           # Set up Pyodide virtual environment
           pyodide xbuildenv install ${{ env.PYODIDE_VERSION }}

From ed2e7a822ea115d1f9102f9804101c4247b88dbd Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Thu, 19 Feb 2026 13:36:02 -0500
Subject: [PATCH 23/26] One more CI attempt

---
 .github/workflows/mathics3-doctest.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/mathics3-doctest.yml b/.github/workflows/mathics3-doctest.yml
index 44092c4..9798376 100644
--- a/.github/workflows/mathics3-doctest.yml
+++ b/.github/workflows/mathics3-doctest.yml
@@ -40,6 +40,6 @@ jobs:
         git clone -b add-unicode-box-characters --depth 1 https://github.com/Mathics3/mathics-core.git
         cd mathics-core/
         make PIP_INSTALL_OPTS='[full]'
-        # pip install Mathics3[full]
+        bash ./admin-tools/make-JSON-tables.sh
         cd ..
         MATHICS_CHARACTER_ENCODING="ASCII" make check-mathics

From 733fc1bb6d2bf3c74d8206d73c8c03d73fc8a7d9 Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Thu, 19 Feb 2026 17:43:55 -0500
Subject: [PATCH 24/26] Try to get doctest working

---
 .github/workflows/mathics3-doctest.yml | 27 ++++++++++++++------------
 Makefile                               |  2 +-
 mathics_scanner/tokeniser.py           |  2 +-
 3 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/mathics3-doctest.yml b/.github/workflows/mathics3-doctest.yml
index 9798376..654aadd 100644
--- a/.github/workflows/mathics3-doctest.yml
+++ b/.github/workflows/mathics3-doctest.yml
@@ -20,26 +20,29 @@ jobs:
         python-version: ${{ matrix.python-version }}
     - name: Install OS dependencies
       run: |
-        sudo apt-get update -qq && sudo apt-get install -qq liblapack-dev llvm-dev tesseract-ocr
+        sudo apt-get update -qq && sudo apt-get install -qq liblapack-dev llvm-dev tesseract-ocr remake
     - name: Install Mathics3 scanner without JSON
       run: |
         python -m pip install --upgrade pip
         pip install -e .
     - name: Install JSON files
       run: |
-        python -m mathics_scanner.generate.boxing_characters
-        python -m mathics_scanner.generate.named_characters
-        python -m mathics_scanner.generate.operators
-    - name: Install Mathics Scanner
-      run: |
-        pip install -r requirements-dev.txt
-        pip install -r requirements-full.txt
-    - name: Run Mathics3 Doctests
+        python -m mathics_scanner.generate.boxing_characters -o mathics_scanner/data/boxing-characters.json
+        ls -l mathics_scanner/data/boxing-characters.json
+        python -m mathics_scanner.generate.named_characters -o mathics_scanner/data/named-characters.json
+        ls -l mathics_scanner/data/named-characters.json
+        python -m mathics_scanner.generate.operators -o mathics_scanner/data/operators.json
+        ls -l mathics_scanner/data/operators.json
+    - name: Build Mathics3
       run: |
         # Until next Mathics3/mathics-core release is out...
         git clone -b add-unicode-box-characters --depth 1 https://github.com/Mathics3/mathics-core.git
         cd mathics-core/
-        make PIP_INSTALL_OPTS='[full]'
-        bash ./admin-tools/make-JSON-tables.sh
+        python -m pip install -e .[dev]
+        cp -v ../mathics_scanner/data/boxing-characters.json mathics/data/boxing-characters.json
+        cp -v ../mathics_scanner/data/named-characters.json mathics/data/named-characters.json
+        cp -v ../mathics_scanner/data/operators.json mathics/data/operators.json
         cd ..
-        MATHICS_CHARACTER_ENCODING="ASCII" make check-mathics
+    - name: Run Mathics3 tests
+      run: |
+        remake -x check-mathics
diff --git a/Makefile b/Makefile
index 3de0759..3af4999 100644
--- a/Makefile
+++ b/Makefile
@@ -78,7 +78,7 @@ inputrc-unicode:
 	$(PYTHON) -m mathics_scanner.generate.rl_inputrc inputrc-unicode
 
 #: Run Mathics core checks
-check-mathics::
+check-mathics:
 	MATHICS_CHARACTER_ENCODING="ASCII" $(PYTHON) -m mathics.docpipeline $o
 	pytest test/test_mathics_precedence.py
 
diff --git a/mathics_scanner/tokeniser.py b/mathics_scanner/tokeniser.py
index 04a8fee..ec2a871 100644
--- a/mathics_scanner/tokeniser.py
+++ b/mathics_scanner/tokeniser.py
@@ -959,7 +959,7 @@ def t_String(self, _: Optional[re.Match]) -> Token:
                         # If there is boxing construct matched, we
                         # preserve what was given, but do not tokenize
                         # the construct. "\(" remains "\(" and is not
-                        # turned into IntepretBox".
+                        # turned into InterpretBox.
                         result += "\\" + escaped_char
                         self.pos += 1
                     else:

From f2430d8b96a84278454015126b1d5c87fc523a25 Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Fri, 20 Feb 2026 08:22:01 -0500
Subject: [PATCH 25/26] Correct YAML encoding for \` and ...

We need to track whether we are in a *String*, not whether we are in a box.
---
 mathics_scanner/data/boxing-characters.yml | 2 +-
 mathics_scanner/escape_sequences.py        | 4 ++--
 mathics_scanner/tokeniser.py               | 8 ++++----
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/mathics_scanner/data/boxing-characters.yml b/mathics_scanner/data/boxing-characters.yml
index eb45ea8..5e38d35 100644
--- a/mathics_scanner/data/boxing-characters.yml
+++ b/mathics_scanner/data/boxing-characters.yml
@@ -68,7 +68,7 @@ LinearSyntaxCloseParen:
 LinearSyntaxOpenParen:
   ASCII: '\('
   Operators: [RowBox]
-  Unicode: "\uf7cd"
+  Unicode: "\uf7c9"
 
 LinearSyntaxPercent:
   ASCII: '\%'
diff --git a/mathics_scanner/escape_sequences.py b/mathics_scanner/escape_sequences.py
index f448e71..41e198b 100644
--- a/mathics_scanner/escape_sequences.py
+++ b/mathics_scanner/escape_sequences.py
@@ -80,7 +80,7 @@ def parse_named_character(source_text: str, start: int, finish: int) -> Optional
 
 
 def parse_escape_sequence(
-    source_text: str, pos: int, is_inside_box: bool
+    source_text: str, pos: int, is_in_string: bool
 ) -> Tuple[str, int]:
     """Given some source text in `source_text` starting at offset
     `pos`, return the escape-sequence value for this text and the
@@ -155,7 +155,7 @@ def parse_escape_sequence(
             assert c == "r"
             result += "\r"
         pos += 1
-    elif is_inside_box and c in BOX_OPERATOR:
+    elif is_in_string and c in BOX_OPERATOR:
         if (boxed_character := BOXING_ASCII_TO_UNICODE.get("\\" + c)) is not None:
             # Replace \ in result with Unicode representing the two ASCII characters.
             result = result[:-1] + boxed_character
diff --git a/mathics_scanner/tokeniser.py b/mathics_scanner/tokeniser.py
index ec2a871..a435da4 100644
--- a/mathics_scanner/tokeniser.py
+++ b/mathics_scanner/tokeniser.py
@@ -702,7 +702,7 @@ def next(self) -> Token:
 
                 try:
                     escape_str, next_pos = parse_escape_sequence(
-                        self.source_text, self.pos + 1, self.is_inside_box
+                        self.source_text, self.pos + 1, is_in_string=False
                     )
                 except (EscapeSyntaxError, NamedCharacterSyntaxError) as escape_error:
                     if self.is_inside_box:
@@ -810,7 +810,7 @@ def t_RawBackslash(self, pattern_match: Optional[re.Match]) -> Token:
 
         try:
             escape_str, self.pos = parse_escape_sequence(
-                source_text, start_pos, self.is_inside_box
+                source_text, start_pos, is_in_string=False
             )
             if source_text[start_pos] == "[" and source_text[self.pos - 1] == "]":
                 named_character = source_text[start_pos + 1 : self.pos - 1]
@@ -879,7 +879,7 @@ def t_RawBackslash(self, pattern_match: Optional[re.Match]) -> Token:
 
                 try:
                     escape_str, next_pos = parse_escape_sequence(
-                        self.source_text, self.pos + 1, self.is_inside_box
+                        self.source_text, self.pos + 1, is_in_string=False
                     )
                 except (EscapeSyntaxError, NamedCharacterSyntaxError) as escape_error:
                     if self.is_inside_box:
@@ -943,7 +943,7 @@ def t_String(self, _: Optional[re.Match]) -> Token:
                 self.pos += 1
                 try:
                     escape_str, self.pos = parse_escape_sequence(
-                        source_text, self.pos, self.is_inside_box
+                        source_text, self.pos, is_in_string=True
                     )
                 except NamedCharacterSyntaxError as escape_error:
                     self.feeder.message(

From 99b5ab61839364e08959890474b57e92fc18e300 Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Fri, 20 Feb 2026 09:07:23 -0500
Subject: [PATCH 26/26] is_inside_box -> is_in_string

---
 test/test_escape_sequences.py | 6 +++---
 test/test_string_tokens.py    | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/test_escape_sequences.py b/test/test_escape_sequences.py
index 5143a3a..f963108 100644
--- a/test/test_escape_sequences.py
+++ b/test/test_escape_sequences.py
@@ -45,7 +45,7 @@ def test_escape_sequences():
         (r"z \[Conjugate]", 3, 14, "\uf3c8", "Named character; at end"),
         ("[Integral]", 0, 10, "\u222b", "Another full-string named-character"),
     ):
-        assert parse_escape_sequence(text, pos, is_inside_box=False) == (
+        assert parse_escape_sequence(text, pos, is_in_string=False) == (
             expect_str,
             expect_pos,
         ), fail_msg
@@ -54,7 +54,7 @@ def test_escape_sequences():
 def test_invalid_named_character_sequences():
     for text in (r"\[", r"\[Theta", r"\[Fake]", r"\[abc]"):
         with pytest.raises(NamedCharacterSyntaxError):
-            parse_escape_sequence(text, 1, is_inside_box=False)
+            parse_escape_sequence(text, 1, is_in_string=False)
 
 
 def test_invalid_number_encoding():
@@ -78,4 +78,4 @@ def test_invalid_number_encoding():
         ":01-2",
     ):
         with pytest.raises(SyntaxError):
-            parse_escape_sequence(text, 0, is_inside_box=False)
+            parse_escape_sequence(text, 0, is_in_string=False)
diff --git a/test/test_string_tokens.py b/test/test_string_tokens.py
index 61cdc76..180c38b 100644
--- a/test/test_string_tokens.py
+++ b/test/test_string_tokens.py
@@ -96,7 +96,7 @@ def test_string():
 
     check_string(
         r'"\(a \+\)"',
-        r'"\(a \+\)"',
+        r'"a \+"',
         "Do not interpret, but preserve boxing inside a string",
     )