From e73f0fd164f052525076aa3dda4263077ec34602 Mon Sep 17 00:00:00 2001 From: Ben Jeffery Date: Wed, 9 Jul 2025 13:53:06 +0100 Subject: [PATCH] Fix extra commas in HTML cells --- build.sh | 2 +- content-hash.txt | 2 +- content/tskit.ipynb | 340 +--------------------------------------- fix_jupyterlite_html.py | 211 +++++++++++++++++++++++++ 4 files changed, 215 insertions(+), 340 deletions(-) create mode 100644 fix_jupyterlite_html.py diff --git a/build.sh b/build.sh index 19039f7..4edda9d 100755 --- a/build.sh +++ b/build.sh @@ -1,5 +1,5 @@ #!/bin/bash -e - +python fix_jupyterlite_html.py content cd tskit-launcher/ jlpm install jlpm run build diff --git a/content-hash.txt b/content-hash.txt index af71b3c..b7401f3 100644 --- a/content-hash.txt +++ b/content-hash.txt @@ -1 +1 @@ -05cfda449cc14ff90cee5a0e1f904008f159ce3fd094eb319c55c4fce01076f6 +b93af6127eb04d9bf604e2432d529f1ea6b25ee943b25da5137b91426f6c00b0 diff --git a/content/tskit.ipynb b/content/tskit.ipynb index 3a89440..12b7625 100644 --- a/content/tskit.ipynb +++ b/content/tskit.ipynb @@ -28,343 +28,7 @@ "outputs": [ { "data": { - "text/html": [ - "\n", - "
\n", - " \n", - "
\n", - "
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
\n", - " \n", - " Tree Sequence \n", - "
Trees861
Sequence Length1 000
Time Unitsgenerations
Sample Nodes200
Total Size427.9 KiB
MetadataNo Metadata
\n", - "
\n", - "
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
TableRowsSizeHas Metadata
Edges6 910215.9 KiB\n", - " \n", - "
Individuals1002.8 KiB\n", - " \n", - "
Migrations08 Bytes\n", - " \n", - "
Mutations2 30883.4 KiB\n", - " \n", - "
Nodes1 76548.3 KiB\n", - " \n", - "
Populations1224 Bytes\n", - " ✅\n", - "
Provenances21.7 KiB\n", - " \n", - "
Sites88721.7 KiB\n", - " \n", - "
\n", - "
\n", - "
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Provenance TimestampSoftware NameVersionCommandFull record
08 July, 2025 at 10:31:57 AMmsprime1.3.4sim_mutations\n", - "
\n", - " Details\n", - " \n", - "
\n", - " \n", - "
\n", - " dict\n", - " schema_version: 1.0.0
\n", - "
\n", - " software:\n", - "
\n", - " dict\n", - " name: msprime
version: 1.3.4
\n", - "
\n", - "
\n", - "
\n", - "
\n", - " parameters:\n", - "
\n", - " dict\n", - " command: sim_mutations
\n", - "
\n", - " tree_sequence:\n", - "
\n", - " dict\n", - " __constant__: __current_ts__
\n", - "
\n", - "
\n", - "
rate: 0.1
model: None
start_time: None
end_time: None
discrete_genome: None
keep: None
random_seed: 741366272
\n", - "
\n", - "
\n", - "
\n", - "
\n", - " environment:\n", - "
\n", - " dict\n", - " \n", - "
\n", - " os:\n", - "
\n", - " dict\n", - " system: Linux
node: treebeard
release: 5.15.0-133-generic
version: #144-Ubuntu SMP Fri Feb 7
20:47:38 UTC 2025
machine: x86_64
\n", - "
\n", - "
\n", - "
\n", - "
\n", - " python:\n", - "
\n", - " dict\n", - " implementation: CPython
version: 3.11.11
\n", - "
\n", - "
\n", - "
\n", - "
\n", - " libraries:\n", - "
\n", - " dict\n", - " \n", - "
\n", - " kastore:\n", - "
\n", - " dict\n", - " version: 2.1.1
\n", - "
\n", - "
\n", - "
\n", - "
\n", - " tskit:\n", - "
\n", - " dict\n", - " version: 0.6.4
\n", - "
\n", - "
\n", - "
\n", - "
\n", - " gsl:\n", - "
\n", - " dict\n", - " version: 2.6
\n", - "
\n", - "
\n", - "
\n", - "
\n", - "
\n", - "
\n", - "
\n", - "
\n", - "
\n", - "
\n", - "
\n", - " \n", - "
\n", - "
08 July, 2025 at 10:31:47 AMmsprime1.3.4sim_ancestry\n", - "
\n", - " Details\n", - " \n", - "
\n", - " \n", - "
\n", - " dict\n", - " schema_version: 1.0.0
\n", - "
\n", - " software:\n", - "
\n", - " dict\n", - " name: msprime
version: 1.3.4
\n", - "
\n", - "
\n", - "
\n", - "
\n", - " parameters:\n", - "
\n", - " dict\n", - " command: sim_ancestry
samples: 100
demography: None
sequence_length: 1000
discrete_genome: None
recombination_rate: 0.1
gene_conversion_rate: None
gene_conversion_tract_length: None
population_size: None
ploidy: None
model: None
initial_state: None
start_time: None
end_time: None
record_migrations: None
record_full_arg: None
additional_nodes: None
coalescing_segments_only: None
num_labels: None
random_seed: 1166325161
replicate_index: 0
\n", - "
\n", - "
\n", - "
\n", - "
\n", - " environment:\n", - "
\n", - " dict\n", - " \n", - "
\n", - " os:\n", - "
\n", - " dict\n", - " system: Linux
node: treebeard
release: 5.15.0-133-generic
version: #144-Ubuntu SMP Fri Feb 7
20:47:38 UTC 2025
machine: x86_64
\n", - "
\n", - "
\n", - "
\n", - "
\n", - " python:\n", - "
\n", - " dict\n", - " implementation: CPython
version: 3.11.11
\n", - "
\n", - "
\n", - "
\n", - "
\n", - " libraries:\n", - "
\n", - " dict\n", - " \n", - "
\n", - " kastore:\n", - "
\n", - " dict\n", - " version: 2.1.1
\n", - "
\n", - "
\n", - "
\n", - "
\n", - " tskit:\n", - "
\n", - " dict\n", - " version: 0.6.4
\n", - "
\n", - "
\n", - "
\n", - "
\n", - " gsl:\n", - "
\n", - " dict\n", - " version: 2.6
\n", - "
\n", - "
\n", - "
\n", - "
\n", - "
\n", - "
\n", - "
\n", - "
\n", - "
\n", - "
\n", - "
\n", - " \n", - "
\n", - "
\n", - "
\n", - "
\n", - "
\n", - " " - ], + "text/html": "\n
\n \n
\n
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
\n \n Tree Sequence \n
Trees861
Sequence Length1 000
Time Unitsgenerations
Sample Nodes200
Total Size427.9 KiB
MetadataNo Metadata
\n
\n
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
TableRowsSizeHas Metadata
Edges6 910215.9 KiB\n \n
Individuals1002.8 KiB\n \n
Migrations08 Bytes\n \n
Mutations2 30883.4 KiB\n \n
Nodes1 76548.3 KiB\n \n
Populations1224 Bytes\n ✅\n
Provenances21.7 KiB\n \n
Sites88721.7 KiB\n \n
\n
\n
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Provenance TimestampSoftware NameVersionCommandFull record
08 July, 2025 at 10:31:57 AMmsprime1.3.4sim_mutations\n
\n Details\n \n
\n \n
\n dict\n schema_version: 1.0.0
\n
\n software:\n
\n dict\n name: msprime
version: 1.3.4
\n
\n
\n
\n
\n parameters:\n
\n dict\n command: sim_mutations
\n
\n tree_sequence:\n
\n dict\n __constant__: __current_ts__
\n
\n
\n
rate: 0.1
model: None
start_time: None
end_time: None
discrete_genome: None
keep: None
random_seed: 741366272
\n
\n
\n
\n
\n environment:\n
\n dict\n \n
\n os:\n
\n dict\n system: Linux
node: treebeard
release: 5.15.0-133-generic
version: #144-Ubuntu SMP Fri Feb 7
20:47:38 UTC 2025
machine: x86_64
\n
\n
\n
\n
\n python:\n
\n dict\n implementation: CPython
version: 3.11.11
\n
\n
\n
\n
\n libraries:\n
\n dict\n \n
\n kastore:\n
\n dict\n version: 2.1.1
\n
\n
\n
\n
\n tskit:\n
\n dict\n version: 0.6.4
\n
\n
\n
\n
\n gsl:\n
\n dict\n version: 2.6
\n
\n
\n
\n
\n
\n
\n
\n
\n
\n
\n
\n \n
\n
08 July, 2025 at 10:31:47 AMmsprime1.3.4sim_ancestry\n
\n Details\n \n
\n \n
\n dict\n schema_version: 1.0.0
\n
\n software:\n
\n dict\n name: msprime
version: 1.3.4
\n
\n
\n
\n
\n parameters:\n
\n dict\n command: sim_ancestry
samples: 100
demography: None
sequence_length: 1000
discrete_genome: None
recombination_rate: 0.1
gene_conversion_rate: None
gene_conversion_tract_length: None
population_size: None
ploidy: None
model: None
initial_state: None
start_time: None
end_time: None
record_migrations: None
record_full_arg: None
additional_nodes: None
coalescing_segments_only: None
num_labels: None
random_seed: 1166325161
replicate_index: 0
\n
\n
\n
\n
\n environment:\n
\n dict\n \n
\n os:\n
\n dict\n system: Linux
node: treebeard
release: 5.15.0-133-generic
version: #144-Ubuntu SMP Fri Feb 7
20:47:38 UTC 2025
machine: x86_64
\n
\n
\n
\n
\n python:\n
\n dict\n implementation: CPython
version: 3.11.11
\n
\n
\n
\n
\n libraries:\n
\n dict\n \n
\n kastore:\n
\n dict\n version: 2.1.1
\n
\n
\n
\n
\n tskit:\n
\n dict\n version: 0.6.4
\n
\n
\n
\n
\n gsl:\n
\n dict\n version: 2.6
\n
\n
\n
\n
\n
\n
\n
\n
\n
\n
\n
\n \n
\n
\n
\n
\n
\n ", "text/plain": [ "" ] @@ -408,4 +72,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file diff --git a/fix_jupyterlite_html.py b/fix_jupyterlite_html.py new file mode 100644 index 0000000..ef1b0b5 --- /dev/null +++ b/fix_jupyterlite_html.py @@ -0,0 +1,211 @@ +#!/usr/bin/env python3 +""" +Fix JupyterLite HTML comma issue by consolidating multi-line HTML outputs to single lines. + +This script processes .ipynb files and fixes the issue where JupyterLite adds commas +to the beginning of each line in HTML output. It does this by converting multi-line +HTML strings in cell outputs to single-line strings. + +Usage: + python fix_jupyterlite_html.py [notebook_file_or_directory] + +If no argument is provided, it will process all .ipynb files in the current directory. +""" + +import json +import os +import sys +import argparse +from pathlib import Path +from typing import List, Dict, Any + + +def fix_html_output(output: Dict[str, Any]) -> bool: + """ + Fix HTML output by consolidating multi-line strings to single lines. + + Args: + output: A notebook cell output dictionary + + Returns: + bool: True if the output was modified, False otherwise + """ + modified = False + + # Check if this is an HTML output + if output.get("output_type") == "execute_result" or output.get("output_type") == "display_data": + data = output.get("data", {}) + + # Fix text/html output + if "text/html" in data: + html_content = data["text/html"] + + # If it's a list of strings (multi-line), join them + if isinstance(html_content, list): + # Join all lines into a single string + single_line_html = "".join(html_content) + data["text/html"] = single_line_html + modified = True + print(f" Fixed HTML output: converted {len(html_content)} lines to single line") + + # If it's a single string with newlines, we could optionally process it too + # but the main issue is with the list format + + return modified + + +def fix_notebook(notebook_path: Path) -> bool: + """ + Fix a single notebook file. + + Args: + notebook_path: Path to the notebook file + + Returns: + bool: True if the notebook was modified, False otherwise + """ + print(f"Processing: {notebook_path}") + + try: + # Read the notebook + with open(notebook_path, 'r', encoding='utf-8') as f: + notebook = json.load(f) + + modified = False + + # Process each cell + for cell_idx, cell in enumerate(notebook.get("cells", [])): + # Check cell outputs + for output_idx, output in enumerate(cell.get("outputs", [])): + if fix_html_output(output): + modified = True + + # Write back if modified + if modified: + with open(notebook_path, 'w', encoding='utf-8') as f: + json.dump(notebook, f, indent=1, ensure_ascii=False) + print(f" ✓ Fixed and saved: {notebook_path}") + return True + else: + print(f" No changes needed: {notebook_path}") + return False + + except Exception as e: + print(f" ✗ Error processing {notebook_path}: {e}") + return False + + +def find_notebooks(path: Path) -> List[Path]: + """ + Find all .ipynb files in a directory or return the single file. + + Args: + path: Path to search (file or directory) + + Returns: + List of notebook file paths + """ + if path.is_file(): + if path.suffix == '.ipynb': + return [path] + else: + print(f"Warning: {path} is not a .ipynb file") + return [] + + elif path.is_dir(): + notebooks = list(path.rglob('*.ipynb')) + # Filter out checkpoint files + notebooks = [nb for nb in notebooks if '.ipynb_checkpoints' not in str(nb)] + return notebooks + + else: + print(f"Error: {path} does not exist") + return [] + + +def main(): + parser = argparse.ArgumentParser( + description="Fix JupyterLite HTML comma issue in notebook files", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + python fix_jupyterlite_html.py # Fix all .ipynb files in current directory + python fix_jupyterlite_html.py notebook.ipynb # Fix a specific notebook + python fix_jupyterlite_html.py content/ # Fix all notebooks in content/ directory + python fix_jupyterlite_html.py --dry-run # Show what would be changed without modifying files + """ + ) + + parser.add_argument( + 'path', + nargs='?', + default='.', + help='Path to notebook file or directory (default: current directory)' + ) + + parser.add_argument( + '--dry-run', + action='store_true', + help='Show what would be changed without modifying files' + ) + + args = parser.parse_args() + + # Convert to Path object + search_path = Path(args.path) + + # Find all notebooks + notebooks = find_notebooks(search_path) + + if not notebooks: + print("No notebook files found.") + return 1 + + print(f"Found {len(notebooks)} notebook(s) to process:") + for nb in notebooks: + print(f" - {nb}") + print() + + if args.dry_run: + print("DRY RUN MODE - No files will be modified") + print() + + # Process each notebook + total_modified = 0 + for notebook_path in notebooks: + if not args.dry_run: + if fix_notebook(notebook_path): + total_modified += 1 + else: + # For dry run, just check if it would be modified + try: + with open(notebook_path, 'r', encoding='utf-8') as f: + notebook = json.load(f) + + would_modify = False + for cell in notebook.get("cells", []): + for output in cell.get("outputs", []): + if output.get("output_type") in ["execute_result", "display_data"]: + data = output.get("data", {}) + if "text/html" in data and isinstance(data["text/html"], list): + would_modify = True + break + if would_modify: + break + + if would_modify: + print(f" Would fix: {notebook_path}") + total_modified += 1 + else: + print(f" No changes needed: {notebook_path}") + + except Exception as e: + print(f" Error checking {notebook_path}: {e}") + + print(f"\nSummary: {total_modified} notebook(s) {'would be' if args.dry_run else 'were'} modified") + + return 0 + + +if __name__ == "__main__": + sys.exit(main())