diff --git a/pretext/project/__init__.py b/pretext/project/__init__.py index d9bad097..73838c40 100644 --- a/pretext/project/__init__.py +++ b/pretext/project/__init__.py @@ -128,6 +128,8 @@ class Target(pxml.BaseXmlModel, tag="target", search_mode=SearchMode.UNORDERED): # # A path to the root source for this target, relative to the project's `source` path. source: Path = pxml.attr(default=Path("main.ptx")) + # Cache of assembled source element. + _source_element: t.Optional[ET._Element] = None # A path to the publication file for this target, relative to the project's `publication` path. This is mostly validated by `post_validate`. publication: Path = pxml.attr(default=None) latex_engine: LatexEngine = pxml.attr( @@ -368,29 +370,9 @@ def post_validate(self) -> None: if server.name not in self_server_names: self.server.append(server) - # For the Runestone format, determine the ``, which specifies the `output_dir`. - if self.format == Format.HTML and self.platform == Platform.RUNESTONE: - # We expect `d_list == ["document-id contents here"]`. - d_list = self.source_element().xpath("/pretext/docinfo/document-id/text()") - if isinstance(d_list, list): - if len(d_list) != 1: - raise ValueError( - "Only one `document-id` tag is allowed in a PreTeXt document." - ) - # NB as of 2025-04-08, we are no longer setting the output directory automatically for - # Runestone targets. This must be managed by the project.ptx file or by a client script. - # The commented code below is how we used to do this. - - # d = d_list[0] - # assert isinstance(d, str) - # # Use the correct number of `../` to undo the project's `output-dir`, so the output from the build is located in the correct directory of `published/document-id`. - # self.output_dir = Path( - # f"{'../'*len(self._project.output_dir.parents)}published/{d}" - # ) - else: - raise ValueError( - "The `document-id` tag must be defined for the Runestone format." - ) + # NB as of 2025-04-08, we are no longer setting the output directory automatically for + # Runestone targets. This must be managed by the project.ptx file or by a client script. + # Check history here for how that was done. def source_abspath(self) -> Path: return self._project.source_abspath() / self.source @@ -408,14 +390,21 @@ def original_source_element(self) -> ET._Element: def source_element(self) -> ET._Element: """ Returns the root element for the assembled source, after processing with the "version-only" assembly. + Caches the result for future calls. """ - assembled = core.assembly_internal( - xml=self.source_abspath(), - pub_file=self.publication_abspath().as_posix(), - stringparams=self.stringparams.copy(), - method="version", - ) - return assembled.getroot() + if self._source_element is None: + log.debug( + f"Parsing source element for target {self.name}", + ) + self._source_element = core.assembly_internal( + xml=self.source_abspath(), + pub_file=self.publication_abspath().as_posix(), + stringparams=self.stringparams.copy(), + method="version", + ).getroot() + else: + log.debug(f"Using cached source_element for target {self.name}") + return self._source_element def publication_abspath(self) -> Path: return self._project.publication_abspath() / self.publication @@ -682,12 +671,16 @@ def build( self.stringparams["cli.version"] = VERSION[: VERSION.rfind(".")] # Check for xml syntax errors and quit if xml invalid: - if not utils.xml_syntax_is_valid(self.source_abspath()): + try: + # Access the source_element to trigger assembly if it hasn't been done yet. + self.source_element() + except Exception as e: + log.error(f"Error assembling source file: {e}") raise RuntimeError("XML syntax for source file is invalid") if not utils.xml_syntax_is_valid(self.publication_abspath(), "publication"): raise RuntimeError("XML syntax for publication file is invalid") # Validate xml against schema; continue with warning if invalid: - utils.xml_source_validates_against_schema(self.source_abspath()) + utils.xml_validates_against_schema(self.source_element()) # Clean output upon request if clean: diff --git a/pretext/utils.py b/pretext/utils.py index 602b5faf..86b2b6e0 100644 --- a/pretext/utils.py +++ b/pretext/utils.py @@ -12,6 +12,7 @@ import socketserver import socket import subprocess +import time as time import logging import logging.handlers import psutil @@ -203,17 +204,13 @@ def xml_syntax_is_valid(xmlfile: Path, root_tag: str = "pretext") -> bool: return True -def xml_source_validates_against_schema(xmlfile: Path) -> bool: +def xml_validates_against_schema(etree: _Element) -> bool: # get path to RelaxNG schema file: schemarngfile = resources.resource_base_path() / "core" / "schema" / "pretext.rng" # Open schemafile for validation: relaxng = ET.RelaxNG(file=schemarngfile) - # Parse xml file: - source_xml = ET.parse(xmlfile) - source_xml.xinclude() - # just for testing # ---------------- # relaxng.validate(source_xml) @@ -222,7 +219,7 @@ def xml_source_validates_against_schema(xmlfile: Path) -> bool: # validate against schema try: - relaxng.assertValid(source_xml) + relaxng.assertValid(etree) log.info("PreTeXt source passed schema validation.") except ET.DocumentInvalid as err: log.debug( @@ -1104,3 +1101,30 @@ def rs_methods( f"PTX-BUG: Format {format} not recognized for running ext_rs_methods. Something is wrong with the pretext script." ) return None + + +class Stopwatch: + """A simple stopwatch class for measuring elapsed time.""" + + """print_log set to false disables logging of elapsed time """ + + def __init__(self, name: str = "", print_log: bool = True): + self.name = name + self.print_log = print_log + self.start_time = time.time() + self.last_log_time = self.start_time + + def reset(self) -> None: + """Reset the log timer to the current time.""" + self.last_log_time = time.time() + + def log(self, timepoint_description: str = "") -> None: + """Print a log message with the elapsed time since the last log event.""" + if self.print_log: + cur_time = time.time() + elapsed_time = cur_time - self.start_time + since_last_log_time = cur_time - self.last_log_time + self.reset() + log.debug( + f"** Timing report from {self.name}: {timepoint_description}, {since_last_log_time:.2f}s since last watch reset. {elapsed_time:.2f}s total elapsed time." + )