diff --git a/pretext/cli.py b/pretext/cli.py index a7b04723..60e2cfd7 100644 --- a/pretext/cli.py +++ b/pretext/cli.py @@ -112,6 +112,9 @@ def main(ctx: click.Context, targets: bool) -> None: Use the `--help` option on any CLI command to learn more, for example, `pretext build --help`. """ + time_logger = utils.Stopwatch("core ", True) + time_logger.log("core started") + # the targets option just lists targets in the current project if targets: if (pp := utils.project_path()) is not None: @@ -136,7 +139,9 @@ def main(ctx: click.Context, targets: bool) -> None: return # In all other cases we need to know whether we are in a directory for a project. if (pp := utils.project_path()) is not None: + time_logger.log("starting project parse") project = Project.parse(pp) + time_logger.log("project parsed") log.info(f"PreTeXt project found in `{utils.project_path()}`.") logger.add_log_file_handler(pp / "logs") diff --git a/pretext/project/__init__.py b/pretext/project/__init__.py index d9bad097..d9ea5266 100644 --- a/pretext/project/__init__.py +++ b/pretext/project/__init__.py @@ -128,6 +128,8 @@ class Target(pxml.BaseXmlModel, tag="target", search_mode=SearchMode.UNORDERED): # # A path to the root source for this target, relative to the project's `source` path. source: Path = pxml.attr(default=Path("main.ptx")) + # Cache of assembled source element. + _source_element: t.Optional[ET._Element] = None # A path to the publication file for this target, relative to the project's `publication` path. This is mostly validated by `post_validate`. publication: Path = pxml.attr(default=None) latex_engine: LatexEngine = pxml.attr( @@ -368,29 +370,9 @@ def post_validate(self) -> None: if server.name not in self_server_names: self.server.append(server) - # For the Runestone format, determine the ``, which specifies the `output_dir`. - if self.format == Format.HTML and self.platform == Platform.RUNESTONE: - # We expect `d_list == ["document-id contents here"]`. - d_list = self.source_element().xpath("/pretext/docinfo/document-id/text()") - if isinstance(d_list, list): - if len(d_list) != 1: - raise ValueError( - "Only one `document-id` tag is allowed in a PreTeXt document." - ) - # NB as of 2025-04-08, we are no longer setting the output directory automatically for - # Runestone targets. This must be managed by the project.ptx file or by a client script. - # The commented code below is how we used to do this. - - # d = d_list[0] - # assert isinstance(d, str) - # # Use the correct number of `../` to undo the project's `output-dir`, so the output from the build is located in the correct directory of `published/document-id`. - # self.output_dir = Path( - # f"{'../'*len(self._project.output_dir.parents)}published/{d}" - # ) - else: - raise ValueError( - "The `document-id` tag must be defined for the Runestone format." - ) + # NB as of 2025-04-08, we are no longer setting the output directory automatically for + # Runestone targets. This must be managed by the project.ptx file or by a client script. + # Check history here for how that was done. def source_abspath(self) -> Path: return self._project.source_abspath() / self.source @@ -408,14 +390,21 @@ def original_source_element(self) -> ET._Element: def source_element(self) -> ET._Element: """ Returns the root element for the assembled source, after processing with the "version-only" assembly. + Caches the result for future calls. """ - assembled = core.assembly_internal( - xml=self.source_abspath(), - pub_file=self.publication_abspath().as_posix(), - stringparams=self.stringparams.copy(), - method="version", - ) - return assembled.getroot() + if self._source_element is None: + log.debug( + f"Parsing source element for target {self.name}", + ) + self._source_element = core.assembly_internal( + xml=self.source_abspath(), + pub_file=self.publication_abspath().as_posix(), + stringparams=self.stringparams.copy(), + method="version", + ) + else: + log.debug(f"Using cached source_element for target {self.name}") + return self._source_element.getroot() def publication_abspath(self) -> Path: return self._project.publication_abspath() / self.publication @@ -681,13 +670,23 @@ def build( # Add cli.version to stringparams. Use only the major and minor version numbers. self.stringparams["cli.version"] = VERSION[: VERSION.rfind(".")] + time_logger = utils.Stopwatch("Target::build()", True) + time_logger.log("build started") + # Check for xml syntax errors and quit if xml invalid: - if not utils.xml_syntax_is_valid(self.source_abspath()): + try: + # Access the source_element to trigger assembly if it hasn't been done yet. + self.source_element() + except Exception as e: + log.error(f"Error assembling source file: {e}") raise RuntimeError("XML syntax for source file is invalid") if not utils.xml_syntax_is_valid(self.publication_abspath(), "publication"): raise RuntimeError("XML syntax for publication file is invalid") + time_logger.log("XML syntax validated") + # Validate xml against schema; continue with warning if invalid: - utils.xml_source_validates_against_schema(self.source_abspath()) + utils.xml_validates_against_schema(self.source_element()) + time_logger.log("source validated against schema") # Clean output upon request if clean: @@ -698,6 +697,7 @@ def build( # verify that a webwork_representations.xml file exists if it is needed; generated if needed. self.ensure_webwork_reps() + time_logger.log("webwork representations ensured") # Generate needed assets unless requested not to. if generate: @@ -754,7 +754,9 @@ def build( log.warning( "The platform host in the publication file is not set to runestone. Since the requested target has @platform='runestone', we will override the publication file's platform host." ) + time_logger.log("building HTML") utils.ensure_css_node_modules() + time_logger.log("node set") core.html( xml=self.source_abspath(), pub_file=self.publication_abspath().as_posix(), @@ -766,6 +768,7 @@ def build( dest_dir=self.output_dir_abspath().as_posix(), ext_rs_methods=utils.rs_methods, ) + time_logger.log("done building HTML") if self.platform != Platform.RUNESTONE: # On non-runestone builds, we try to create a codechat mapping for authors. try: @@ -774,6 +777,7 @@ def build( self._project.abspath(), self.output_dir_abspath().as_posix(), ) + time_logger.log("done mapping codechat") except Exception as e: log.warning( "Failed to map codechat path to xml id; codechat will not work." diff --git a/pretext/utils.py b/pretext/utils.py index 602b5faf..d55a6b3b 100644 --- a/pretext/utils.py +++ b/pretext/utils.py @@ -12,6 +12,7 @@ import socketserver import socket import subprocess +import time as time import logging import logging.handlers import psutil @@ -203,17 +204,13 @@ def xml_syntax_is_valid(xmlfile: Path, root_tag: str = "pretext") -> bool: return True -def xml_source_validates_against_schema(xmlfile: Path) -> bool: +def xml_validates_against_schema(etree: ET) -> bool: # get path to RelaxNG schema file: schemarngfile = resources.resource_base_path() / "core" / "schema" / "pretext.rng" # Open schemafile for validation: relaxng = ET.RelaxNG(file=schemarngfile) - # Parse xml file: - source_xml = ET.parse(xmlfile) - source_xml.xinclude() - # just for testing # ---------------- # relaxng.validate(source_xml) @@ -222,7 +219,7 @@ def xml_source_validates_against_schema(xmlfile: Path) -> bool: # validate against schema try: - relaxng.assertValid(source_xml) + relaxng.assertValid(etree) log.info("PreTeXt source passed schema validation.") except ET.DocumentInvalid as err: log.debug( @@ -1104,3 +1101,30 @@ def rs_methods( f"PTX-BUG: Format {format} not recognized for running ext_rs_methods. Something is wrong with the pretext script." ) return None + + +class Stopwatch: + """A simple stopwatch class for measuring elapsed time.""" + + """print_log set to false disables logging of elapsed time """ + + def __init__(self, name: str = "", print_log: bool = True): + self.name = name + self.print_log = print_log + self.start_time = time.time() + self.last_log_time = self.start_time + + def reset(self): + """Reset the log timer to the current time.""" + self.last_log_time = time.time() + + def log(self, timepoint_description: str = ""): + """Print a log message with the elapsed time since the last log event.""" + if self.print_log: + cur_time = time.time() + elapsed_time = cur_time - self.start_time + since_last_log_time = cur_time - self.last_log_time + self.reset() + log.debug( + f"** Timing report from {self.name}: {timepoint_description}, {since_last_log_time:.2f}s since last watch reset. {elapsed_time:.2f}s total elapsed time." + )