From 1a8e33797071017c082a8256e23759cafe72f97a Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Wed, 18 Feb 2026 14:51:12 -0500 Subject: [PATCH 1/7] Extend paratext project settings class to support daughter translations --- machine/corpora/paratext_project_settings.py | 16 +++++++++++ .../paratext_project_settings_parser_base.py | 28 ++++++++++++++++--- ...t_file_paratext_project_settings_parser.py | 8 ++++++ .../corpora/test_paratext_project_settings.py | 2 ++ ...st_zip_paratext_project_settings_parser.py | 8 ++++++ tests/testutils/data/usfm/Tes/Settings.xml | 2 +- .../memory_paratext_project_file_handler.py | 8 ++++++ 7 files changed, 67 insertions(+), 5 deletions(-) diff --git a/machine/corpora/paratext_project_settings.py b/machine/corpora/paratext_project_settings.py index 5f747257..22e0ec13 100644 --- a/machine/corpora/paratext_project_settings.py +++ b/machine/corpora/paratext_project_settings.py @@ -8,6 +8,7 @@ @dataclass class ParatextProjectSettings: + guid: str name: str full_name: str encoding: str @@ -20,6 +21,9 @@ class ParatextProjectSettings: biblical_terms_project_name: str biblical_terms_file_name: str language_code: Optional[str] + translation_type: str + parent_guid: Optional[str] = None + parent_name: Optional[str] = None def get_book_id(self, file_name: str) -> Optional[str]: """Returns None when the file name doesn't match the pattern of a book file name for the project.""" @@ -57,6 +61,18 @@ def get_all_scripture_book_ids(self) -> Iterable[str]: for book_id in get_scripture_books(): yield book_id + def has_parent(self) -> bool: + return self.parent_guid is not None + + def is_daughter_project_of(self, other_project: "ParatextProjectSettings") -> bool: + if not self.has_parent(): + return False + return self.parent_guid == other_project.guid + + def set_parent_project(self, other_project: "ParatextProjectSettings"): + # TODO anything else we should set? From what I can tell, stylesheets are not inherited + self.versification = other_project.versification + def _get_book_file_name_digits(book_id: str) -> str: book_num = book_id_to_number(book_id) diff --git a/machine/corpora/paratext_project_settings_parser_base.py b/machine/corpora/paratext_project_settings_parser_base.py index 61f2e66b..49c06de1 100644 --- a/machine/corpora/paratext_project_settings_parser_base.py +++ b/machine/corpora/paratext_project_settings_parser_base.py @@ -1,4 +1,5 @@ from abc import ABC +from typing import Optional from xml.etree import ElementTree from ..scripture.verse_ref import Versification @@ -9,8 +10,13 @@ class ParatextProjectSettingsParserBase(ABC): - def __init__(self, paratext_project_file_handler: ParatextProjectFileHandler): + def __init__( + self, + paratext_project_file_handler: ParatextProjectFileHandler, + parent_paratext_project_file_handler: Optional[ParatextProjectFileHandler] = None, + ): self._paratext_project_file_handler = paratext_project_file_handler + self._parent_paratext_project_file_handler = parent_paratext_project_file_handler def parse(self) -> ParatextProjectSettings: settings_file_name = "Settings.xml" @@ -21,6 +27,7 @@ def parse(self) -> ParatextProjectSettings: with self._paratext_project_file_handler.open(settings_file_name) as stream: settings_tree = ElementTree.parse(stream) + guid = settings_tree.getroot().findtext("Guid", "") name = settings_tree.getroot().findtext("Name", "") full_name = settings_tree.getroot().findtext("FullName", "") encoding_str = settings_tree.getroot().findtext("Encoding", "65001") @@ -36,7 +43,6 @@ def parse(self) -> ParatextProjectSettings: versification_type = int(settings_tree.getroot().findtext("Versification", "4")) versification = Versification.get_builtin(versification_type) if self._paratext_project_file_handler.exists("custom.vrs"): - guid = settings_tree.getroot().findtext("Guid", "") versification_name = f"{versification.name}-{guid}" versification = Versification.load( self._paratext_project_file_handler.open("custom.vrs"), @@ -77,12 +83,23 @@ def parse(self) -> ParatextProjectSettings: ) language_code = None language_iso_code_setting = settings_tree.getroot().findtext("LanguageIsoCode", "") - if language_iso_code_setting: - language_iso_code_setting_parts = settings_tree.getroot().findtext("LanguageIsoCode", "").split(":") + if language_iso_code_setting is not None: + language_iso_code_setting_parts = language_iso_code_setting.split(":") if language_iso_code_setting_parts: language_code = language_iso_code_setting_parts[0] + translation_info_setting = settings_tree.getroot().findtext("TranslationInfo") + translation_type = "Standard" + parent_name = None + parent_guid = None + if translation_info_setting is not None: + translation_info_setting_parts = translation_info_setting.split(":") + translation_type = translation_info_setting_parts[0] + parent_name = translation_info_setting_parts[1] if translation_info_setting_parts[1] != "" else None + parent_guid = translation_info_setting_parts[2] if translation_info_setting_parts[2] != "" else None + return ParatextProjectSettings( + guid, name, full_name, encoding, @@ -95,4 +112,7 @@ def parse(self) -> ParatextProjectSettings: parts[1], parts[2], language_code, + translation_type, + parent_guid, + parent_name, ) diff --git a/tests/corpora/test_file_paratext_project_settings_parser.py b/tests/corpora/test_file_paratext_project_settings_parser.py index 56bb1fdc..c3299014 100644 --- a/tests/corpora/test_file_paratext_project_settings_parser.py +++ b/tests/corpora/test_file_paratext_project_settings_parser.py @@ -9,3 +9,11 @@ def test_parse_custom_stylesheet() -> None: test_tag = settings.stylesheet.get_tag("test") assert test_tag.style_type is UsfmStyleType.CHARACTER assert test_tag.text_type is UsfmTextType.OTHER + + +def test_is_daughter_project() -> None: + parser = FileParatextProjectSettingsParser(USFM_TEST_PROJECT_PATH) + settings = parser.parse() + assert settings.has_parent() + assert settings.is_daughter_project_of(settings) + assert settings.translation_type == "Standard" diff --git a/tests/corpora/test_paratext_project_settings.py b/tests/corpora/test_paratext_project_settings.py index 126ae654..2cce0159 100644 --- a/tests/corpora/test_paratext_project_settings.py +++ b/tests/corpora/test_paratext_project_settings.py @@ -115,6 +115,7 @@ def test_get_book_id_wrong_book_part_book_num_book_id() -> None: def _create_settings(file_name_form: str) -> ParatextProjectSettings: return ParatextProjectSettings( + "id", "Name", "Name", "utf-8", @@ -127,4 +128,5 @@ def _create_settings(file_name_form: str) -> ParatextProjectSettings: "", "BiblicalTerms.xml", "en", + "Standard", ) diff --git a/tests/corpora/test_zip_paratext_project_settings_parser.py b/tests/corpora/test_zip_paratext_project_settings_parser.py index abf300f0..bb5aed87 100644 --- a/tests/corpora/test_zip_paratext_project_settings_parser.py +++ b/tests/corpora/test_zip_paratext_project_settings_parser.py @@ -18,6 +18,14 @@ def test_parse_custom_stylesheet() -> None: assert test_tag.text_type is UsfmTextType.OTHER +def test_is_daughter_project() -> None: + with _TestEnvironment() as env: + settings = env.parser.parse() + assert settings.has_parent() + assert settings.is_daughter_project_of(settings) + assert settings.translation_type == "Standard" + + class _TestEnvironment(ContextManager["_TestEnvironment"]): def __init__(self) -> None: self._temp_dir = TemporaryDirectory() diff --git a/tests/testutils/data/usfm/Tes/Settings.xml b/tests/testutils/data/usfm/Tes/Settings.xml index d76bcbc5..0b683c63 100644 --- a/tests/testutils/data/usfm/Tes/Settings.xml +++ b/tests/testutils/data/usfm/Tes/Settings.xml @@ -24,7 +24,7 @@ F F Public - Standard:: + Standard:Tes:a7e0b3ce0200736062f9f810a444dbfbe64aca35 3 000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 diff --git a/tests/testutils/memory_paratext_project_file_handler.py b/tests/testutils/memory_paratext_project_file_handler.py index a764cd69..22776ac0 100644 --- a/tests/testutils/memory_paratext_project_file_handler.py +++ b/tests/testutils/memory_paratext_project_file_handler.py @@ -26,6 +26,7 @@ def create_stylesheet(self, file_name): class DefaultParatextProjectSettings(ParatextProjectSettings): def __init__( self, + guid: str = "id", name: str = "Test", full_name: str = "TestProject", encoding: Optional[str] = None, @@ -38,9 +39,13 @@ def __init__( biblical_terms_project_name: str = "Test", biblical_terms_file_name: str = "ProjectBiblicalTerms.xml", language_code: str = "en", + translation_type: str = "Standard", + parent_guid: Optional[str] = None, + parent_name: Optional[str] = None, ): super().__init__( + guid, name, full_name, encoding if encoding is not None else "utf-8", @@ -53,4 +58,7 @@ def __init__( biblical_terms_project_name, biblical_terms_file_name, language_code, + translation_type, + parent_guid, + parent_name, ) From 2580c8fb72a5c1dc25cbb6e09ec7b0dc5771aaa1 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Thu, 19 Feb 2026 16:37:33 -0500 Subject: [PATCH 2/7] Support daughter projects; port invalid verse/number checking --- .../file_paratext_project_settings_parser.py | 7 +- .../file_paratext_project_text_updater.py | 8 +- ...xt_project_versification_error_detector.py | 8 +- .../corpora/paratext_backup_text_corpus.py | 19 ++++- machine/corpora/paratext_project_settings.py | 9 ++- .../paratext_project_settings_parser_base.py | 15 +++- machine/corpora/paratext_text_corpus.py | 18 ++++- .../usfm_versification_error_detector.py | 79 +++++++++++++++---- .../zip_paratext_project_settings_parser.py | 6 +- .../zip_paratext_project_text_updater.py | 8 +- ...paratext_project_versification_detector.py | 8 +- ...atext_project_quote_convention_detector.py | 8 +- ...atext_project_quote_convention_detector.py | 8 +- machine/scripture/verse_ref.py | 2 +- ...t_file_paratext_project_settings_parser.py | 10 ++- .../test_usfm_versification_error_detector.py | 66 ++++++++++++---- ...st_zip_paratext_project_settings_parser.py | 25 +++--- 17 files changed, 234 insertions(+), 70 deletions(-) diff --git a/machine/corpora/file_paratext_project_settings_parser.py b/machine/corpora/file_paratext_project_settings_parser.py index 13e62f28..afa3169b 100644 --- a/machine/corpora/file_paratext_project_settings_parser.py +++ b/machine/corpora/file_paratext_project_settings_parser.py @@ -1,8 +1,11 @@ +from typing import Optional + from ..utils.typeshed import StrPath from .file_paratext_project_file_handler import FileParatextProjectFileHandler +from .paratext_project_settings import ParatextProjectSettings from .paratext_project_settings_parser_base import ParatextProjectSettingsParserBase class FileParatextProjectSettingsParser(ParatextProjectSettingsParserBase): - def __init__(self, project_dir: StrPath) -> None: - super().__init__(FileParatextProjectFileHandler(project_dir)) + def __init__(self, project_dir: StrPath, parent_settings: Optional[ParatextProjectSettings] = None) -> None: + super().__init__(FileParatextProjectFileHandler(project_dir), parent_settings) diff --git a/machine/corpora/file_paratext_project_text_updater.py b/machine/corpora/file_paratext_project_text_updater.py index e09896aa..3840bdbc 100644 --- a/machine/corpora/file_paratext_project_text_updater.py +++ b/machine/corpora/file_paratext_project_text_updater.py @@ -1,16 +1,18 @@ from pathlib import Path -from typing import BinaryIO +from typing import BinaryIO, Optional from ..utils.typeshed import StrPath from .file_paratext_project_file_handler import FileParatextProjectFileHandler from .file_paratext_project_settings_parser import FileParatextProjectSettingsParser +from .paratext_project_settings import ParatextProjectSettings from .paratext_project_text_updater_base import ParatextProjectTextUpdaterBase class FileParatextProjectTextUpdater(ParatextProjectTextUpdaterBase): - def __init__(self, project_dir: StrPath) -> None: + def __init__(self, project_dir: StrPath, parent_settings: Optional[ParatextProjectSettings] = None) -> None: super().__init__( - FileParatextProjectFileHandler(project_dir), FileParatextProjectSettingsParser(project_dir).parse() + FileParatextProjectFileHandler(project_dir), + FileParatextProjectSettingsParser(project_dir, parent_settings).parse(), ) self._project_dir = project_dir diff --git a/machine/corpora/file_paratext_project_versification_error_detector.py b/machine/corpora/file_paratext_project_versification_error_detector.py index 5f451894..41138922 100644 --- a/machine/corpora/file_paratext_project_versification_error_detector.py +++ b/machine/corpora/file_paratext_project_versification_error_detector.py @@ -1,11 +1,15 @@ +from typing import Optional + from ..utils.typeshed import StrPath from .file_paratext_project_file_handler import FileParatextProjectFileHandler from .file_paratext_project_settings_parser import FileParatextProjectSettingsParser +from .paratext_project_settings import ParatextProjectSettings from .paratext_project_versification_error_detector_base import ParatextProjectVersificationErrorDetectorBase class FileParatextProjectVersificationErrorDetector(ParatextProjectVersificationErrorDetectorBase): - def __init__(self, project_dir: StrPath) -> None: + def __init__(self, project_dir: StrPath, parent_settings: Optional[ParatextProjectSettings] = None) -> None: super().__init__( - FileParatextProjectFileHandler(project_dir), FileParatextProjectSettingsParser(project_dir).parse() + FileParatextProjectFileHandler(project_dir), + FileParatextProjectSettingsParser(project_dir, parent_settings).parse(), ) diff --git a/machine/corpora/paratext_backup_text_corpus.py b/machine/corpora/paratext_backup_text_corpus.py index 77d70654..8015e539 100644 --- a/machine/corpora/paratext_backup_text_corpus.py +++ b/machine/corpora/paratext_backup_text_corpus.py @@ -1,4 +1,4 @@ -from typing import List +from typing import List, Optional from zipfile import ZipFile from ..utils.typeshed import StrPath @@ -8,9 +8,22 @@ class ParatextBackupTextCorpus(ScriptureTextCorpus): - def __init__(self, filename: StrPath, include_markers: bool = False, include_all_text: bool = False) -> None: + def __init__( + self, + filename: StrPath, + include_markers: bool = False, + include_all_text: bool = False, + parent_filename: Optional[StrPath] = None, + ) -> None: + + parent_settings = None + if parent_filename is not None: + with ZipFile(parent_filename, "r") as parent_archive: + parent_parser = ZipParatextProjectSettingsParser(parent_archive) + parent_settings = parent_parser.parse() + with ZipFile(filename, "r") as archive: - parser = ZipParatextProjectSettingsParser(archive) + parser = ZipParatextProjectSettingsParser(archive, parent_settings) settings = parser.parse() versification = settings.versification diff --git a/machine/corpora/paratext_project_settings.py b/machine/corpora/paratext_project_settings.py index 22e0ec13..03e183f6 100644 --- a/machine/corpora/paratext_project_settings.py +++ b/machine/corpora/paratext_project_settings.py @@ -24,6 +24,7 @@ class ParatextProjectSettings: translation_type: str parent_guid: Optional[str] = None parent_name: Optional[str] = None + _parent_has_been_set: bool = False def get_book_id(self, file_name: str) -> Optional[str]: """Returns None when the file name doesn't match the pattern of a book file name for the project.""" @@ -61,17 +62,23 @@ def get_all_scripture_book_ids(self) -> Iterable[str]: for book_id in get_scripture_books(): yield book_id + @property def has_parent(self) -> bool: return self.parent_guid is not None + @property + def parent_has_been_set(self) -> bool: + return self._parent_has_been_set + def is_daughter_project_of(self, other_project: "ParatextProjectSettings") -> bool: - if not self.has_parent(): + if not self.has_parent: return False return self.parent_guid == other_project.guid def set_parent_project(self, other_project: "ParatextProjectSettings"): # TODO anything else we should set? From what I can tell, stylesheets are not inherited self.versification = other_project.versification + self._parent_has_been_set = True def _get_book_file_name_digits(book_id: str) -> str: diff --git a/machine/corpora/paratext_project_settings_parser_base.py b/machine/corpora/paratext_project_settings_parser_base.py index 49c06de1..975618ae 100644 --- a/machine/corpora/paratext_project_settings_parser_base.py +++ b/machine/corpora/paratext_project_settings_parser_base.py @@ -13,10 +13,10 @@ class ParatextProjectSettingsParserBase(ABC): def __init__( self, paratext_project_file_handler: ParatextProjectFileHandler, - parent_paratext_project_file_handler: Optional[ParatextProjectFileHandler] = None, + parent_paratext_project_settings: Optional[ParatextProjectSettings] = None, ): self._paratext_project_file_handler = paratext_project_file_handler - self._parent_paratext_project_file_handler = parent_paratext_project_file_handler + self.parent_paratext_project_settings = parent_paratext_project_settings def parse(self) -> ParatextProjectSettings: settings_file_name = "Settings.xml" @@ -98,7 +98,7 @@ def parse(self) -> ParatextProjectSettings: parent_name = translation_info_setting_parts[1] if translation_info_setting_parts[1] != "" else None parent_guid = translation_info_setting_parts[2] if translation_info_setting_parts[2] != "" else None - return ParatextProjectSettings( + settings = ParatextProjectSettings( guid, name, full_name, @@ -116,3 +116,12 @@ def parse(self) -> ParatextProjectSettings: parent_guid, parent_name, ) + + if self.parent_paratext_project_settings is not None and settings.has_parent: + if not settings.is_daughter_project_of(self.parent_paratext_project_settings): + raise ValueError( + f"Project {self.parent_paratext_project_settings.name} is not the parent project of project {settings.name}." + ) + settings.set_parent_project(self.parent_paratext_project_settings) + + return settings diff --git a/machine/corpora/paratext_text_corpus.py b/machine/corpora/paratext_text_corpus.py index 24c24dd3..f19503d7 100644 --- a/machine/corpora/paratext_text_corpus.py +++ b/machine/corpora/paratext_text_corpus.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import List +from typing import List, Optional from ..utils.typeshed import StrPath from .file_paratext_project_settings_parser import FileParatextProjectSettingsParser @@ -8,8 +8,20 @@ class ParatextTextCorpus(ScriptureTextCorpus): - def __init__(self, project_dir: StrPath, include_markers: bool = False, include_all_text: bool = False) -> None: - parser = FileParatextProjectSettingsParser(project_dir) + def __init__( + self, + project_dir: StrPath, + include_markers: bool = False, + include_all_text: bool = False, + parent_project_dir: Optional[StrPath] = None, + ) -> None: + + parent_settings = None + if parent_project_dir is not None: + parent_parser = FileParatextProjectSettingsParser(parent_project_dir) + parent_settings = parent_parser.parse() + + parser = FileParatextProjectSettingsParser(project_dir, parent_settings) settings = parser.parse() versification = settings.versification diff --git a/machine/corpora/usfm_versification_error_detector.py b/machine/corpora/usfm_versification_error_detector.py index c10a553f..c578436e 100644 --- a/machine/corpora/usfm_versification_error_detector.py +++ b/machine/corpora/usfm_versification_error_detector.py @@ -16,18 +16,22 @@ class UsfmVersificationErrorType(Enum): INVALID_VERSE_RANGE = auto() MISSING_VERSE_SEGMENT = auto() EXTRA_VERSE_SEGMENT = auto() + INVALID_CHAPTER_NUMBER = auto() + INVALID_VERSE_NUMBER = auto() class UsfmVersificationError: def __init__( self, - book_num: int, - expected_chapter: int, - expected_verse: int, - actual_chapter: int, - actual_verse: int, - project_name: str, + book_num: int = 0, + expected_chapter: int = 0, + expected_verse: int = 0, + actual_chapter: int = 0, + actual_verse: int = 0, + project_name: str = "", verse_ref: Optional[VerseRef] = None, + actual_value: Optional[str] = None, + usfm_versification_error_type: Optional[UsfmVersificationErrorType] = None, ): self._book_num = book_num self._expected_chapter = expected_chapter @@ -36,6 +40,9 @@ def __init__( self._actual_verse = actual_verse self._verse_ref = verse_ref self._type: UsfmVersificationErrorType + if usfm_versification_error_type is not None: + self._type = usfm_versification_error_type + self._actual_value = actual_value or "" self._project_name = project_name @property @@ -78,22 +85,24 @@ def map(valid_status: ValidStatus) -> UsfmVersificationErrorType: @property def expected_verse_ref(self) -> str: - if self._type == UsfmVersificationErrorType.EXTRA_VERSE: + if self._type in [ + UsfmVersificationErrorType.EXTRA_VERSE, + UsfmVersificationErrorType.INVALID_CHAPTER_NUMBER, + UsfmVersificationErrorType.INVALID_VERSE_NUMBER, + ]: return "" if ( default_verse_ref := VerseRef.try_from_string( f"{canon.book_number_to_id(self._book_num)} {self._expected_chapter}:{self._expected_verse}" ) - is None - ): + ) is None: return self.default_verse(self._expected_chapter, self._expected_verse) if self._type == UsfmVersificationErrorType.MISSING_VERSE_SEGMENT: if ( verse_ref_with_segment := VerseRef.try_from_string( - f"{self._book_num} {self._expected_chapter}:{self._expected_verse}a" + f"{canon.book_number_to_id(self._book_num)} {self._expected_chapter}:{self._expected_verse}a" ) - is not None - ): + ) is not None: return str(verse_ref_with_segment) if self._type == UsfmVersificationErrorType.INVALID_VERSE_RANGE and self._verse_ref is not None: sorted_all_unique_verses = sorted(set(self._verse_ref.all_verses())) @@ -103,19 +112,22 @@ def expected_verse_ref(self) -> str: return str(first_verse) elif ( corrected_verse_range_ref := VerseRef.try_from_string( - f"{canon.book_number_to_id(self._book_num)} {self._expected_chapter}:{first_verse}-{last_verse}" + f"{canon.book_number_to_id(self._book_num)} {self._expected_chapter}:{first_verse.verse_num}-{last_verse.verse_num}" ) - is not None - ): + ) is not None: return str(corrected_verse_range_ref) return str(default_verse_ref) @property def actual_verse_ref(self) -> str: + if self.type == UsfmVersificationErrorType.INVALID_CHAPTER_NUMBER: + return f"{canon.book_number_to_id(self._book_num)} {self._actual_value}" + if self.type == UsfmVersificationErrorType.INVALID_VERSE_NUMBER: + return f"{canon.book_number_to_id(self._book_num)} {self._expected_chapter}:{self._actual_value}" if self._verse_ref is not None: return str(self._verse_ref) if actual_verse_ref := VerseRef.try_from_string( - f"{self._book_num} {self._actual_chapter}:{self._actual_verse}" + f"{canon.book_number_to_id(self._book_num)} {self._actual_chapter}:{self._actual_verse}" ): return str(actual_verse_ref) return self.default_verse(self._actual_chapter, self._actual_verse) @@ -173,9 +185,28 @@ def chapter( if versification_error.check_error(): self._errors.append(versification_error) + self._current_chapter = state.verse_ref.chapter_num + self._current_verse = VerseRef() + + # See whether the chapter number is invalid + verse_ref = state.verse_ref.copy() + verse_ref.chapter = number + if verse_ref.chapter_num == -1: + self._errors.append( + UsfmVersificationError( + book_num=self._current_book, + expected_chapter=self._current_chapter, + actual_value=number, + project_name=self._project_name, + usfm_versification_error_type=UsfmVersificationErrorType.INVALID_CHAPTER_NUMBER, + ) + ) + def verse( self, state: UsfmParserState, number: str, marker: str, alt_number: Optional[str], pub_number: Optional[str] ) -> None: + verse_in_error = False + self._current_verse = state.verse_ref.copy() if self._current_book > 0 and canon.is_canonical(self._current_book) and self._current_chapter > 0: versification_error = UsfmVersificationError( self._current_book, @@ -188,3 +219,19 @@ def verse( ) if versification_error.check_error(): self._errors.append(versification_error) + verse_in_error = True + + if not verse_in_error: + # See whether the verse number is invalid + verse_ref = self._current_verse.copy() + verse_ref.verse = number + if verse_ref.verse_num == -1: + self._errors.append( + UsfmVersificationError( + book_num=self._current_book, + expected_chapter=self._current_chapter, + actual_value=number, + project_name=self._project_name, + usfm_versification_error_type=UsfmVersificationErrorType.INVALID_VERSE_NUMBER, + ) + ) diff --git a/machine/corpora/zip_paratext_project_settings_parser.py b/machine/corpora/zip_paratext_project_settings_parser.py index e9fb3080..ccca5487 100644 --- a/machine/corpora/zip_paratext_project_settings_parser.py +++ b/machine/corpora/zip_paratext_project_settings_parser.py @@ -1,9 +1,11 @@ +from typing import Optional from zipfile import ZipFile +from .paratext_project_settings import ParatextProjectSettings from .paratext_project_settings_parser_base import ParatextProjectSettingsParserBase from .zip_paratext_project_file_handler import ZipParatextProjectFileHandler class ZipParatextProjectSettingsParser(ParatextProjectSettingsParserBase): - def __init__(self, archive: ZipFile) -> None: - super().__init__(ZipParatextProjectFileHandler(archive)) + def __init__(self, archive: ZipFile, parent_settings: Optional[ParatextProjectSettings] = None) -> None: + super().__init__(ZipParatextProjectFileHandler(archive), parent_settings) diff --git a/machine/corpora/zip_paratext_project_text_updater.py b/machine/corpora/zip_paratext_project_text_updater.py index ffbdf1de..fb80e06b 100644 --- a/machine/corpora/zip_paratext_project_text_updater.py +++ b/machine/corpora/zip_paratext_project_text_updater.py @@ -1,10 +1,14 @@ +from typing import Optional from zipfile import ZipFile +from .paratext_project_settings import ParatextProjectSettings from .paratext_project_text_updater_base import ParatextProjectTextUpdaterBase from .zip_paratext_project_file_handler import ZipParatextProjectFileHandler from .zip_paratext_project_settings_parser import ZipParatextProjectSettingsParser class ZipParatextProjectTextUpdater(ParatextProjectTextUpdaterBase): - def __init__(self, archive: ZipFile) -> None: - super().__init__(ZipParatextProjectFileHandler(archive), ZipParatextProjectSettingsParser(archive).parse()) + def __init__(self, archive: ZipFile, parent_settings: Optional[ParatextProjectSettings] = None) -> None: + super().__init__( + ZipParatextProjectFileHandler(archive), ZipParatextProjectSettingsParser(archive, parent_settings).parse() + ) diff --git a/machine/corpora/zip_paratext_project_versification_detector.py b/machine/corpora/zip_paratext_project_versification_detector.py index ccb287c2..bc1964a3 100644 --- a/machine/corpora/zip_paratext_project_versification_detector.py +++ b/machine/corpora/zip_paratext_project_versification_detector.py @@ -1,10 +1,14 @@ +from typing import Optional from zipfile import ZipFile +from .paratext_project_settings import ParatextProjectSettings from .paratext_project_versification_error_detector_base import ParatextProjectVersificationErrorDetectorBase from .zip_paratext_project_file_handler import ZipParatextProjectFileHandler from .zip_paratext_project_settings_parser import ZipParatextProjectSettingsParser class ZipParatextProjectVersificationErrorDetector(ParatextProjectVersificationErrorDetectorBase): - def __init__(self, archive: ZipFile): - super().__init__(ZipParatextProjectFileHandler(archive), ZipParatextProjectSettingsParser(archive).parse()) + def __init__(self, archive: ZipFile, parent_settings: Optional[ParatextProjectSettings] = None): + super().__init__( + ZipParatextProjectFileHandler(archive), ZipParatextProjectSettingsParser(archive, parent_settings).parse() + ) diff --git a/machine/punctuation_analysis/file_paratext_project_quote_convention_detector.py b/machine/punctuation_analysis/file_paratext_project_quote_convention_detector.py index e3437521..8317e1c1 100644 --- a/machine/punctuation_analysis/file_paratext_project_quote_convention_detector.py +++ b/machine/punctuation_analysis/file_paratext_project_quote_convention_detector.py @@ -1,16 +1,18 @@ from pathlib import Path -from typing import BinaryIO +from typing import BinaryIO, Optional from ..corpora.file_paratext_project_file_handler import FileParatextProjectFileHandler from ..corpora.file_paratext_project_settings_parser import FileParatextProjectSettingsParser +from ..corpora.paratext_project_settings import ParatextProjectSettings from ..utils.typeshed import StrPath from .paratext_project_quote_convention_detector import ParatextProjectQuoteConventionDetector class FileParatextProjectQuoteConventionDetector(ParatextProjectQuoteConventionDetector): - def __init__(self, project_dir: StrPath) -> None: + def __init__(self, project_dir: StrPath, parent_settings: Optional[ParatextProjectSettings] = None) -> None: super().__init__( - FileParatextProjectFileHandler(project_dir), FileParatextProjectSettingsParser(project_dir).parse() + FileParatextProjectFileHandler(project_dir), + FileParatextProjectSettingsParser(project_dir, parent_settings).parse(), ) self._project_dir = project_dir diff --git a/machine/punctuation_analysis/zip_paratext_project_quote_convention_detector.py b/machine/punctuation_analysis/zip_paratext_project_quote_convention_detector.py index 470862c1..19df6cdf 100644 --- a/machine/punctuation_analysis/zip_paratext_project_quote_convention_detector.py +++ b/machine/punctuation_analysis/zip_paratext_project_quote_convention_detector.py @@ -1,10 +1,14 @@ +from typing import Optional from zipfile import ZipFile +from ..corpora.paratext_project_settings import ParatextProjectSettings from ..corpora.zip_paratext_project_file_handler import ZipParatextProjectFileHandler from ..corpora.zip_paratext_project_settings_parser import ZipParatextProjectSettingsParser from .paratext_project_quote_convention_detector import ParatextProjectQuoteConventionDetector class ZipParatextProjectQuoteConventionDetector(ParatextProjectQuoteConventionDetector): - def __init__(self, archive: ZipFile) -> None: - super().__init__(ZipParatextProjectFileHandler(archive), ZipParatextProjectSettingsParser(archive).parse()) + def __init__(self, archive: ZipFile, parent_settings: Optional[ParatextProjectSettings] = None) -> None: + super().__init__( + ZipParatextProjectFileHandler(archive), ZipParatextProjectSettingsParser(archive, parent_settings).parse() + ) diff --git a/machine/scripture/verse_ref.py b/machine/scripture/verse_ref.py index f35ea0fe..fcd21789 100644 --- a/machine/scripture/verse_ref.py +++ b/machine/scripture/verse_ref.py @@ -238,7 +238,7 @@ def is_excluded(self) -> bool: @property def has_segments_defined(self): - return self.versification is not None and self.versification.verse_segments[self.bbbcccvvv] is not None + return self.versification is not None and self.versification.verse_segments.get(self.bbbcccvvv) is not None def get_segments(self, default_segments: Optional[Set[str]] = None) -> Optional[Set[str]]: if self.versification is None: diff --git a/tests/corpora/test_file_paratext_project_settings_parser.py b/tests/corpora/test_file_paratext_project_settings_parser.py index c3299014..991c146b 100644 --- a/tests/corpora/test_file_paratext_project_settings_parser.py +++ b/tests/corpora/test_file_paratext_project_settings_parser.py @@ -14,6 +14,14 @@ def test_parse_custom_stylesheet() -> None: def test_is_daughter_project() -> None: parser = FileParatextProjectSettingsParser(USFM_TEST_PROJECT_PATH) settings = parser.parse() - assert settings.has_parent() + assert settings.has_parent assert settings.is_daughter_project_of(settings) assert settings.translation_type == "Standard" + assert not settings.parent_has_been_set + + parser = FileParatextProjectSettingsParser(USFM_TEST_PROJECT_PATH, settings) + settings = parser.parse() + assert settings.has_parent + assert settings.is_daughter_project_of(settings) + assert settings.translation_type == "Standard" + assert settings.parent_has_been_set diff --git a/tests/corpora/test_usfm_versification_error_detector.py b/tests/corpora/test_usfm_versification_error_detector.py index 1bd6efac..15cca145 100644 --- a/tests/corpora/test_usfm_versification_error_detector.py +++ b/tests/corpora/test_usfm_versification_error_detector.py @@ -10,7 +10,7 @@ from machine.scripture import ORIGINAL_VERSIFICATION, Versification -def get_usfm_versification_errors_no_errors(): +def test_get_usfm_versification_errors_no_errors(): env = _TestEnvironment( files={ "653JNTest.SFM": r"""\id 3JN @@ -36,7 +36,7 @@ def get_usfm_versification_errors_no_errors(): assert len(env.get_usfm_versification_errors()) == 0 -def get_usfm_versification_errors_missing_verse(): +def test_get_usfm_versification_errors_missing_verse(): env = _TestEnvironment( files={ "653JNTest.SFM": r"""\id 3JN @@ -65,7 +65,7 @@ def get_usfm_versification_errors_missing_verse(): assert errors[0].actual_verse_ref == "3JN 1:14" -def get_usfm_versification_missing_chapter(): +def test_get_usfm_versification_missing_chapter(): env = _TestEnvironment( files={ "653JNTest.SFM": r"""\id 3JN @@ -79,7 +79,7 @@ def get_usfm_versification_missing_chapter(): assert errors[0].actual_verse_ref == "3JN 0:0" -def get_usfm_versification_errors_extra_verse(): +def test_get_usfm_versification_errors_extra_verse(): env = _TestEnvironment( files={ "653JNTest.SFM": r"""\id 3JN @@ -110,7 +110,7 @@ def get_usfm_versification_errors_extra_verse(): assert errors[0].actual_verse_ref == "3JN 1:16" -def get_usfm_versification_errors_invalid_verse(): +def test_get_usfm_versification_errors_invalid_verse(): env = _TestEnvironment( files={ "653JNTest.SFM": r"""\id 3JN @@ -139,7 +139,7 @@ def get_usfm_versification_errors_invalid_verse(): assert errors[0].actual_verse_ref == "3JN 1:13-12" -def get_usfm_versification_errors_extra_verse_segment(): +def test_get_usfm_versification_errors_extra_verse_segment(): env = _TestEnvironment( files={ "653JNTest.SFM": r"""\id 3JN @@ -164,13 +164,13 @@ def get_usfm_versification_errors_extra_verse_segment(): } ) errors = env.get_usfm_versification_errors() - assert len(errors) == 1 + assert len(errors) == 2 assert errors[0].type == UsfmVersificationErrorType.EXTRA_VERSE_SEGMENT assert errors[0].expected_verse_ref == "3JN 1:14" assert errors[0].actual_verse_ref == "3JN 1:14a" -def get_usfm_versification_errors_missing_verse_segments(): +def test_get_usfm_versification_errors_missing_verse_segments(): env = _TestEnvironment( files={ "653JNTest.SFM": r"""\id 3JN @@ -191,7 +191,8 @@ def get_usfm_versification_errors_missing_verse_segments(): \v 14 \v 15 """ - } + }, + settings=DefaultParatextProjectSettings(versification=get_custom_versification(r"*3JN 1:13,a,b")), ) errors = env.get_usfm_versification_errors() assert len(errors) == 1 @@ -200,7 +201,7 @@ def get_usfm_versification_errors_missing_verse_segments(): assert errors[0].actual_verse_ref == "3JN 1:13" -def get_usfm_versification_errors_ignore_noncanonicals(): +def test_get_usfm_versification_errors_ignore_noncanonicals(): env = _TestEnvironment( files={ "98XXETest.SFM": r"""\id XXE @@ -212,7 +213,7 @@ def get_usfm_versification_errors_ignore_noncanonicals(): assert len(env.get_usfm_versification_errors()) == 0 -def get_usfm_versification_errors_excluded_in_custom_vrs(): +def test_get_usfm_versification_errors_excluded_in_custom_vrs(): env = _TestEnvironment( files={ "653JNTest.SFM": r"""\id 3JN @@ -243,7 +244,7 @@ def get_usfm_versification_errors_excluded_in_custom_vrs(): assert errors[0].actual_verse_ref == "3JN 1:13" -def get_usfm_versification_errors_multiple_books(): +def test_get_usfm_versification_errors_multiple_books(): env = _TestEnvironment( files={ "642JNTest.SFM": r"""\id 2JN @@ -288,7 +289,7 @@ def get_usfm_versification_errors_multiple_books(): assert errors[0].actual_verse_ref == "2JN 1:12" -def get_usfm_versification_errors_multiple_chapters(): +def test_get_usfm_versification_errors_multiple_chapters(): env = _TestEnvironment( files={ "642JNTest.SFM": r"""\id 2JN @@ -313,13 +314,50 @@ def get_usfm_versification_errors_multiple_chapters(): errors = env.get_usfm_versification_errors() assert len(errors) == 2 assert errors[0].type == UsfmVersificationErrorType.MISSING_VERSE - assert errors[0].type == UsfmVersificationErrorType.EXTRA_VERSE + assert errors[1].type == UsfmVersificationErrorType.EXTRA_VERSE assert errors[0].expected_verse_ref == "2JN 1:13" assert errors[0].actual_verse_ref == "2JN 1:12" assert errors[1].expected_verse_ref == "" assert errors[1].actual_verse_ref == "2JN 2:1" +def test_get_usfm_versification_errors_invalid_chapter_number(): + env = _TestEnvironment( + files={ + "653JNTest.SFM": r"""\id 3JN + \c 1. + """ + } + ) + errors = env.get_usfm_versification_errors() + assert len(errors) == 2 + assert errors[0].type == UsfmVersificationErrorType.INVALID_CHAPTER_NUMBER + assert errors[1].type == UsfmVersificationErrorType.MISSING_CHAPTER + assert errors[0].expected_verse_ref == "" + assert errors[0].actual_verse_ref == "3JN 1." + assert errors[1].expected_verse_ref == "3JN 1:15" + assert errors[1].actual_verse_ref == "3JN -1:0" + + +def test_get_usfm_versification_errors_invalid_verse_number(): + env = _TestEnvironment( + files={ + "653JNTest.SFM": r"""\id 3JN + \c 1 + \v v1 + """ + } + ) + errors = env.get_usfm_versification_errors() + assert len(errors) == 2 + assert errors[0].type == UsfmVersificationErrorType.INVALID_VERSE_NUMBER + assert errors[1].type == UsfmVersificationErrorType.MISSING_VERSE + assert errors[0].expected_verse_ref == "" + assert errors[0].actual_verse_ref == "3JN 1:v1" + assert errors[1].expected_verse_ref == "3JN 1:15" + assert errors[1].actual_verse_ref == "3JN 1:0" + + class _TestEnvironment: def __init__(self, settings: Optional[ParatextProjectSettings] = None, files: Optional[Dict[str, str]] = None): self._settings = settings diff --git a/tests/corpora/test_zip_paratext_project_settings_parser.py b/tests/corpora/test_zip_paratext_project_settings_parser.py index bb5aed87..acac06a5 100644 --- a/tests/corpora/test_zip_paratext_project_settings_parser.py +++ b/tests/corpora/test_zip_paratext_project_settings_parser.py @@ -2,12 +2,12 @@ from pathlib import Path from tempfile import TemporaryDirectory -from typing import Any, ContextManager +from typing import Any, ContextManager, Optional from zipfile import ZipFile from testutils.corpora_test_helpers import create_test_paratext_backup -from machine.corpora import UsfmStyleType, UsfmTextType, ZipParatextProjectSettingsParser +from machine.corpora import ParatextProjectSettings, UsfmStyleType, UsfmTextType, ZipParatextProjectSettingsParser def test_parse_custom_stylesheet() -> None: @@ -21,25 +21,30 @@ def test_parse_custom_stylesheet() -> None: def test_is_daughter_project() -> None: with _TestEnvironment() as env: settings = env.parser.parse() - assert settings.has_parent() + assert settings.has_parent assert settings.is_daughter_project_of(settings) assert settings.translation_type == "Standard" + assert not settings.parent_has_been_set + + env.parser = ZipParatextProjectSettingsParser(env.zip_file, settings) + + settings = env.parser.parse() + assert settings.has_parent + assert settings.is_daughter_project_of(settings) + assert settings.translation_type == "Standard" + assert settings.parent_has_been_set class _TestEnvironment(ContextManager["_TestEnvironment"]): def __init__(self) -> None: self._temp_dir = TemporaryDirectory() archive_filename = create_test_paratext_backup(Path(self._temp_dir.name)) - self._zip_file = ZipFile(archive_filename) - self._parser = ZipParatextProjectSettingsParser(self._zip_file) - - @property - def parser(self) -> ZipParatextProjectSettingsParser: - return self._parser + self.zip_file = ZipFile(archive_filename) + self.parser = ZipParatextProjectSettingsParser(self.zip_file) def __enter__(self) -> _TestEnvironment: return self def __exit__(self, type: Any, value: Any, traceback: Any) -> None: - self._zip_file.close() + self.zip_file.close() self._temp_dir.cleanup() From cbc2617293946180e50489cafa28547a35f8fd4a Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Thu, 19 Feb 2026 16:45:33 -0500 Subject: [PATCH 3/7] Fix linting errors --- .../corpora/paratext_project_settings_parser_base.py | 3 ++- machine/corpora/usfm_versification_error_detector.py | 10 +++++++--- .../test_zip_paratext_project_settings_parser.py | 4 ++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/machine/corpora/paratext_project_settings_parser_base.py b/machine/corpora/paratext_project_settings_parser_base.py index 975618ae..adbe1b93 100644 --- a/machine/corpora/paratext_project_settings_parser_base.py +++ b/machine/corpora/paratext_project_settings_parser_base.py @@ -120,7 +120,8 @@ def parse(self) -> ParatextProjectSettings: if self.parent_paratext_project_settings is not None and settings.has_parent: if not settings.is_daughter_project_of(self.parent_paratext_project_settings): raise ValueError( - f"Project {self.parent_paratext_project_settings.name} is not the parent project of project {settings.name}." + f"Project {self.parent_paratext_project_settings.name} is \ + not the parent project of project {settings.name}." ) settings.set_parent_project(self.parent_paratext_project_settings) diff --git a/machine/corpora/usfm_versification_error_detector.py b/machine/corpora/usfm_versification_error_detector.py index c578436e..a6fa1956 100644 --- a/machine/corpora/usfm_versification_error_detector.py +++ b/machine/corpora/usfm_versification_error_detector.py @@ -111,10 +111,14 @@ def expected_verse_ref(self) -> str: if first_verse == last_verse: return str(first_verse) elif ( - corrected_verse_range_ref := VerseRef.try_from_string( - f"{canon.book_number_to_id(self._book_num)} {self._expected_chapter}:{first_verse.verse_num}-{last_verse.verse_num}" + ( + corrected_verse_range_ref := VerseRef.try_from_string( + f"{canon.book_number_to_id(self._book_num)} \ + {self._expected_chapter}:{first_verse.verse_num}-{last_verse.verse_num}" + ) ) - ) is not None: + is not None + ): return str(corrected_verse_range_ref) return str(default_verse_ref) diff --git a/tests/corpora/test_zip_paratext_project_settings_parser.py b/tests/corpora/test_zip_paratext_project_settings_parser.py index acac06a5..4e77a633 100644 --- a/tests/corpora/test_zip_paratext_project_settings_parser.py +++ b/tests/corpora/test_zip_paratext_project_settings_parser.py @@ -2,12 +2,12 @@ from pathlib import Path from tempfile import TemporaryDirectory -from typing import Any, ContextManager, Optional +from typing import Any, ContextManager from zipfile import ZipFile from testutils.corpora_test_helpers import create_test_paratext_backup -from machine.corpora import ParatextProjectSettings, UsfmStyleType, UsfmTextType, ZipParatextProjectSettingsParser +from machine.corpora import UsfmStyleType, UsfmTextType, ZipParatextProjectSettingsParser def test_parse_custom_stylesheet() -> None: From c73b46af6b25a1913674a789f2c9fd557710e194 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Thu, 19 Feb 2026 16:55:48 -0500 Subject: [PATCH 4/7] Fix multiline string --- machine/corpora/usfm_versification_error_detector.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/machine/corpora/usfm_versification_error_detector.py b/machine/corpora/usfm_versification_error_detector.py index a6fa1956..fc0374ad 100644 --- a/machine/corpora/usfm_versification_error_detector.py +++ b/machine/corpora/usfm_versification_error_detector.py @@ -111,14 +111,11 @@ def expected_verse_ref(self) -> str: if first_verse == last_verse: return str(first_verse) elif ( - ( - corrected_verse_range_ref := VerseRef.try_from_string( - f"{canon.book_number_to_id(self._book_num)} \ - {self._expected_chapter}:{first_verse.verse_num}-{last_verse.verse_num}" - ) + corrected_verse_range_ref := VerseRef.try_from_string( + f"{canon.book_number_to_id(self._book_num)} " + f"{self._expected_chapter}:{first_verse.verse_num}-{last_verse.verse_num}" ) - is not None - ): + ) is not None: return str(corrected_verse_range_ref) return str(default_verse_ref) From a7cfefb5ea53d628588e4fadfce3cfe3ced973c3 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Thu, 19 Feb 2026 16:58:01 -0500 Subject: [PATCH 5/7] Change other multiline string --- machine/corpora/paratext_project_settings_parser_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/machine/corpora/paratext_project_settings_parser_base.py b/machine/corpora/paratext_project_settings_parser_base.py index adbe1b93..eeb3dfe9 100644 --- a/machine/corpora/paratext_project_settings_parser_base.py +++ b/machine/corpora/paratext_project_settings_parser_base.py @@ -120,8 +120,8 @@ def parse(self) -> ParatextProjectSettings: if self.parent_paratext_project_settings is not None and settings.has_parent: if not settings.is_daughter_project_of(self.parent_paratext_project_settings): raise ValueError( - f"Project {self.parent_paratext_project_settings.name} is \ - not the parent project of project {settings.name}." + f"Project {self.parent_paratext_project_settings.name} is " + f"not the parent project of project {settings.name}." ) settings.set_parent_project(self.parent_paratext_project_settings) From 7016484f80ff11ab1dd51d5a53e34817138462d6 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Fri, 20 Feb 2026 13:39:18 -0500 Subject: [PATCH 6/7] Add type hints --- .../paratext_project_settings_parser_base.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/machine/corpora/paratext_project_settings_parser_base.py b/machine/corpora/paratext_project_settings_parser_base.py index eeb3dfe9..666fbf17 100644 --- a/machine/corpora/paratext_project_settings_parser_base.py +++ b/machine/corpora/paratext_project_settings_parser_base.py @@ -27,10 +27,10 @@ def parse(self) -> ParatextProjectSettings: with self._paratext_project_file_handler.open(settings_file_name) as stream: settings_tree = ElementTree.parse(stream) - guid = settings_tree.getroot().findtext("Guid", "") - name = settings_tree.getroot().findtext("Name", "") - full_name = settings_tree.getroot().findtext("FullName", "") - encoding_str = settings_tree.getroot().findtext("Encoding", "65001") + guid: str = settings_tree.getroot().findtext("Guid", "") + name: str = settings_tree.getroot().findtext("Name", "") + full_name: str = settings_tree.getroot().findtext("FullName", "") + encoding_str: str = settings_tree.getroot().findtext("Encoding", "65001") code_page = parse_integer(encoding_str) if code_page is None: raise NotImplementedError( @@ -49,7 +49,7 @@ def parse(self) -> ParatextProjectSettings: versification, versification_name, ) - stylesheet_file_name = settings_tree.getroot().findtext("StyleSheet", "usfm.sty") + stylesheet_file_name: str = settings_tree.getroot().findtext("StyleSheet", "usfm.sty") if ( not self._paratext_project_file_handler.exists(stylesheet_file_name) and stylesheet_file_name != "usfm_sb.sty" @@ -71,7 +71,7 @@ def parse(self) -> ParatextProjectSettings: post_part = naming_elem.get("PostPart") if post_part: suffix = post_part - biblical_terms_list_setting = settings_tree.getroot().findtext("BiblicalTermsListSetting") + biblical_terms_list_setting: Optional[str] = settings_tree.getroot().findtext("BiblicalTermsListSetting") if biblical_terms_list_setting is None: # Default to Major::BiblicalTerms.xml to mirror Paratext behavior biblical_terms_list_setting = "Major::BiblicalTerms.xml" @@ -82,13 +82,13 @@ def parse(self) -> ParatextProjectSettings: f" is not in the expected format (e.g., Major::BiblicalTerms.xml) but is {biblical_terms_list_setting}." ) language_code = None - language_iso_code_setting = settings_tree.getroot().findtext("LanguageIsoCode", "") + language_iso_code_setting: Optional[str] = settings_tree.getroot().findtext("LanguageIsoCode", "") if language_iso_code_setting is not None: language_iso_code_setting_parts = language_iso_code_setting.split(":") if language_iso_code_setting_parts: language_code = language_iso_code_setting_parts[0] - translation_info_setting = settings_tree.getroot().findtext("TranslationInfo") + translation_info_setting: Optional[str] = settings_tree.getroot().findtext("TranslationInfo") translation_type = "Standard" parent_name = None parent_guid = None From a10cbfe243d6f94b4440370b4a8e64b1c94a83b0 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Fri, 20 Feb 2026 16:20:19 -0500 Subject: [PATCH 7/7] Add 'parent' property --- machine/corpora/paratext_project_settings.py | 18 ++++++++++-------- .../paratext_project_settings_parser_base.py | 7 +------ ...st_file_paratext_project_settings_parser.py | 4 ++-- ...est_zip_paratext_project_settings_parser.py | 4 ++-- 4 files changed, 15 insertions(+), 18 deletions(-) diff --git a/machine/corpora/paratext_project_settings.py b/machine/corpora/paratext_project_settings.py index 03e183f6..eadd1ec6 100644 --- a/machine/corpora/paratext_project_settings.py +++ b/machine/corpora/paratext_project_settings.py @@ -24,7 +24,7 @@ class ParatextProjectSettings: translation_type: str parent_guid: Optional[str] = None parent_name: Optional[str] = None - _parent_has_been_set: bool = False + _parent: Optional["ParatextProjectSettings"] = None def get_book_id(self, file_name: str) -> Optional[str]: """Returns None when the file name doesn't match the pattern of a book file name for the project.""" @@ -67,19 +67,21 @@ def has_parent(self) -> bool: return self.parent_guid is not None @property - def parent_has_been_set(self) -> bool: - return self._parent_has_been_set + def parent(self) -> Optional["ParatextProjectSettings"]: + return self._parent + + @parent.setter + def parent(self, value: "ParatextProjectSettings"): + if not self.is_daughter_project_of(value): + raise ValueError(f"Project {value.name} is not the parent project of project {self.name}.") + self._parent = value + self.versification = value.versification def is_daughter_project_of(self, other_project: "ParatextProjectSettings") -> bool: if not self.has_parent: return False return self.parent_guid == other_project.guid - def set_parent_project(self, other_project: "ParatextProjectSettings"): - # TODO anything else we should set? From what I can tell, stylesheets are not inherited - self.versification = other_project.versification - self._parent_has_been_set = True - def _get_book_file_name_digits(book_id: str) -> str: book_num = book_id_to_number(book_id) diff --git a/machine/corpora/paratext_project_settings_parser_base.py b/machine/corpora/paratext_project_settings_parser_base.py index 666fbf17..2f9a4fbc 100644 --- a/machine/corpora/paratext_project_settings_parser_base.py +++ b/machine/corpora/paratext_project_settings_parser_base.py @@ -118,11 +118,6 @@ def parse(self) -> ParatextProjectSettings: ) if self.parent_paratext_project_settings is not None and settings.has_parent: - if not settings.is_daughter_project_of(self.parent_paratext_project_settings): - raise ValueError( - f"Project {self.parent_paratext_project_settings.name} is " - f"not the parent project of project {settings.name}." - ) - settings.set_parent_project(self.parent_paratext_project_settings) + settings.parent = self.parent_paratext_project_settings return settings diff --git a/tests/corpora/test_file_paratext_project_settings_parser.py b/tests/corpora/test_file_paratext_project_settings_parser.py index 991c146b..2bd294f5 100644 --- a/tests/corpora/test_file_paratext_project_settings_parser.py +++ b/tests/corpora/test_file_paratext_project_settings_parser.py @@ -17,11 +17,11 @@ def test_is_daughter_project() -> None: assert settings.has_parent assert settings.is_daughter_project_of(settings) assert settings.translation_type == "Standard" - assert not settings.parent_has_been_set + assert settings.parent is None parser = FileParatextProjectSettingsParser(USFM_TEST_PROJECT_PATH, settings) settings = parser.parse() assert settings.has_parent assert settings.is_daughter_project_of(settings) assert settings.translation_type == "Standard" - assert settings.parent_has_been_set + assert settings.parent is not None diff --git a/tests/corpora/test_zip_paratext_project_settings_parser.py b/tests/corpora/test_zip_paratext_project_settings_parser.py index 4e77a633..1058953c 100644 --- a/tests/corpora/test_zip_paratext_project_settings_parser.py +++ b/tests/corpora/test_zip_paratext_project_settings_parser.py @@ -24,7 +24,7 @@ def test_is_daughter_project() -> None: assert settings.has_parent assert settings.is_daughter_project_of(settings) assert settings.translation_type == "Standard" - assert not settings.parent_has_been_set + assert settings.parent is None env.parser = ZipParatextProjectSettingsParser(env.zip_file, settings) @@ -32,7 +32,7 @@ def test_is_daughter_project() -> None: assert settings.has_parent assert settings.is_daughter_project_of(settings) assert settings.translation_type == "Standard" - assert settings.parent_has_been_set + assert settings.parent is not None class _TestEnvironment(ContextManager["_TestEnvironment"]):