Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions machine/corpora/file_paratext_project_settings_parser.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
from typing import Optional

from ..utils.typeshed import StrPath
from .file_paratext_project_file_handler import FileParatextProjectFileHandler
from .paratext_project_settings import ParatextProjectSettings
from .paratext_project_settings_parser_base import ParatextProjectSettingsParserBase


class FileParatextProjectSettingsParser(ParatextProjectSettingsParserBase):
    """Settings parser for a Paratext project stored in a directory on disk.

    File access goes through a ``FileParatextProjectFileHandler`` rooted at
    the project directory.
    """

    def __init__(self, project_dir: StrPath, parent_settings: Optional[ParatextProjectSettings] = None) -> None:
        """Create a parser for the project located at ``project_dir``.

        Args:
            project_dir: Directory of the Paratext project.
            parent_settings: Already-parsed settings of the parent project,
                or ``None``. Forwarded unchanged to the base parser.
        """
        super().__init__(FileParatextProjectFileHandler(project_dir), parent_settings)
8 changes: 5 additions & 3 deletions machine/corpora/file_paratext_project_text_updater.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
from pathlib import Path
from typing import BinaryIO
from typing import BinaryIO, Optional

from ..utils.typeshed import StrPath
from .file_paratext_project_file_handler import FileParatextProjectFileHandler
from .file_paratext_project_settings_parser import FileParatextProjectSettingsParser
from .paratext_project_settings import ParatextProjectSettings
from .paratext_project_text_updater_base import ParatextProjectTextUpdaterBase


class FileParatextProjectTextUpdater(ParatextProjectTextUpdaterBase):
def __init__(self, project_dir: StrPath) -> None:
def __init__(self, project_dir: StrPath, parent_settings: Optional[ParatextProjectSettings] = None) -> None:
super().__init__(
FileParatextProjectFileHandler(project_dir), FileParatextProjectSettingsParser(project_dir).parse()
FileParatextProjectFileHandler(project_dir),
FileParatextProjectSettingsParser(project_dir, parent_settings).parse(),
)

self._project_dir = project_dir
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
from typing import Optional

from ..utils.typeshed import StrPath
from .file_paratext_project_file_handler import FileParatextProjectFileHandler
from .file_paratext_project_settings_parser import FileParatextProjectSettingsParser
from .paratext_project_settings import ParatextProjectSettings
from .paratext_project_versification_error_detector_base import ParatextProjectVersificationErrorDetectorBase


class FileParatextProjectVersificationErrorDetector(ParatextProjectVersificationErrorDetectorBase):
    """Versification error detector for a Paratext project stored in a directory on disk."""

    def __init__(self, project_dir: StrPath, parent_settings: Optional[ParatextProjectSettings] = None) -> None:
        """Create a detector for the project located at ``project_dir``.

        The project settings are parsed eagerly here, using
        ``FileParatextProjectSettingsParser``, and handed to the base class
        together with a file handler rooted at the same directory.

        Args:
            project_dir: Directory of the Paratext project.
            parent_settings: Already-parsed settings of the parent project,
                or ``None``. Passed through to the settings parser.
        """
        super().__init__(
            FileParatextProjectFileHandler(project_dir),
            FileParatextProjectSettingsParser(project_dir, parent_settings).parse(),
        )
19 changes: 16 additions & 3 deletions machine/corpora/paratext_backup_text_corpus.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List
from typing import List, Optional
from zipfile import ZipFile

from ..utils.typeshed import StrPath
Expand All @@ -8,9 +8,22 @@


class ParatextBackupTextCorpus(ScriptureTextCorpus):
def __init__(self, filename: StrPath, include_markers: bool = False, include_all_text: bool = False) -> None:
def __init__(
self,
filename: StrPath,
include_markers: bool = False,
include_all_text: bool = False,
parent_filename: Optional[StrPath] = None,
) -> None:

parent_settings = None
if parent_filename is not None:
with ZipFile(parent_filename, "r") as parent_archive:
parent_parser = ZipParatextProjectSettingsParser(parent_archive)
parent_settings = parent_parser.parse()

with ZipFile(filename, "r") as archive:
parser = ZipParatextProjectSettingsParser(archive)
parser = ZipParatextProjectSettingsParser(archive, parent_settings)
settings = parser.parse()

versification = settings.versification
Expand Down
25 changes: 25 additions & 0 deletions machine/corpora/paratext_project_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

@dataclass
class ParatextProjectSettings:
guid: str
name: str
full_name: str
encoding: str
Expand All @@ -20,6 +21,10 @@ class ParatextProjectSettings:
biblical_terms_project_name: str
biblical_terms_file_name: str
language_code: Optional[str]
translation_type: str
parent_guid: Optional[str] = None
parent_name: Optional[str] = None
_parent: Optional["ParatextProjectSettings"] = None

def get_book_id(self, file_name: str) -> Optional[str]:
"""Returns None when the file name doesn't match the pattern of a book file name for the project."""
Expand Down Expand Up @@ -57,6 +62,26 @@ def get_all_scripture_book_ids(self) -> Iterable[str]:
for book_id in get_scripture_books():
yield book_id

@property
def has_parent(self) -> bool:
    """Whether this project records a parent project.

    True exactly when ``parent_guid`` is not ``None``; an empty-string GUID
    therefore still counts as having a parent.
    """
    return self.parent_guid is not None

@property
def parent(self) -> Optional["ParatextProjectSettings"]:
    """The attached parent project's settings, or ``None`` if none has been set."""
    return self._parent


@parent.setter
def parent(self, value: "ParatextProjectSettings") -> None:
    """Attach ``value`` as this project's parent.

    Besides storing the parent, this overwrites this project's
    ``versification`` with the parent's.

    Raises:
        ValueError: If ``is_daughter_project_of(value)`` is false.
    """
    if not self.is_daughter_project_of(value):
        raise ValueError(f"Project {value.name} is not the parent project of project {self.name}.")
    self._parent = value
    # A daughter project takes on the versification of its parent.
    self.versification = value.versification

def is_daughter_project_of(self, other_project: "ParatextProjectSettings") -> bool:
    """Return whether ``other_project`` is the parent recorded for this project.

    A project without a parent (``has_parent`` is false) is never a daughter
    of anything; otherwise the match is made by comparing this project's
    ``parent_guid`` with ``other_project.guid``.
    """
    if not self.has_parent:
        return False
    return self.parent_guid == other_project.guid


def _get_book_file_name_digits(book_id: str) -> str:
book_num = book_id_to_number(book_id)
Expand Down
47 changes: 36 additions & 11 deletions machine/corpora/paratext_project_settings_parser_base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from abc import ABC
from typing import Optional
from xml.etree import ElementTree

from ..scripture.verse_ref import Versification
Expand All @@ -9,8 +10,13 @@


class ParatextProjectSettingsParserBase(ABC):
def __init__(self, paratext_project_file_handler: ParatextProjectFileHandler):
def __init__(
self,
paratext_project_file_handler: ParatextProjectFileHandler,
parent_paratext_project_settings: Optional[ParatextProjectSettings] = None,
):
self._paratext_project_file_handler = paratext_project_file_handler
self.parent_paratext_project_settings = parent_paratext_project_settings

def parse(self) -> ParatextProjectSettings:
settings_file_name = "Settings.xml"
Expand All @@ -21,9 +27,10 @@ def parse(self) -> ParatextProjectSettings:
with self._paratext_project_file_handler.open(settings_file_name) as stream:
settings_tree = ElementTree.parse(stream)

name = settings_tree.getroot().findtext("Name", "")
full_name = settings_tree.getroot().findtext("FullName", "")
encoding_str = settings_tree.getroot().findtext("Encoding", "65001")
guid: str = settings_tree.getroot().findtext("Guid", "")
name: str = settings_tree.getroot().findtext("Name", "")
full_name: str = settings_tree.getroot().findtext("FullName", "")
encoding_str: str = settings_tree.getroot().findtext("Encoding", "65001")
code_page = parse_integer(encoding_str)
if code_page is None:
raise NotImplementedError(
Expand All @@ -36,14 +43,13 @@ def parse(self) -> ParatextProjectSettings:
versification_type = int(settings_tree.getroot().findtext("Versification", "4"))
versification = Versification.get_builtin(versification_type)
if self._paratext_project_file_handler.exists("custom.vrs"):
guid = settings_tree.getroot().findtext("Guid", "")
versification_name = f"{versification.name}-{guid}"
versification = Versification.load(
self._paratext_project_file_handler.open("custom.vrs"),
versification,
versification_name,
)
stylesheet_file_name = settings_tree.getroot().findtext("StyleSheet", "usfm.sty")
stylesheet_file_name: str = settings_tree.getroot().findtext("StyleSheet", "usfm.sty")
if (
not self._paratext_project_file_handler.exists(stylesheet_file_name)
and stylesheet_file_name != "usfm_sb.sty"
Expand All @@ -65,7 +71,7 @@ def parse(self) -> ParatextProjectSettings:
post_part = naming_elem.get("PostPart")
if post_part:
suffix = post_part
biblical_terms_list_setting = settings_tree.getroot().findtext("BiblicalTermsListSetting")
biblical_terms_list_setting: Optional[str] = settings_tree.getroot().findtext("BiblicalTermsListSetting")
if biblical_terms_list_setting is None:
# Default to Major::BiblicalTerms.xml to mirror Paratext behavior
biblical_terms_list_setting = "Major::BiblicalTerms.xml"
Expand All @@ -76,13 +82,24 @@ def parse(self) -> ParatextProjectSettings:
f" is not in the expected format (e.g., Major::BiblicalTerms.xml) but is {biblical_terms_list_setting}."
)
language_code = None
language_iso_code_setting = settings_tree.getroot().findtext("LanguageIsoCode", "")
if language_iso_code_setting:
language_iso_code_setting_parts = settings_tree.getroot().findtext("LanguageIsoCode", "").split(":")
language_iso_code_setting: Optional[str] = settings_tree.getroot().findtext("LanguageIsoCode", "")
if language_iso_code_setting is not None:
language_iso_code_setting_parts = language_iso_code_setting.split(":")
if language_iso_code_setting_parts:
language_code = language_iso_code_setting_parts[0]

return ParatextProjectSettings(
translation_info_setting: Optional[str] = settings_tree.getroot().findtext("TranslationInfo")
translation_type = "Standard"
parent_name = None
parent_guid = None
if translation_info_setting is not None:
translation_info_setting_parts = translation_info_setting.split(":")
translation_type = translation_info_setting_parts[0]
parent_name = translation_info_setting_parts[1] if translation_info_setting_parts[1] != "" else None
parent_guid = translation_info_setting_parts[2] if translation_info_setting_parts[2] != "" else None

settings = ParatextProjectSettings(
guid,
name,
full_name,
encoding,
Expand All @@ -95,4 +112,12 @@ def parse(self) -> ParatextProjectSettings:
parts[1],
parts[2],
language_code,
translation_type,
parent_guid,
parent_name,
)

if self.parent_paratext_project_settings is not None and settings.has_parent:
settings.parent = self.parent_paratext_project_settings

return settings
18 changes: 15 additions & 3 deletions machine/corpora/paratext_text_corpus.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from pathlib import Path
from typing import List
from typing import List, Optional

from ..utils.typeshed import StrPath
from .file_paratext_project_settings_parser import FileParatextProjectSettingsParser
Expand All @@ -8,8 +8,20 @@


class ParatextTextCorpus(ScriptureTextCorpus):
def __init__(self, project_dir: StrPath, include_markers: bool = False, include_all_text: bool = False) -> None:
parser = FileParatextProjectSettingsParser(project_dir)
def __init__(
self,
project_dir: StrPath,
include_markers: bool = False,
include_all_text: bool = False,
parent_project_dir: Optional[StrPath] = None,
) -> None:

parent_settings = None
if parent_project_dir is not None:
parent_parser = FileParatextProjectSettingsParser(parent_project_dir)
parent_settings = parent_parser.parse()

parser = FileParatextProjectSettingsParser(project_dir, parent_settings)
settings = parser.parse()

versification = settings.versification
Expand Down
Loading