From 1f20b9d1d905b1d5053f8570cc303f38eca6f54b Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Mon, 14 Jul 2025 18:50:49 -0400 Subject: [PATCH 01/36] feat(dfn): structure dfn component hierarchy (#228) much in the same way that we structured dfns internally (flat to tree), this organizes the components into a tree which represents valid mf6 simulation structure. tentative, may be subject to change --- autotest/test_dfn.py | 62 ++++++++++++++++++++++++++++++++++++ modflow_devtools/dfn.py | 36 +++++++++++++++++++++ modflow_devtools/dfn2toml.py | 42 +++++++++++++++++++++++- 3 files changed, 139 insertions(+), 1 deletion(-) diff --git a/autotest/test_dfn.py b/autotest/test_dfn.py index 72c58d66..27704fa6 100644 --- a/autotest/test_dfn.py +++ b/autotest/test_dfn.py @@ -60,3 +60,65 @@ def test_load_v2(toml_name): def test_load_all(version): dfns = Dfn.load_all(VERSIONS[version], version=version) assert any(dfns) + + +@requires_pkg("boltons") +def test_load_tree(): + import tempfile + + import tomli + + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_path = Path(tmp_dir) + convert(DFN_DIR, tmp_path) + + # Test file conversion and naming + assert (tmp_path / "sim.toml").exists() + assert (tmp_path / "gwf.toml").exists() + assert not (tmp_path / "sim-nam.toml").exists() + + # Test parent relationships in files + with (tmp_path / "sim.toml").open("rb") as f: + sim_data = tomli.load(f) + assert sim_data["name"] == "sim" + assert "parent" not in sim_data + + with (tmp_path / "gwf.toml").open("rb") as f: + gwf_data = tomli.load(f) + assert gwf_data["name"] == "gwf" + assert gwf_data["parent"] == "sim" + + # Test hierarchy enforcement and completeness + dfns = Dfn.load_all(tmp_path, version=2) + roots = [name for name, dfn in dfns.items() if not dfn.get("parent")] + assert len(roots) == 1 + assert roots[0] == "sim" + + for dfn in dfns.values(): + parent = dfn.get("parent") + if parent: + assert parent in dfns + + # Test tree building and navigation + tree = 
Dfn.load_tree(tmp_path, version=2) + assert "sim" in tree + assert tree["sim"]["name"] == "sim" + + for model_type in ["gwf", "gwt", "gwe"]: + if model_type in tree["sim"]: + assert tree["sim"][model_type]["name"] == model_type + assert tree["sim"][model_type]["parent"] == "sim" + + if "gwf" in tree["sim"]: + gwf_packages = [ + k + for k in tree["sim"]["gwf"].keys() + if k.startswith("gwf-") and isinstance(tree["sim"]["gwf"][k], dict) + ] + assert len(gwf_packages) > 0 + + if "gwf-dis" in tree["sim"]["gwf"]: + dis = tree["sim"]["gwf"]["gwf-dis"] + assert dis["name"] == "gwf-dis" + assert dis["parent"] == "gwf" + assert "options" in dis or "dimensions" in dis diff --git a/modflow_devtools/dfn.py b/modflow_devtools/dfn.py index eca91607..11cf3ff4 100644 --- a/modflow_devtools/dfn.py +++ b/modflow_devtools/dfn.py @@ -190,6 +190,7 @@ class Dfn(TypedDict): name: str advanced: bool = False multi: bool = False + parent: str | None = None ref: Ref | None = None sln: Sln | None = None fkeys: Dfns | None = None @@ -609,6 +610,41 @@ def load_all(dfndir: PathLike, version: FormatVersion = 1) -> Dfns: else: raise ValueError(f"Unsupported version, expected one of {version.__args__}") + @staticmethod + def load_tree(dfndir: PathLike, version: FormatVersion = 2) -> dict: + """Load all definitions and return as hierarchical tree.""" + dfns = Dfn.load_all(dfndir, version) + return infer_tree(dfns) + + +def infer_tree(dfns: dict[str, Dfn]) -> dict: + """Infer the component hierarchy from definitions. + + Enforces single root requirement - must be exactly one component + with no parent, and it must be named 'sim'. 
+ """ + roots = [name for name, dfn in dfns.items() if not dfn.get("parent")] + + if len(roots) != 1: + raise ValueError( + f"Expected exactly one root component, found {len(roots)}: {roots}" + ) + + root_name = roots[0] + if root_name != "sim": + raise ValueError(f"Root component must be named 'sim', found '{root_name}'") + + def add_children(node_name: str) -> dict: + node = dfns[node_name].copy() + children = [ + name for name, dfn in dfns.items() if dfn.get("parent") == node_name + ] + for child in children: + node[child] = add_children(child) + return node + + return {root_name: add_children(root_name)} + def get_dfns(owner: str, repo: str, ref: str, outdir: str | PathLike, verbose: bool = False): """Fetch definition files from the MODFLOW 6 repository.""" diff --git a/modflow_devtools/dfn2toml.py b/modflow_devtools/dfn2toml.py index 96a68661..7d346e6d 100644 --- a/modflow_devtools/dfn2toml.py +++ b/modflow_devtools/dfn2toml.py @@ -17,7 +17,47 @@ def convert(indir: PathLike, outdir: PathLike): outdir = Path(outdir).expanduser().absolute() outdir.mkdir(exist_ok=True, parents=True) for dfn in Dfn.load_all(indir).values(): - with Path.open(outdir / f"{dfn['name']}.toml", "wb") as f: + dfn_name = dfn["name"] + + # Determine new filename and parent relationship + if dfn_name == "sim-nam": + filename = "sim.toml" + dfn = dfn.copy() + dfn["name"] = "sim" + # No parent - this is root + elif dfn_name.endswith("-nam"): + # Model name files: gwf-nam -> gwf.toml, parent = "sim" + model_type = dfn_name[:-4] # Remove "-nam" + filename = f"{model_type}.toml" + dfn = dfn.copy() + dfn["name"] = model_type + dfn["parent"] = "sim" + elif dfn_name.startswith("exg-"): + # Exchanges: parent = "sim" + filename = f"{dfn_name}.toml" + dfn = dfn.copy() + dfn["parent"] = "sim" + elif dfn_name.startswith("sln-"): + # Solutions: parent = "sim" + filename = f"{dfn_name}.toml" + dfn = dfn.copy() + dfn["parent"] = "sim" + elif dfn_name.startswith("utl-"): + # Utilities: parent = "sim" + 
filename = f"{dfn_name}.toml" + dfn = dfn.copy() + dfn["parent"] = "sim" + elif "-" in dfn_name: + # Packages: gwf-dis -> parent = "gwf" + model_type = dfn_name.split("-")[0] + filename = f"{dfn_name}.toml" + dfn = dfn.copy() + dfn["parent"] = model_type + else: + # Default case + filename = f"{dfn_name}.toml" + + with Path.open(outdir / filename, "wb") as f: def drop_none_or_empty(path, key, value): if value is None or value == "" or value == [] or value == {}: From fcd0017fe512c3e885a8777ccfbf73d9ceed04d8 Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Mon, 21 Jul 2025 21:45:09 -0400 Subject: [PATCH 02/36] refactor(dfn): separate period block arrays in toml (#229) In the new TOML definition file format, define period block variables (arrays) in terms of the grid and time discretization shape, rather than in the sparse/list-based input form with shape maxbound, the latter being an MF6 input file format detail. Towards structure-scoped TOML definition files i.e. connections/contents of simulation components, whereas normal DFN files are also about input formatting. Dimension info previously encoded in iper and cellid is now signified as with other arrays with shape = "(nper, nnodes)". These variables and the intermediate recarray variable are dropped, as they specified formatting details more than intrinsically necessary information about the data model. 
Also, unrelatedly - use key "children" for composite Field children, previously "items", "choices", "fields" were used for list, union, record respectively - promote some utility functions to public naming (no leading underscore) - add a few block-related aliases and utilities --- modflow_devtools/dfn.py | 170 ++++++++++++++++++++++++++-------------- 1 file changed, 113 insertions(+), 57 deletions(-) diff --git a/modflow_devtools/dfn.py b/modflow_devtools/dfn.py index 11cf3ff4..146f155e 100644 --- a/modflow_devtools/dfn.py +++ b/modflow_devtools/dfn.py @@ -25,24 +25,10 @@ from boltons.iterutils import remap from modflow_devtools.download import download_and_unzip +from modflow_devtools.misc import try_literal_eval -# TODO: use dataclasses instead of typed dicts? static -# methods on typed dicts are evidently not allowed -# mypy: ignore-errors - -def _try_literal_eval(value: str) -> Any: - """ - Try to parse a string as a literal. If this fails, - return the value unaltered. - """ - try: - return literal_eval(value) - except (SyntaxError, ValueError): - return value - - -def _try_parse_bool(value: Any) -> Any: +def try_parse_bool(value: Any) -> Any: """ Try to parse a boolean from a string as represented in a DFN file, otherwise return the value unaltered. @@ -54,7 +40,7 @@ def _try_parse_bool(value: Any) -> Any: return value -def _field_attr_sort_key(item) -> int: +def field_attr_sort_key(item) -> int: """ Sort key for input field attributes. The order is: -1. 
block @@ -90,6 +76,22 @@ def _field_attr_sort_key(item) -> int: return 8 +def block_sort_key(item) -> int: + k, _ = item + if k == "options": + return 0 + elif k == "dimensions": + return 1 + elif k == "griddata": + return 2 + elif k == "packagedata": + return 3 + elif "period" in k: + return 4 + else: + return 5 + + FormatVersion = Literal[1, 2] """DFN format version number.""" @@ -113,11 +115,26 @@ def _field_attr_sort_key(item) -> int: ] -_SCALAR_TYPES = FieldType.__args__[:4] +_SCALAR_TYPES = ("keyword", "integer", "double precision", "string") Dfns = dict[str, "Dfn"] Fields = dict[str, "Field"] +Block = Fields +Blocks = dict[str, Block] + + +def get_blocks(dfn: "Dfn") -> Blocks: + """ + Extract blocks from an input definition. Any entry whose key + is not explicitly defined in `Dfn` is a block. + """ + return dict( + sorted( + {k: v for k, v in dfn.items() if k not in Dfn.__annotations__}.items(), # type: ignore + key=block_sort_key, + ) + ) class Field(TypedDict): @@ -125,12 +142,12 @@ class Field(TypedDict): name: str type: FieldType - shape: Any | None = None - block: str | None = None - default: Any | None = None - children: Optional["Fields"] = None - description: str | None = None - reader: Reader = "urword" + shape: Any | None + block: str | None + default: Any | None + children: Optional["Fields"] + description: str | None + reader: Reader class Ref(TypedDict): @@ -188,14 +205,14 @@ class Dfn(TypedDict): """ name: str - advanced: bool = False - multi: bool = False - parent: str | None = None - ref: Ref | None = None - sln: Sln | None = None - fkeys: Dfns | None = None - - @staticmethod + advanced: bool + multi: bool + parent: str | None + ref: Ref | None + sln: Sln | None + fkeys: Dfns | None + + @staticmethod # type: ignore[misc] def _load_v1_flat(f, common: dict | None = None) -> tuple[Mapping, list[str]]: field = {} flat = [] @@ -264,7 +281,7 @@ def _load_v1_flat(f, common: dict | None = None) -> tuple[Mapping, list[str]]: # the point of the OMD is 
to losslessly handle duplicate variable names return OMD(flat), meta - @classmethod + @classmethod # type: ignore[misc] def _load_v1(cls, f, name, **kwargs) -> "Dfn": """ Temporary load routine for the v1 DFN format. @@ -274,6 +291,41 @@ def _load_v1(cls, f, name, **kwargs) -> "Dfn": refs = kwargs.pop("refs", {}) flat, meta = Dfn._load_v1_flat(f, **kwargs) + def _convert_period_block(block: Block) -> Block: + """ + Convert a period block recarray to individual arrays, one per column. + + Extracts recarray fields and creates separate array variables. Gives + each an appropriate grid- or tdis-aligned shape as opposed to sparse + list shape in terms of maxbound as previously. + """ + + fields = list(block.values()) + if fields[0]["type"] == "recarray": + assert len(fields) == 1 + recarray_name = fields[0]["name"] + item = next(iter(fields[0]["children"].values())) + columns = item["children"] + else: + recarray_name = None + columns = block + block.pop(recarray_name, None) + cellid = columns.pop("cellid", None) + for col_name, column in columns.items(): + col_copy = column.copy() + old_dims = col_copy.get("shape") + if old_dims: + old_dims = old_dims[1:-1].split(",") + new_dims = ["nper"] + if cellid: + new_dims.append("nnodes") + if old_dims: + new_dims.extend([dim for dim in old_dims if dim != "maxbound"]) + col_copy["shape"] = f"({', '.join(new_dims)})" + block[col_name] = col_copy + + return block + def _convert_field(var: dict[str, Any]) -> Field: """ Convert an input field specification from its representation @@ -295,7 +347,7 @@ def _load(field) -> Field: # stay a string except default values, which we'll # try to parse as arbitrary literals below, and at # some point types, once we introduce type hinting - field = {k: _try_parse_bool(v) for k, v in field.items()} + field = {k: try_parse_bool(v) for k, v in field.items()} _name = field.pop("name") _type = field.pop("type", None) @@ -303,7 +355,7 @@ def _load(field) -> Field: shape = None if shape == "" else 
shape block = field.pop("block", None) default = field.pop("default", None) - default = _try_literal_eval(default) if _type != "string" else default + default = try_literal_eval(default) if _type != "string" else default description = field.pop("description", "") reader = field.pop("reader", "urword") ref = refs.get(_name, None) @@ -337,8 +389,10 @@ def _item() -> Field: name=_name, type="record", block=block, - fields=_fields(), - description=description.replace("is the list of", "is the record of"), + children=_fields(), + description=description.replace( + "is the list of", "is the record of" + ), reader=reader, **field, ) @@ -356,8 +410,10 @@ def _item() -> Field: name=first["name"] if single else _name, type=item_type, block=block, - fields=first["fields"] if single else fields, - description=description.replace("is the list of", f"is the {item_type} of"), + children=first["children"] if single else fields, + description=description.replace( + "is the list of", f"is the {item_type} of" + ), reader=reader, **field, ) @@ -393,15 +449,16 @@ def _fields() -> Fields: ) if _type.startswith("recarray"): - var_["item"] = _item() + item = _item() + var_["children"] = {item["name"]: item} var_["type"] = "recarray" elif _type.startswith("keystring"): - var_["choices"] = _choices() + var_["children"] = _choices() var_["type"] = "keystring" elif _type.startswith("record"): - var_["fields"] = _fields() + var_["children"] = _fields() var_["type"] = "record" # for now, we can tell a var is an array if its type @@ -435,7 +492,7 @@ def _fields() -> Fields: return var_ - return dict(sorted(_load(var).items(), key=_field_attr_sort_key)) + return dict(sorted(_load(var).items(), key=field_attr_sort_key)) # load top-level fields. 
any nested # fields will be loaded recursively @@ -451,11 +508,10 @@ def _fields() -> Fields: for block_name, block in groupby(fields.values(), lambda v: v["block"]) } - # mark transient blocks - transient_index_vars = flat.getlist("iper") - for transient_index in transient_index_vars: - transient_block = transient_index["block"] - blocks[transient_block]["transient_block"] = True + # if there's a period block, extract distinct arrays from + # the recarray-style definition + if (period_block := blocks.get("period", None)) is not None: + blocks["period"] = _convert_period_block(period_block) # remove unneeded variable attributes def remove_attrs(path, key, value): @@ -533,14 +589,14 @@ def _rest(): **blocks, ) - @classmethod + @classmethod # type: ignore[misc] def _load_v2(cls, f, name) -> "Dfn": data = tomli.load(f) if name and name != data.get("name", None): raise ValueError(f"Name mismatch, expected {name}") return cls(**data) - @classmethod + @classmethod # type: ignore[misc] def load( cls, f, @@ -559,7 +615,7 @@ def load( else: raise ValueError(f"Unsupported version, expected one of {version.__args__}") - @staticmethod + @staticmethod # type: ignore[misc] def _load_all_v1(dfndir: PathLike) -> Dfns: paths: list[Path] = [p for p in dfndir.glob("*.dfn") if p.stem not in ["common", "flopy"]] @@ -589,7 +645,7 @@ def _load_all_v1(dfndir: PathLike) -> Dfns: return dfns - @staticmethod + @staticmethod # type: ignore[misc] def _load_all_v2(dfndir: PathLike) -> Dfns: paths: list[Path] = [p for p in dfndir.glob("*.toml") if p.stem not in ["common", "flopy"]] dfns: Dfns = {} @@ -600,7 +656,7 @@ def _load_all_v2(dfndir: PathLike) -> Dfns: return dfns - @staticmethod + @staticmethod # type: ignore[misc] def load_all(dfndir: PathLike, version: FormatVersion = 1) -> Dfns: """Load all component definitions from the given directory.""" if version == 1: @@ -610,7 +666,7 @@ def load_all(dfndir: PathLike, version: FormatVersion = 1) -> Dfns: else: raise ValueError(f"Unsupported 
version, expected one of {version.__args__}") - @staticmethod + @staticmethod # type: ignore[misc] def load_tree(dfndir: PathLike, version: FormatVersion = 2) -> dict: """Load all definitions and return as hierarchical tree.""" dfns = Dfn.load_all(dfndir, version) @@ -634,8 +690,8 @@ def infer_tree(dfns: dict[str, Dfn]) -> dict: if root_name != "sim": raise ValueError(f"Root component must be named 'sim', found '{root_name}'") - def add_children(node_name: str) -> dict: - node = dfns[node_name].copy() + def add_children(node_name: str) -> dict[str, Any]: + node = dict(dfns[node_name]) children = [ name for name, dfn in dfns.items() if dfn.get("parent") == node_name ] @@ -652,7 +708,7 @@ def get_dfns(owner: str, repo: str, ref: str, outdir: str | PathLike, verbose: b if verbose: print(f"Downloading MODFLOW 6 repository from {url}") with tempfile.TemporaryDirectory() as tmp: - dl_path = download_and_unzip(url, tmp, verbose=verbose) + dl_path = download_and_unzip(url, Path(tmp), verbose=verbose) contents = list(dl_path.glob("modflow6-*")) proj_path = next(iter(contents), None) if not proj_path: From 4e8621c131cfbc99a52e576361fcdf863e439a94 Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Thu, 24 Jul 2025 12:44:21 -0400 Subject: [PATCH 03/36] feat(dfn): add get_fields function (#230) Get a flat dict of top-level fields. Convenient when you don't need to care about block structure. --- modflow_devtools/dfn.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/modflow_devtools/dfn.py b/modflow_devtools/dfn.py index 146f155e..78341d87 100644 --- a/modflow_devtools/dfn.py +++ b/modflow_devtools/dfn.py @@ -137,6 +137,21 @@ def get_blocks(dfn: "Dfn") -> Blocks: ) +def get_fields(dfn: "Dfn") -> Fields: + """ + Extract a flat dictionary of fields from an input definition. + Only top-level fields are included, i.e. subfields of records + or recarrays are not included. 
+ """ + fields = {} + for block in get_blocks(dfn).values(): + for field in block.values(): + if field["name"] in fields: + warn(f"Duplicate field name {field['name']} in {dfn['name']}") + fields[field["name"]] = field + return fields + + class Field(TypedDict): """A field specification.""" From b5f9dc38602b34e93ef64cce42d89e71dcbcf25b Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Tue, 29 Jul 2025 17:50:57 -0400 Subject: [PATCH 04/36] refactor(dfn): drop fkeys from v2/toml (#231) The foreign key system was to support flopy3's concept of "subpackage" but that should be a flopy concern rather than of the specification, though this leaves the Ref typed dict for now --- modflow_devtools/dfn.py | 50 +++++++---------------------------------- 1 file changed, 8 insertions(+), 42 deletions(-) diff --git a/modflow_devtools/dfn.py b/modflow_devtools/dfn.py index 78341d87..10c47c7d 100644 --- a/modflow_devtools/dfn.py +++ b/modflow_devtools/dfn.py @@ -126,12 +126,16 @@ def block_sort_key(item) -> int: def get_blocks(dfn: "Dfn") -> Blocks: """ - Extract blocks from an input definition. Any entry whose key - is not explicitly defined in `Dfn` is a block. + Extract blocks from an input definition. """ + + def _is_block(item: tuple[str, Any]) -> bool: + k, v = item + return k not in Dfn.__annotations__ + return dict( sorted( - {k: v for k, v in dfn.items() if k not in Dfn.__annotations__}.items(), # type: ignore + {k: v for k, v in dfn.items() if _is_block((k, v))}.items(), # type: ignore key=block_sort_key, ) ) @@ -225,7 +229,6 @@ class Dfn(TypedDict): parent: str | None ref: Ref | None sln: Sln | None - fkeys: Dfns | None @staticmethod # type: ignore[misc] def _load_v1_flat(f, common: dict | None = None) -> tuple[Mapping, list[str]]: @@ -302,8 +305,6 @@ def _load_v1(cls, f, name, **kwargs) -> "Dfn": Temporary load routine for the v1 DFN format. 
""" - fkeys = {} - refs = kwargs.pop("refs", {}) flat, meta = Dfn._load_v1_flat(f, **kwargs) def _convert_period_block(block: Block) -> Block: @@ -373,11 +374,6 @@ def _load(field) -> Field: default = try_literal_eval(default) if _type != "string" else default description = field.pop("description", "") reader = field.pop("reader", "urword") - ref = refs.get(_name, None) - - # if the field is a foreign key, register it - if ref: - fkeys[_name] = ref def _item() -> Field: """Load list item.""" @@ -485,26 +481,6 @@ def _fields() -> Fields: else: var_["type"] = _type - # if var is a foreign key, return subpkg var instead - if ref: - return Field( - name=ref["val"], - type=_type, - shape=shape, - block=block, - description=( - f"Contains data for the {ref['abbr']} package. Data can be " - f"passed as a dictionary to the {ref['abbr']} package with " - "variable names as keys and package data as values. Data " - f"for the {ref['val']} variable is also acceptable. See " - f"{ref['abbr']} package documentation for more information." 
- ), - default=None, - ref=ref, - reader=reader, - **field, - ) - return var_ return dict(sorted(_load(var).items(), key=field_attr_sort_key)) @@ -596,7 +572,6 @@ def _rest(): return cls( name=name, - fkeys=fkeys, advanced=_advanced(), multi=_multi(), sln=_sln(), @@ -642,20 +617,11 @@ def _load_all_v1(dfndir: PathLike) -> Dfns: with common_path.open() as f: common, _ = Dfn._load_v1_flat(f) - # load references (subpackages) - refs = {} - for path in paths: - with path.open() as f: - dfn = Dfn.load(f, name=path.stem, common=common) - ref = dfn.get("ref", None) - if ref: - refs[ref["key"]] = ref - # load definitions dfns: Dfns = {} for path in paths: with path.open() as f: - dfn = Dfn.load(f, name=path.stem, common=common, refs=refs) + dfn = Dfn.load(f, name=path.stem, common=common) dfns[path.stem] = dfn return dfns From ece3d60a1ac92659650a9d3de93af5e2189e969a Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Tue, 5 Aug 2025 13:53:26 -0400 Subject: [PATCH 05/36] fix(dfn): populate keystring children properly (#232) --- modflow_devtools/dfn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modflow_devtools/dfn.py b/modflow_devtools/dfn.py index 10c47c7d..56aa4ad2 100644 --- a/modflow_devtools/dfn.py +++ b/modflow_devtools/dfn.py @@ -446,7 +446,7 @@ def _fields() -> Fields: v = flat.get(name, None) if not v or not v.get("in_record", False) or v["type"].startswith("record"): continue - fields[name] = v + fields[name] = _convert_field(v) return fields var_ = Field( From 0fc9539aea500fe26dc8464d28200dbcfcd18a70 Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Tue, 19 Aug 2025 16:40:31 -0400 Subject: [PATCH 06/36] feat(dfn): separate all recarray columns in tomls (#234) generalize the column separation in #229 from period block recarrays to all recarrays. deal only with arrays in the v2 schema. 
--- modflow_devtools/dfn.py | 90 +++++++++++++++++++++++++++++++++-------- 1 file changed, 73 insertions(+), 17 deletions(-) diff --git a/modflow_devtools/dfn.py b/modflow_devtools/dfn.py index 56aa4ad2..7dba27b3 100644 --- a/modflow_devtools/dfn.py +++ b/modflow_devtools/dfn.py @@ -307,37 +307,91 @@ def _load_v1(cls, f, name, **kwargs) -> "Dfn": flat, meta = Dfn._load_v1_flat(f, **kwargs) - def _convert_period_block(block: Block) -> Block: + def _convert_recarray_block(block: Block, block_name: str) -> Block: """ - Convert a period block recarray to individual arrays, one per column. + Convert a recarray block to individual arrays, one per column. - Extracts recarray fields and creates separate array variables. Gives - each an appropriate grid- or tdis-aligned shape as opposed to sparse - list shape in terms of maxbound as previously. + Extract recarray fields and create separate array variables. For period + blocks, give each an appropriate grid- or time-aligned shape (nper, nnodes). + For other blocks, uses the declared dimensions directly. 
""" fields = list(block.values()) if fields[0]["type"] == "recarray": assert len(fields) == 1 - recarray_name = fields[0]["name"] - item = next(iter(fields[0]["children"].values())) + recarray_field = fields[0] + recarray_name = recarray_field["name"] + item = next(iter(recarray_field["children"].values())) columns = item["children"] + + # Get the original recarray shape to determine base dimensions + recarray_shape = recarray_field.get("shape") + if recarray_shape: + # Parse shape like "(nexg)" or "(maxbound)" + base_dims = recarray_shape[1:-1].split(",") + base_dims = [dim.strip() for dim in base_dims if dim.strip()] + else: + base_dims = [] else: recarray_name = None columns = block + base_dims = [] + + # Remove the original recarray field block.pop(recarray_name, None) + + # Handle cellid specially - it indicates spatial indexing cellid = columns.pop("cellid", None) + for col_name, column in columns.items(): col_copy = column.copy() old_dims = col_copy.get("shape") if old_dims: old_dims = old_dims[1:-1].split(",") - new_dims = ["nper"] - if cellid: - new_dims.append("nnodes") - if old_dims: - new_dims.extend([dim for dim in old_dims if dim != "maxbound"]) - col_copy["shape"] = f"({', '.join(new_dims)})" + old_dims = [dim.strip() for dim in old_dims if dim.strip()] + else: + old_dims = [] + + # Determine new dimensions based on block type + if block_name == "period": + # Period blocks get time + spatial dimensions + new_dims = ["nper"] + if cellid: + new_dims.append("nnodes") + # Add any additional dimensions, excluding maxbound + if old_dims: + new_dims.extend([dim for dim in old_dims if dim != "maxbound"]) + else: + # Non-period blocks use declared dimensions + new_dims = [] + if base_dims: + # Use the dimensions from the recarray shape + # Only drop maxbound if there are other meaningful dimensions + filtered_base_dims = [ + dim for dim in base_dims if dim != "maxbound" + ] + if filtered_base_dims: + new_dims.extend(filtered_base_dims) + else: + # Keep 
maxbound if no other dimensions are available + new_dims.extend(base_dims) + # Add any column-specific dimensions + if old_dims: + filtered_old_dims = [ + dim for dim in old_dims if dim != "maxbound" + ] + if filtered_old_dims: + new_dims.extend(filtered_old_dims) + else: + # Keep maxbound if no other dimensions are available + new_dims.extend(old_dims) + + if new_dims: + col_copy["shape"] = f"({', '.join(new_dims)})" + else: + # Scalar field + col_copy["shape"] = None + block[col_name] = col_copy return block @@ -499,10 +553,12 @@ def _fields() -> Fields: for block_name, block in groupby(fields.values(), lambda v: v["block"]) } - # if there's a period block, extract distinct arrays from - # the recarray-style definition - if (period_block := blocks.get("period", None)) is not None: - blocks["period"] = _convert_period_block(period_block) + # extract distinct arrays from recarray-style definitions in all blocks + for block_name, block in blocks.items(): + # Check if this block contains any recarray fields + has_recarray = any(field["type"] == "recarray" for field in block.values()) + if has_recarray: + blocks[block_name] = _convert_recarray_block(block, block_name) # remove unneeded variable attributes def remove_attrs(path, key, value): From 0c07240883318e8f5a19ab894be6e70aa3edfc79 Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Tue, 19 Aug 2025 16:45:13 -0400 Subject: [PATCH 07/36] feat(dfn): mark models, exgs, slns multi-components in tomls (#235) multi attribute generalizes concept of multi-package to any component whose parent can have not just one but multiple --- modflow_devtools/dfn.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/modflow_devtools/dfn.py b/modflow_devtools/dfn.py index 7dba27b3..749129c7 100644 --- a/modflow_devtools/dfn.py +++ b/modflow_devtools/dfn.py @@ -626,11 +626,19 @@ def _rest(): return Ref(parent=parent, **rest) return None + sln = _sln() + multi = ( + _multi() + or sln is not None + or ("nam" in 
name and "sim" not in name) + or name.startswith("exg-") + ) + return cls( name=name, advanced=_advanced(), - multi=_multi(), - sln=_sln(), + multi=multi, + sln=sln, ref=_sub(), **blocks, ) From 8290fd2e8f1dcec355659c64bb2c6a954c580880 Mon Sep 17 00:00:00 2001 From: Mike Taves Date: Thu, 11 Sep 2025 23:45:27 +1200 Subject: [PATCH 08/36] refactor(dfn): rename _SCALAR_TYPES to public SCALAR_TYPES (#236) This variable is accessed externally (here), which means it should be a public variable, not private. Keep a private _SCALAR_TYPES copy for now. --- modflow_devtools/dfn.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/modflow_devtools/dfn.py b/modflow_devtools/dfn.py index 749129c7..069beefc 100644 --- a/modflow_devtools/dfn.py +++ b/modflow_devtools/dfn.py @@ -115,7 +115,8 @@ def block_sort_key(item) -> int: ] -_SCALAR_TYPES = ("keyword", "integer", "double precision", "string") +SCALAR_TYPES = ("keyword", "integer", "double precision", "string") +_SCALAR_TYPES = SCALAR_TYPES # allow backwards compat; imported by flopy Dfns = dict[str, "Dfn"] @@ -130,7 +131,7 @@ def get_blocks(dfn: "Dfn") -> Blocks: """ def _is_block(item: tuple[str, Any]) -> bool: - k, v = item + k, _v = item return k not in Dfn.__annotations__ return dict( @@ -449,7 +450,7 @@ def _item() -> Field: return _convert_field(next(iter(flat.getlist(item_names[0])))) # implicit simple record (no children) - if all(t in _SCALAR_TYPES for t in item_types): + if all(t in SCALAR_TYPES for t in item_types): return Field( name=_name, type="record", @@ -529,7 +530,7 @@ def _fields() -> Fields: # for now, we can tell a var is an array if its type # is scalar and it has a shape. once we have proper # typing, this can be read off the type itself. 
- elif shape is not None and _type not in _SCALAR_TYPES: + elif shape is not None and _type not in SCALAR_TYPES: raise TypeError(f"Unsupported array type: {_type}") else: From 0f7503b1f7179c2a90ffdb7fff2e6f97a6199d0c Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Thu, 9 Oct 2025 14:30:14 -0400 Subject: [PATCH 09/36] fix(dfn): distinguish file format from schema version (#233) Up to now we conflated the format of definition files (DFN vs TOML) with the data schema we expect their contents to follow. Separate these concerns. Add a schema conversion layer for v1 -> v2 transformations. Refactoring/usability-wise, make Dfn a dataclass instead of a frankenstein typed dict. Miscellaneous other tidying. And drop the renaming of simulation and model components without "-nam". We can do that later, for now stay consistent. --- autotest/test_dfn.py | 133 +++-- modflow_devtools/dfn.py | 764 --------------------------- modflow_devtools/dfn/__init__.py | 523 ++++++++++++++++++ modflow_devtools/dfn/fetch.py | 29 + modflow_devtools/dfn/parse.py | 177 +++++++ modflow_devtools/dfn/schema/block.py | 22 + modflow_devtools/dfn/schema/field.py | 39 ++ modflow_devtools/dfn/schema/ref.py | 13 + modflow_devtools/dfn/schema/v1.py | 23 + modflow_devtools/dfn/schema/v2.py | 19 + modflow_devtools/dfn2toml.py | 96 ++-- 11 files changed, 953 insertions(+), 885 deletions(-) delete mode 100644 modflow_devtools/dfn.py create mode 100644 modflow_devtools/dfn/__init__.py create mode 100644 modflow_devtools/dfn/fetch.py create mode 100644 modflow_devtools/dfn/parse.py create mode 100644 modflow_devtools/dfn/schema/block.py create mode 100644 modflow_devtools/dfn/schema/field.py create mode 100644 modflow_devtools/dfn/schema/ref.py create mode 100644 modflow_devtools/dfn/schema/v1.py create mode 100644 modflow_devtools/dfn/schema/v2.py diff --git a/autotest/test_dfn.py b/autotest/test_dfn.py index 27704fa6..31573a68 100644 --- a/autotest/test_dfn.py +++ b/autotest/test_dfn.py @@ -2,23 +2,25 @@ import 
pytest -from modflow_devtools.dfn import Dfn, get_dfns +from modflow_devtools.dfn import _load_common, load, load_flat +from modflow_devtools.dfn.fetch import fetch_dfns from modflow_devtools.dfn2toml import convert from modflow_devtools.markers import requires_pkg PROJ_ROOT = Path(__file__).parents[1] DFN_DIR = PROJ_ROOT / "autotest" / "temp" / "dfn" TOML_DIR = DFN_DIR / "toml" -VERSIONS = {1: DFN_DIR, 2: TOML_DIR} +SPEC_DIRS = {1: DFN_DIR, 2: TOML_DIR} MF6_OWNER = "MODFLOW-ORG" MF6_REPO = "modflow6" MF6_REF = "develop" +EMPTY_DFNS = {"exg-gwfgwe", "exg-gwfgwt", "exg-gwfprt", "sln-ems"} def pytest_generate_tests(metafunc): if "dfn_name" in metafunc.fixturenames: if not any(DFN_DIR.glob("*.dfn")): - get_dfns(MF6_OWNER, MF6_REPO, MF6_REF, DFN_DIR, verbose=True) + fetch_dfns(MF6_OWNER, MF6_REPO, MF6_REF, DFN_DIR, verbose=True) dfn_names = [ dfn.stem for dfn in DFN_DIR.glob("*.dfn") if dfn.stem not in ["common", "flopy"] ] @@ -43,82 +45,73 @@ def test_load_v1(dfn_name): (DFN_DIR / "common.dfn").open() as common_file, (DFN_DIR / f"{dfn_name}.dfn").open() as dfn_file, ): - common, _ = Dfn._load_v1_flat(common_file) - dfn = Dfn.load(dfn_file, name=dfn_name, common=common) - assert any(dfn) + common = _load_common(common_file) + dfn = load(dfn_file, name=dfn_name, format="dfn", common=common) + assert any(dfn.fields) == (dfn.name not in EMPTY_DFNS) @requires_pkg("boltons") def test_load_v2(toml_name): with (TOML_DIR / f"{toml_name}.toml").open(mode="rb") as toml_file: - toml = Dfn.load(toml_file, name=toml_name, version=2) - assert any(toml) + dfn = load(toml_file, name=toml_name, format="toml") + assert any(dfn.fields) == (dfn.name not in EMPTY_DFNS) @requires_pkg("boltons") -@pytest.mark.parametrize("version", list(VERSIONS.keys())) -def test_load_all(version): - dfns = Dfn.load_all(VERSIONS[version], version=version) - assert any(dfns) +@pytest.mark.parametrize("schema_version", list(SPEC_DIRS.keys())) +def test_load_all(schema_version): + dfns = 
load_flat(path=SPEC_DIRS[schema_version]) + for dfn in dfns.values(): + assert any(dfn.fields) == (dfn.name not in EMPTY_DFNS) -@requires_pkg("boltons") -def test_load_tree(): - import tempfile - +@requires_pkg("boltons", "tomli") +def test_convert(function_tmpdir): import tomli - with tempfile.TemporaryDirectory() as tmp_dir: - tmp_path = Path(tmp_dir) - convert(DFN_DIR, tmp_path) - - # Test file conversion and naming - assert (tmp_path / "sim.toml").exists() - assert (tmp_path / "gwf.toml").exists() - assert not (tmp_path / "sim-nam.toml").exists() - - # Test parent relationships in files - with (tmp_path / "sim.toml").open("rb") as f: - sim_data = tomli.load(f) - assert sim_data["name"] == "sim" - assert "parent" not in sim_data - - with (tmp_path / "gwf.toml").open("rb") as f: - gwf_data = tomli.load(f) - assert gwf_data["name"] == "gwf" - assert gwf_data["parent"] == "sim" - - # Test hierarchy enforcement and completeness - dfns = Dfn.load_all(tmp_path, version=2) - roots = [name for name, dfn in dfns.items() if not dfn.get("parent")] - assert len(roots) == 1 - assert roots[0] == "sim" - - for dfn in dfns.values(): - parent = dfn.get("parent") - if parent: - assert parent in dfns - - # Test tree building and navigation - tree = Dfn.load_tree(tmp_path, version=2) - assert "sim" in tree - assert tree["sim"]["name"] == "sim" - - for model_type in ["gwf", "gwt", "gwe"]: - if model_type in tree["sim"]: - assert tree["sim"][model_type]["name"] == model_type - assert tree["sim"][model_type]["parent"] == "sim" - - if "gwf" in tree["sim"]: - gwf_packages = [ - k - for k in tree["sim"]["gwf"].keys() - if k.startswith("gwf-") and isinstance(tree["sim"]["gwf"][k], dict) - ] - assert len(gwf_packages) > 0 - - if "gwf-dis" in tree["sim"]["gwf"]: - dis = tree["sim"]["gwf"]["gwf-dis"] - assert dis["name"] == "gwf-dis" - assert dis["parent"] == "gwf" - assert "options" in dis or "dimensions" in dis + convert(DFN_DIR, function_tmpdir) + + assert (function_tmpdir / 
"sim-nam.toml").exists() + assert (function_tmpdir / "gwf-nam.toml").exists() + + with (function_tmpdir / "sim-nam.toml").open("rb") as f: + sim_data = tomli.load(f) + assert sim_data["name"] == "sim-nam" + assert sim_data["schema_version"] == "2" + assert "parent" not in sim_data + + with (function_tmpdir / "gwf-nam.toml").open("rb") as f: + gwf_data = tomli.load(f) + assert gwf_data["name"] == "gwf-nam" + assert gwf_data["parent"] == "sim-nam" + assert gwf_data["schema_version"] == "2" + + dfns = load_flat(function_tmpdir) + roots = [] + for dfn in dfns.values(): + if dfn.parent: + assert dfn.parent in dfns + else: + roots.append(dfn.name) + assert len(roots) == 1 + root = dfns[roots[0]] + assert root.name == "sim-nam" + + models = root.children or {} + for mdl in models: + assert models[mdl].name == mdl + assert models[mdl].parent == "sim-nam" + + if gwf := models.get("gwf-nam", None): + pkgs = gwf.children or {} + pkgs = { + k: v + for k, v in pkgs.items() + if k.startswith("gwf-") and isinstance(v, dict) + } + assert len(pkgs) > 0 + if dis := pkgs.get("gwf-dis", None): + assert dis.name == "gwf-dis" + assert dis.parent == "gwf" + assert "options" in (dis.blocks or {}) + assert "dimensions" in (dis.blocks or {}) diff --git a/modflow_devtools/dfn.py b/modflow_devtools/dfn.py deleted file mode 100644 index 069beefc..00000000 --- a/modflow_devtools/dfn.py +++ /dev/null @@ -1,764 +0,0 @@ -""" -MODFLOW 6 definition file tools. Includes types for field -and component specification, a parser for the original -DFN format as well as for TOML definition files, and -a function to fetch DFNs from the MF6 repository. 
-""" - -import shutil -import tempfile -from ast import literal_eval -from collections.abc import Mapping -from itertools import groupby -from os import PathLike -from pathlib import Path -from typing import ( - Any, - Literal, - Optional, - TypedDict, -) -from warnings import warn - -import tomli -from boltons.dictutils import OMD -from boltons.iterutils import remap - -from modflow_devtools.download import download_and_unzip -from modflow_devtools.misc import try_literal_eval - - -def try_parse_bool(value: Any) -> Any: - """ - Try to parse a boolean from a string as represented - in a DFN file, otherwise return the value unaltered. - """ - if isinstance(value, str): - value = value.lower() - if value in ["true", "false"]: - return value == "true" - return value - - -def field_attr_sort_key(item) -> int: - """ - Sort key for input field attributes. The order is: - -1. block - 0. name - 1. type - 2. shape - 3. default - 4. reader - 5. optional - 6. longname - 7. description - """ - - k, _ = item - if k == "block": - return -1 - if k == "name": - return 0 - if k == "type": - return 1 - if k == "shape": - return 2 - if k == "default": - return 3 - if k == "reader": - return 4 - if k == "optional": - return 5 - if k == "longname": - return 6 - if k == "description": - return 7 - return 8 - - -def block_sort_key(item) -> int: - k, _ = item - if k == "options": - return 0 - elif k == "dimensions": - return 1 - elif k == "griddata": - return 2 - elif k == "packagedata": - return 3 - elif "period" in k: - return 4 - else: - return 5 - - -FormatVersion = Literal[1, 2] -"""DFN format version number.""" - - -FieldType = Literal[ - "keyword", - "integer", - "double precision", - "string", - "record", - "recarray", - "keystring", -] - - -Reader = Literal[ - "urword", - "u1ddbl", - "u2ddbl", - "readarray", -] - - -SCALAR_TYPES = ("keyword", "integer", "double precision", "string") -_SCALAR_TYPES = SCALAR_TYPES # allow backwards compat; imported by flopy - - -Dfns = dict[str, 
"Dfn"] -Fields = dict[str, "Field"] -Block = Fields -Blocks = dict[str, Block] - - -def get_blocks(dfn: "Dfn") -> Blocks: - """ - Extract blocks from an input definition. - """ - - def _is_block(item: tuple[str, Any]) -> bool: - k, _v = item - return k not in Dfn.__annotations__ - - return dict( - sorted( - {k: v for k, v in dfn.items() if _is_block((k, v))}.items(), # type: ignore - key=block_sort_key, - ) - ) - - -def get_fields(dfn: "Dfn") -> Fields: - """ - Extract a flat dictionary of fields from an input definition. - Only top-level fields are included, i.e. subfields of records - or recarrays are not included. - """ - fields = {} - for block in get_blocks(dfn).values(): - for field in block.values(): - if field["name"] in fields: - warn(f"Duplicate field name {field['name']} in {dfn['name']}") - fields[field["name"]] = field - return fields - - -class Field(TypedDict): - """A field specification.""" - - name: str - type: FieldType - shape: Any | None - block: str | None - default: Any | None - children: Optional["Fields"] - description: str | None - reader: Reader - - -class Ref(TypedDict): - """ - A foreign-key-like reference between a file input variable - in a referring input component and another input component - referenced by it. Previously known as a "subpackage". - - A `Dfn` with a nonempty `ref` can be referred to by other - component definitions, via a filepath variable which acts - as a foreign key. If such a variable is detected when any - component is loaded, the component's `__init__` method is - modified, such that the variable named `val`, residing in - the referenced component, replaces the variable with name - `key` in the referencing component, i.e., the foreign key - filepath variable, This forces a referencing component to - accept a subcomponent's data directly, as if it were just - a variable, rather than indirectly, with the subcomponent - loaded up from a file identified by the filepath variable. 
- """ - - key: str - val: str - abbr: str - param: str - parent: str - description: str | None - - -class Sln(TypedDict): - """ - A solution package specification. - """ - - abbr: str - pattern: str - - -class Dfn(TypedDict): - """ - MODFLOW 6 input definition. An input definition - specifies a component in an MF6 simulation, e.g. - a model or package. A component contains input - variables, and may contain other metadata such - as foreign key references to other components - (i.e. subpackages), package-specific metadata - (e.g. for solutions), advanced package status, - and whether the component is a multi-package. - - An input definition must have a name. Other top- - level keys are blocks, which must be mappings of - `str` to `Field`, and metadata, of which only a - limited set of keys are allowed. Block names and - metadata keys may not overlap. - """ - - name: str - advanced: bool - multi: bool - parent: str | None - ref: Ref | None - sln: Sln | None - - @staticmethod # type: ignore[misc] - def _load_v1_flat(f, common: dict | None = None) -> tuple[Mapping, list[str]]: - field = {} - flat = [] - meta = [] - common = common or {} - - for line in f: - # remove whitespace/etc from the line - line = line.strip() - - # record context name and flopy metadata - # attributes, skip all other comment lines - if line.startswith("#"): - _, sep, tail = line.partition("flopy") - if sep == "flopy": - if ( - "multi-package" in tail - or "solution_package" in tail - or "subpackage" in tail - or "parent" in tail - ): - meta.append(tail.strip()) - _, sep, tail = line.partition("package-type") - if sep == "package-type": - meta.append(f"package-type {tail.strip()}") - continue - - # if we hit a newline and the parameter dict - # is nonempty, we've reached the end of its - # block of attributes - if not any(line): - if any(field): - flat.append((field["name"], field)) - field = {} - continue - - # split the attribute's key and value and - # store it in the parameter dictionary - 
key, _, value = line.partition(" ") - if key == "default_value": - key = "default" - field[key] = value - - # make substitutions from common variable definitions, - # remove backslashes, TODO: generate/insert citations. - descr = field.get("description", None) - if descr: - descr = descr.replace("\\", "").replace("``", "'").replace("''", "'") - _, replace, tail = descr.strip().partition("REPLACE") - if replace: - key, _, subs = tail.strip().partition(" ") - subs = literal_eval(subs) - cmmn = common.get(key, None) - if cmmn is None: - warn(f"Can't substitute description text, common variable not found: {key}") - else: - descr = cmmn.get("description", "") - if any(subs): - descr = descr.replace("\\", "").replace("{#1}", subs["{#1}"]) - field["description"] = descr - - # add the final parameter - if any(field): - flat.append((field["name"], field)) - - # the point of the OMD is to losslessly handle duplicate variable names - return OMD(flat), meta - - @classmethod # type: ignore[misc] - def _load_v1(cls, f, name, **kwargs) -> "Dfn": - """ - Temporary load routine for the v1 DFN format. - """ - - flat, meta = Dfn._load_v1_flat(f, **kwargs) - - def _convert_recarray_block(block: Block, block_name: str) -> Block: - """ - Convert a recarray block to individual arrays, one per column. - - Extract recarray fields and create separate array variables. For period - blocks, give each an appropriate grid- or time-aligned shape (nper, nnodes). - For other blocks, uses the declared dimensions directly. 
- """ - - fields = list(block.values()) - if fields[0]["type"] == "recarray": - assert len(fields) == 1 - recarray_field = fields[0] - recarray_name = recarray_field["name"] - item = next(iter(recarray_field["children"].values())) - columns = item["children"] - - # Get the original recarray shape to determine base dimensions - recarray_shape = recarray_field.get("shape") - if recarray_shape: - # Parse shape like "(nexg)" or "(maxbound)" - base_dims = recarray_shape[1:-1].split(",") - base_dims = [dim.strip() for dim in base_dims if dim.strip()] - else: - base_dims = [] - else: - recarray_name = None - columns = block - base_dims = [] - - # Remove the original recarray field - block.pop(recarray_name, None) - - # Handle cellid specially - it indicates spatial indexing - cellid = columns.pop("cellid", None) - - for col_name, column in columns.items(): - col_copy = column.copy() - old_dims = col_copy.get("shape") - if old_dims: - old_dims = old_dims[1:-1].split(",") - old_dims = [dim.strip() for dim in old_dims if dim.strip()] - else: - old_dims = [] - - # Determine new dimensions based on block type - if block_name == "period": - # Period blocks get time + spatial dimensions - new_dims = ["nper"] - if cellid: - new_dims.append("nnodes") - # Add any additional dimensions, excluding maxbound - if old_dims: - new_dims.extend([dim for dim in old_dims if dim != "maxbound"]) - else: - # Non-period blocks use declared dimensions - new_dims = [] - if base_dims: - # Use the dimensions from the recarray shape - # Only drop maxbound if there are other meaningful dimensions - filtered_base_dims = [ - dim for dim in base_dims if dim != "maxbound" - ] - if filtered_base_dims: - new_dims.extend(filtered_base_dims) - else: - # Keep maxbound if no other dimensions are available - new_dims.extend(base_dims) - # Add any column-specific dimensions - if old_dims: - filtered_old_dims = [ - dim for dim in old_dims if dim != "maxbound" - ] - if filtered_old_dims: - 
new_dims.extend(filtered_old_dims) - else: - # Keep maxbound if no other dimensions are available - new_dims.extend(old_dims) - - if new_dims: - col_copy["shape"] = f"({', '.join(new_dims)})" - else: - # Scalar field - col_copy["shape"] = None - - block[col_name] = col_copy - - return block - - def _convert_field(var: dict[str, Any]) -> Field: - """ - Convert an input field specification from its representation - in a v1 format definition file to the v2 (structured) format. - - Notes - ----- - If the field does not have a `default` attribute, it will - default to `False` if it is a keyword, otherwise to `None`. - - A filepath field whose name functions as a foreign key - for a separate context will be given a reference to it. - """ - - def _load(field) -> Field: - field = field.copy() - - # parse booleans from strings. everything else can - # stay a string except default values, which we'll - # try to parse as arbitrary literals below, and at - # some point types, once we introduce type hinting - field = {k: try_parse_bool(v) for k, v in field.items()} - - _name = field.pop("name") - _type = field.pop("type", None) - shape = field.pop("shape", None) - shape = None if shape == "" else shape - block = field.pop("block", None) - default = field.pop("default", None) - default = try_literal_eval(default) if _type != "string" else default - description = field.pop("description", "") - reader = field.pop("reader", "urword") - - def _item() -> Field: - """Load list item.""" - - item_names = _type.split()[1:] - item_types = [ - v["type"] - for v in flat.values(multi=True) - if v["name"] in item_names and v.get("in_record", False) - ] - n_item_names = len(item_names) - if n_item_names < 1: - raise ValueError(f"Missing list definition: {_type}") - - # explicit record - if n_item_names == 1 and ( - item_types[0].startswith("record") or item_types[0].startswith("keystring") - ): - return _convert_field(next(iter(flat.getlist(item_names[0])))) - - # implicit simple record (no 
children) - if all(t in SCALAR_TYPES for t in item_types): - return Field( - name=_name, - type="record", - block=block, - children=_fields(), - description=description.replace( - "is the list of", "is the record of" - ), - reader=reader, - **field, - ) - - # implicit complex record (has children) - fields = { - v["name"]: _convert_field(v) - for v in flat.values(multi=True) - if v["name"] in item_names and v.get("in_record", False) - } - first = next(iter(fields.values())) - single = len(fields) == 1 - item_type = "keystring" if single and "keystring" in first["type"] else "record" - return Field( - name=first["name"] if single else _name, - type=item_type, - block=block, - children=first["children"] if single else fields, - description=description.replace( - "is the list of", f"is the {item_type} of" - ), - reader=reader, - **field, - ) - - def _choices() -> Fields: - """Load keystring (union) choices.""" - names = _type.split()[1:] - return { - v["name"]: _convert_field(v) - for v in flat.values(multi=True) - if v["name"] in names and v.get("in_record", False) - } - - def _fields() -> Fields: - """Load record fields.""" - names = _type.split()[1:] - fields = {} - for name in names: - v = flat.get(name, None) - if not v or not v.get("in_record", False) or v["type"].startswith("record"): - continue - fields[name] = _convert_field(v) - return fields - - var_ = Field( - name=_name, - shape=shape, - block=block, - description=description, - default=default, - reader=reader, - **field, - ) - - if _type.startswith("recarray"): - item = _item() - var_["children"] = {item["name"]: item} - var_["type"] = "recarray" - - elif _type.startswith("keystring"): - var_["children"] = _choices() - var_["type"] = "keystring" - - elif _type.startswith("record"): - var_["children"] = _fields() - var_["type"] = "record" - - # for now, we can tell a var is an array if its type - # is scalar and it has a shape. once we have proper - # typing, this can be read off the type itself. 
- elif shape is not None and _type not in SCALAR_TYPES: - raise TypeError(f"Unsupported array type: {_type}") - - else: - var_["type"] = _type - - return var_ - - return dict(sorted(_load(var).items(), key=field_attr_sort_key)) - - # load top-level fields. any nested - # fields will be loaded recursively - fields = { - field["name"]: _convert_field(field) - for field in flat.values(multi=True) - if not field.get("in_record", False) - } - - # group variables by block - blocks = { - block_name: {v["name"]: v for v in block} - for block_name, block in groupby(fields.values(), lambda v: v["block"]) - } - - # extract distinct arrays from recarray-style definitions in all blocks - for block_name, block in blocks.items(): - # Check if this block contains any recarray fields - has_recarray = any(field["type"] == "recarray" for field in block.values()) - if has_recarray: - blocks[block_name] = _convert_recarray_block(block, block_name) - - # remove unneeded variable attributes - def remove_attrs(path, key, value): - if key in ["in_record", "tagged", "preserve_case"]: - return False - return True - - blocks = remap(blocks, visit=remove_attrs) - - def _advanced() -> bool | None: - return any("package-type advanced" in m for m in meta) - - def _multi() -> bool: - return any("multi-package" in m for m in meta) - - def _sln() -> Sln | None: - sln = next( - iter(m for m in meta if isinstance(m, str) and m.startswith("solution_package")), - None, - ) - if sln: - abbr, pattern = sln.split()[1:] - return Sln(abbr=abbr, pattern=pattern) - return None - - def _sub() -> Ref | None: - def _parent(): - line = next( - iter(m for m in meta if isinstance(m, str) and m.startswith("parent")), - None, - ) - if not line: - return None - split = line.split() - return split[1] - - def _rest(): - line = next( - iter(m for m in meta if isinstance(m, str) and m.startswith("subpac")), - None, - ) - if not line: - return None - _, key, abbr, param, val = line.split() - matches = [v for v in 
fields.values() if v["name"] == val] - if not any(matches): - descr = None - else: - if len(matches) > 1: - warn(f"Multiple matches for referenced variable {val}") - match = matches[0] - descr = match["description"] - - return { - "key": key, - "val": val, - "abbr": abbr, - "param": param, - "description": descr, - } - - parent = _parent() - rest = _rest() - if parent and rest: - return Ref(parent=parent, **rest) - return None - - sln = _sln() - multi = ( - _multi() - or sln is not None - or ("nam" in name and "sim" not in name) - or name.startswith("exg-") - ) - - return cls( - name=name, - advanced=_advanced(), - multi=multi, - sln=sln, - ref=_sub(), - **blocks, - ) - - @classmethod # type: ignore[misc] - def _load_v2(cls, f, name) -> "Dfn": - data = tomli.load(f) - if name and name != data.get("name", None): - raise ValueError(f"Name mismatch, expected {name}") - return cls(**data) - - @classmethod # type: ignore[misc] - def load( - cls, - f, - name: str | None = None, - version: FormatVersion = 1, - **kwargs, - ) -> "Dfn": - """ - Load a component definition from a definition file. 
- """ - - if version == 1: - return cls._load_v1(f, name, **kwargs) - elif version == 2: - return cls._load_v2(f, name) - else: - raise ValueError(f"Unsupported version, expected one of {version.__args__}") - - @staticmethod # type: ignore[misc] - def _load_all_v1(dfndir: PathLike) -> Dfns: - paths: list[Path] = [p for p in dfndir.glob("*.dfn") if p.stem not in ["common", "flopy"]] - - # load common variables - common_path: Path | None = dfndir / "common.dfn" - if not common_path.is_file(): - common = None - else: - with common_path.open() as f: - common, _ = Dfn._load_v1_flat(f) - - # load definitions - dfns: Dfns = {} - for path in paths: - with path.open() as f: - dfn = Dfn.load(f, name=path.stem, common=common) - dfns[path.stem] = dfn - - return dfns - - @staticmethod # type: ignore[misc] - def _load_all_v2(dfndir: PathLike) -> Dfns: - paths: list[Path] = [p for p in dfndir.glob("*.toml") if p.stem not in ["common", "flopy"]] - dfns: Dfns = {} - for path in paths: - with path.open(mode="rb") as f: - dfn = Dfn.load(f, name=path.stem, version=2) - dfns[path.stem] = dfn - - return dfns - - @staticmethod # type: ignore[misc] - def load_all(dfndir: PathLike, version: FormatVersion = 1) -> Dfns: - """Load all component definitions from the given directory.""" - if version == 1: - return Dfn._load_all_v1(dfndir) - elif version == 2: - return Dfn._load_all_v2(dfndir) - else: - raise ValueError(f"Unsupported version, expected one of {version.__args__}") - - @staticmethod # type: ignore[misc] - def load_tree(dfndir: PathLike, version: FormatVersion = 2) -> dict: - """Load all definitions and return as hierarchical tree.""" - dfns = Dfn.load_all(dfndir, version) - return infer_tree(dfns) - - -def infer_tree(dfns: dict[str, Dfn]) -> dict: - """Infer the component hierarchy from definitions. - - Enforces single root requirement - must be exactly one component - with no parent, and it must be named 'sim'. 
- """ - roots = [name for name, dfn in dfns.items() if not dfn.get("parent")] - - if len(roots) != 1: - raise ValueError( - f"Expected exactly one root component, found {len(roots)}: {roots}" - ) - - root_name = roots[0] - if root_name != "sim": - raise ValueError(f"Root component must be named 'sim', found '{root_name}'") - - def add_children(node_name: str) -> dict[str, Any]: - node = dict(dfns[node_name]) - children = [ - name for name, dfn in dfns.items() if dfn.get("parent") == node_name - ] - for child in children: - node[child] = add_children(child) - return node - - return {root_name: add_children(root_name)} - - -def get_dfns(owner: str, repo: str, ref: str, outdir: str | PathLike, verbose: bool = False): - """Fetch definition files from the MODFLOW 6 repository.""" - url = f"https://github.com/{owner}/{repo}/archive/{ref}.zip" - if verbose: - print(f"Downloading MODFLOW 6 repository from {url}") - with tempfile.TemporaryDirectory() as tmp: - dl_path = download_and_unzip(url, Path(tmp), verbose=verbose) - contents = list(dl_path.glob("modflow6-*")) - proj_path = next(iter(contents), None) - if not proj_path: - raise ValueError(f"Missing proj dir in {dl_path}, found {contents}") - if verbose: - print("Copying dfns from download dir to output dir") - shutil.copytree(proj_path / "doc" / "mf6io" / "mf6ivar" / "dfn", outdir, dirs_exist_ok=True) diff --git a/modflow_devtools/dfn/__init__.py b/modflow_devtools/dfn/__init__.py new file mode 100644 index 00000000..c532d099 --- /dev/null +++ b/modflow_devtools/dfn/__init__.py @@ -0,0 +1,523 @@ +""" +MODFLOW 6 definition file tools. 
+""" + +from abc import ABC, abstractmethod +from dataclasses import asdict, dataclass, replace +from itertools import groupby +from os import PathLike +from pathlib import Path +from typing import ( + Literal, + cast, +) + +import tomli +from boltons.dictutils import OMD +from boltons.iterutils import remap +from packaging.version import Version + +from modflow_devtools.dfn.parse import ( + is_advanced_package, + is_multi_package, + parse_dfn, + try_parse_bool, + try_parse_parent, +) +from modflow_devtools.dfn.schema.block import Block, Blocks +from modflow_devtools.dfn.schema.field import SCALAR_TYPES, Field, Fields +from modflow_devtools.dfn.schema.ref import Ref +from modflow_devtools.dfn.schema.v1 import FieldV1 +from modflow_devtools.dfn.schema.v2 import FieldV2 +from modflow_devtools.misc import drop_none_or_empty, try_literal_eval + +__all__ = [ + "SCALAR_TYPES", + "Block", + "Blocks", + "Dfn", + "Dfns", + "Field", + "FieldV1", + "FieldV2", + "Fields", + "Ref", + "load", + "load_flat", + "load_tree", + "map", + "to_flat", + "to_tree", +] + + +Format = Literal["dfn", "toml"] +"""DFN serialization format.""" + + +Dfns = dict[str, "Dfn"] + + +@dataclass +class Dfn: + """ + MODFLOW 6 input component definition. + """ + + schema_version: Version + name: str + parent: str | None = None + advanced: bool = False + multi: bool = False + ref: Ref | None = None + blocks: Blocks | None = None + children: Dfns | None = None + + @property + def fields(self) -> Fields: + """ + A combined map of fields from all blocks. + + Only top-level fields are included, no subfields of composites + such as records or recarrays. + """ + fields = [] + for block in (self.blocks or {}).values(): + for field in block.values(): + fields.append((field.name, field)) + + # for now return a multidict to support duplicate field names. + # TODO: change to normal dict after deprecating v1 schema + return OMD(fields) + + +class SchemaMap(ABC): + @abstractmethod + def map(self, dfn: Dfn) -> Dfn: ... 
+ + +class MapV1To2(SchemaMap): + @staticmethod + def map_period_block(dfn: Dfn, block: Block) -> Block: + """ + Convert a period block recarray to individual arrays, one per column. + + Extracts recarray fields and creates separate array variables. Gives + each an appropriate grid- or tdis-aligned shape as opposed to sparse + list shape in terms of maxbound as previously. + """ + + block = dict(block) + fields = list(block.values()) + if fields[0].type == "recarray": + assert len(fields) == 1 + recarray_name = fields[0].name + block.pop(recarray_name, None) + item = next(iter((fields[0].children or {}).values())) + columns = dict(item.children or {}) + else: + recarray_name = None + columns = block + + cellid = columns.pop("cellid", None) + for col_name, column in columns.items(): + old_dims = column.shape + if old_dims: + old_dims = old_dims[1:-1].split(",") # type: ignore + new_dims = ["nper"] + if cellid: + new_dims.append("nnodes") + if old_dims: + new_dims.extend([dim for dim in old_dims if dim != "maxbound"]) + block[col_name] = replace(column, shape=f"({', '.join(new_dims)})") + + return block + + @staticmethod + def map_field(dfn: Dfn, field: Field) -> Field: + """ + Convert an input field specification from its representation + in a v1 format definition file to the v2 (structured) format. + + Notes + ----- + If the field does not have a `default` attribute, it will + default to `False` if it is a keyword, otherwise to `None`. + + A filepath field whose name functions as a foreign key + for a separate context will be given a reference to it. + """ + + fields = cast(OMD, dfn.fields) + + def _map_field(_field) -> Field: + field_dict = asdict(_field) + # parse booleans from strings. 
everything else can + # stay a string except default values, which we'll + # try to parse as arbitrary literals below, and at + # some point types, once we introduce type hinting + field_dict = {k: try_parse_bool(v) for k, v in field_dict.items()} + _name = field_dict.pop("name") + _type = field_dict.pop("type", None) + shape = field_dict.pop("shape", None) + shape = None if shape == "" else shape + block = field_dict.pop("block", None) + default = field_dict.pop("default", None) + default = try_literal_eval(default) if _type != "string" else default + description = field_dict.pop("description", "") + + def _row_field() -> Field: + """Parse a table's record (row) field""" + item_names = _type.split()[1:] + item_types = [ + f.type + for f in fields.values(multi=True) + if f.name in item_names and f.in_record + ] + n_item_names = len(item_names) + if n_item_names < 1: + raise ValueError(f"Missing list definition: {_type}") + + # explicit record or keystring + if n_item_names == 1 and ( + item_types[0].startswith("record") + or item_types[0].startswith("keystring") + ): + return MapV1To2.map_field( + dfn, next(iter(fields.getlist(item_names[0]))) + ) + + # implicit record with all scalar fields + if all(t in SCALAR_TYPES for t in item_types): + children = _record_fields() + return FieldV2.from_dict( + { + **field_dict, + "name": _name, + "type": "record", + "block": block, + "children": children, + "description": description.replace( + "is the list of", "is the record of" + ), + } + ) + + # implicit record with composite fields + children = { + f.name: MapV1To2.map_field(dfn, f) + for f in fields.values(multi=True) + if f.name in item_names and f.in_record + } + first = next(iter(children.values())) + if not first.type: + raise ValueError(f"Missing type for field: {first.name}") + single = len(children) == 1 + item_type = ( + "keystring" if single and "keystring" in first.type else "record" + ) + return FieldV2.from_dict( + { + "name": first.name if single else _name, 
+ "type": item_type, + "block": block, + "children": first.children if single else children, + "description": description.replace( + "is the list of", f"is the {item_type} of" + ), + **field_dict, + } + ) + + def _union_fields() -> Fields: + """Parse a union's fields""" + names = _type.split()[1:] + return { + f.name: MapV1To2.map_field(dfn, f) + for f in fields.values(multi=True) + if f.name in names and f.in_record + } + + def _record_fields() -> Fields: + """Parse a record's fields""" + names = _type.split()[1:] + return { + f.name: _map_field(f) + for f in fields.values(multi=True) + if f.name in names + and f.in_record + and not f.type.startswith("record") + } + + _field = FieldV2.from_dict( + { + "name": _name, + "shape": shape, + "block": block, + "description": description, + "default": default, + **field_dict, + } + ) + + if _type.startswith("recarray"): + child = _row_field() + _field.children = {child.name: child} + _field.type = "recarray" + + elif _type.startswith("keystring"): + _field.children = _union_fields() + _field.type = "keystring" + + elif _type.startswith("record"): + _field.children = _record_fields() + _field.type = "record" + + # for now, we can tell a var is an array if its type + # is scalar and it has a shape. once we have proper + # typing, this can be read off the type itself. 
+ elif shape is not None and _type not in SCALAR_TYPES: + raise TypeError(f"Unsupported array type: {_type}") + + else: + _field.type = _type + + return _field + + return _map_field(field) + + @staticmethod + def map_blocks(dfn: Dfn) -> Blocks: + fields = { + field.name: MapV1To2.map_field(dfn, field) + for field in cast(OMD, dfn.fields).values(multi=True) + if not field.in_record # type: ignore + } + block_dicts = { + block_name: {f.name: f for f in block} + for block_name, block in groupby(fields.values(), lambda f: f.block) + } + blocks = {} + + # Handle period blocks specially + if (period_block := block_dicts.get("period", None)) is not None: + blocks["period"] = MapV1To2.map_period_block(dfn, period_block) + + for block_name, block_data in block_dicts.items(): + if block_name != "period": + blocks[block_name] = block_data + + def remove_attrs(path, key, value): + # remove unneeded variable attributes + if key in ["in_record", "tagged", "preserve_case"]: + return False + return True + + return remap(blocks, visit=remove_attrs) + + def map(self, dfn: Dfn) -> Dfn: + if dfn.schema_version == (v2 := Version("2")): + return dfn + + return Dfn( + name=dfn.name, + advanced=dfn.advanced, + multi=dfn.multi, + ref=dfn.ref, + blocks=MapV1To2.map_blocks(dfn), + schema_version=v2, + parent=dfn.parent, + ) + + +def map( + dfn: Dfn, + schema_version: str | Version = "2", +) -> Dfn: + """Map a MODFLOW 6 specification to another schema version.""" + if dfn.schema_version == schema_version: + return dfn + elif Version(str(schema_version)) == Version("1"): + raise NotImplementedError("Mapping to schema version 1 is not implemented yet.") + elif Version(str(schema_version)) == Version("2"): + return MapV1To2().map(dfn) + raise ValueError(f"Unsupported schema version: {schema_version}. 
Expected 1 or 2.") + + +def load(f, format: str = "dfn", **kwargs) -> Dfn: + """Load a MODFLOW 6 definition file.""" + if format == "dfn": + name = kwargs.pop("name") + fields, meta = parse_dfn(f, **kwargs) + blocks = { + block_name: {field["name"]: FieldV1.from_dict(field) for field in block} + for block_name, block in groupby( + fields.values(), lambda field: field["block"] + ) + } + return Dfn( + name=name, + schema_version=Version("1"), + parent=try_parse_parent(meta), + advanced=is_advanced_package(meta), + multi=is_multi_package(meta), + blocks=blocks, + ) + + elif format == "toml": + data = tomli.load(f) + + dfn_fields = { + "name": data.pop("name", kwargs.pop("name", None)), + "schema_version": Version(str(data.pop("schema_version", "2"))), + "parent": data.pop("parent", None), + "advanced": data.pop("advanced", False), + "multi": data.pop("multi", False), + "ref": data.pop("ref", None), + } + + if (expected_name := kwargs.pop("name", None)) is not None: + if dfn_fields["name"] != expected_name: + raise ValueError( + f"DFN name mismatch: {expected_name} != {dfn_fields['name']}" + ) + + blocks = {} + for section_name, section_data in data.items(): + if isinstance(section_data, dict): + block_fields = {} + for field_name, field_data in section_data.items(): + if isinstance(field_data, dict): + block_fields[field_name] = FieldV2.from_dict(field_data) + else: + block_fields[field_name] = field_data + blocks[section_name] = block_fields # type: ignore + + dfn_fields["blocks"] = blocks if blocks else None + + return Dfn(**dfn_fields) + + raise ValueError(f"Unsupported format: {format}. Expected 'dfn' or 'toml'.") + + +def _load_common(f) -> Fields: + common, _ = parse_dfn(f) + return common + + +def load_flat(path: str | PathLike) -> Dfns: + """ + Load a flat MODFLOW 6 specification from definition files in a directory. + + Returns a dictionary of unlinked DFNs, i.e. without `children` populated. 
+ Components will have `parent` populated if the schema is v2 but not if v1. + """ + exclude = ["common", "flopy"] + path = Path(path).expanduser().resolve() + dfn_paths = {p.stem: p for p in path.glob("*.dfn") if p.stem not in exclude} + toml_paths = {p.stem: p for p in path.glob("*.toml") if p.stem not in exclude} + dfns = {} + if dfn_paths: + with (path / "common.dfn").open() as f: + common = _load_common(f) + for dfn_name, dfn_path in dfn_paths.items(): + with dfn_path.open() as f: + dfns[dfn_name] = load(f, name=dfn_name, common=common, format="dfn") + if toml_paths: + for toml_name, toml_path in toml_paths.items(): + with toml_path.open("rb") as f: + dfns[toml_name] = load(f, name=toml_name, format="toml") + return dfns + + +def load_tree(path: str | PathLike) -> Dfn: + """ + Load a structured MODFLOW 6 specification from definition files in a directory. + + A single root component definition (the simulation) is returned. This contains + child (and grandchild) components for the relevant models and packages. + """ + return to_tree(load_flat(path)) + + +def to_tree(dfns: Dfns) -> Dfn: + """ + Infer the MODFLOW 6 input component hierarchy from a flat spec: + unlinked DFNs, i.e. without `children` populated, only `parent`. + + Returns the root component. There must be exactly one root, i.e. + component with no `parent`. Composite components have `children` + populated. + + Assumes DFNs are already in v2 schema, just lacking parent-child + links; before calling this function, map them first with `map()`. 
+ """ + + def set_parent(dfn): + dfn = asdict(dfn) + if (dfn_name := dfn["name"]) == "sim-nam": + pass + elif dfn_name.endswith("-nam"): + dfn["parent"] = "sim-nam" + elif ( + dfn_name.startswith("exg-") + or dfn_name.startswith("sln-") + or dfn_name.startswith("utl-") + ): + dfn["parent"] = "sim-nam" + elif "-" in dfn_name: + mdl = dfn_name.split("-")[0] + dfn["parent"] = f"{mdl}-nam" + + return Dfn(**remap(dfn, visit=drop_none_or_empty)) + + dfns = {name: set_parent(dfn) for name, dfn in dfns.items()} + first_dfn = next(iter(dfns.values()), None) + match schema_version := str( + first_dfn.schema_version if first_dfn else Version("1") + ): + case "1": + raise NotImplementedError("Tree inference from v1 schema not implemented") + case "2": + if ( + nroots := len( + roots := { + name: dfn for name, dfn in dfns.items() if dfn.parent is None + } + ) + ) != 1: + raise ValueError(f"Expected one root component, found {nroots}") + + def _build_tree(node_name: str) -> Dfn: + node = dfns[node_name] + children = { + name: dfn for name, dfn in dfns.items() if dfn.parent == node_name + } + if any(children): + node.children = { + name: _build_tree(name) for name in children.keys() + } + return node + + return _build_tree(next(iter(roots.keys()))) + case _: + raise ValueError( + f"Unsupported schema version: {schema_version}. Expected 1 or 2." + ) + + +def to_flat(dfn: Dfn) -> Dfns: + """ + Flatten a MODFLOW 6 input component hierarchy to a flat spec: + unlinked DFNs, i.e. without `children` populated, only `parent`. + + Returns a dictionary of all components in the specification. 
+ """ + + def _flatten(dfn: Dfn) -> Dfns: + dfns = {dfn.name: replace(dfn, children=None)} + for child in (dfn.children or {}).values(): + dfns.update(_flatten(child)) + return dfns + + return _flatten(dfn) diff --git a/modflow_devtools/dfn/fetch.py b/modflow_devtools/dfn/fetch.py new file mode 100644 index 00000000..34cdfa76 --- /dev/null +++ b/modflow_devtools/dfn/fetch.py @@ -0,0 +1,29 @@ +from os import PathLike +from pathlib import Path +from shutil import copytree +from tempfile import TemporaryDirectory + +from modflow_devtools.download import download_and_unzip + + +def fetch_dfns( + owner: str, repo: str, ref: str, outdir: str | PathLike, verbose: bool = False +): + """Fetch definition files from the MODFLOW 6 repository.""" + url = f"https://github.com/{owner}/{repo}/archive/{ref}.zip" + if verbose: + print(f"Downloading MODFLOW 6 repository archive from {url}") + with TemporaryDirectory() as tmp: + dl_path = download_and_unzip(url, Path(tmp), verbose=verbose) + contents = list(dl_path.glob("modflow6-*")) + proj_path = next(iter(contents), None) + if not proj_path: + raise ValueError(f"Missing proj dir in {dl_path}, found {contents}") + if verbose: + print("Copying dfns from download dir to output dir") + copytree( + proj_path / "doc" / "mf6io" / "mf6ivar" / "dfn", outdir, dirs_exist_ok=True + ) + + +get_dfns = fetch_dfns # alias for backward compatibility diff --git a/modflow_devtools/dfn/parse.py b/modflow_devtools/dfn/parse.py new file mode 100644 index 00000000..5716d9cf --- /dev/null +++ b/modflow_devtools/dfn/parse.py @@ -0,0 +1,177 @@ +from ast import literal_eval +from typing import Any +from warnings import warn + +from boltons.dictutils import OMD + + +def field_attr_sort_key(item) -> int: + """ + Sort key for input field attributes. The order is: + -1. block + 0. name + 1. type + 2. shape + 3. default + 4. reader + 5. optional + 6. longname + 7. 
description + """ + + k, _ = item + if k == "block": + return -1 + if k == "name": + return 0 + if k == "type": + return 1 + if k == "shape": + return 2 + if k == "default": + return 3 + if k == "reader": + return 4 + if k == "optional": + return 5 + if k == "longname": + return 6 + if k == "description": + return 7 + return 8 + + +def try_parse_bool(value: Any) -> Any: + """ + Try to parse a boolean from a string as represented + in a DFN file, otherwise return the value unaltered. + 1. `"true"` -> `True` + 2. `"false"` -> `False` + 3. anything else -> `value` + """ + if isinstance(value, str): + value = value.lower() + if value in ["true", "false"]: + return value == "true" + return value + + +def try_parse_parent(meta: list[str]) -> str | None: + """ + Try to parse a component's parent component name from its metadata. + Return `None` if it has no parent specified. + """ + line = next( + iter(m for m in meta if isinstance(m, str) and m.startswith("parent")), + None, + ) + if not line: + return None + split = line.split() + return split[1] + + +def is_advanced_package(meta: list[str]) -> bool: + """Determine if the component is an advanced package from its metadata.""" + return any("package-type advanced" in m for m in meta) + + +def is_multi_package(meta: list[str]) -> bool: + """Determine if the component is a multi-package from its metadata.""" + return any("multi-package" in m for m in meta) + + +def parse_dfn(f, common: dict | None = None) -> tuple[OMD, list[str]]: + """ + Parse a DFN file into an ordered dict of fields and a list of metadata. + + Parameters + ---------- + f : readable file-like + A file-like object to read the DFN file from. + common : dict, optional + A dictionary of common variable definitions to use for + description substitutions, by default None. + + Returns + ------- + tuple[OMD, list[str]] + A tuple containing an ordered multi-dict of fields and a list of metadata. 
+ + Notes + ----- + A DFN file consists of field definitions (each as a set of attributes) and a + number of comment lines either a) containing metadata about the component or + b) delimiting variables into blocks. This parser reads the file line-by-line + and saves component metadata and field attributes, ignoring block delimiters; + There is a `block` attribute on each field anyway so delimiters are unneeded. + + The returned ordered multi-dict (OMD) maps names to dicts of their attributes, + with duplicate field names allowed. This is important because some DFN files + have fields with the same name defined multiple times for different purposes + (e.g., an `auxiliary` options block keyword, and column in the period block). + + """ + + common = common or {} + field: dict = {} + fields: list = [] + metadata: list = [] + + for line in f: + # parse metadata line + if (line := line.strip()).startswith("#"): + _, sep, tail = line.partition("flopy") + if sep == "flopy": + if ( + "multi-package" in tail + or "solution_package" in tail + or "subpackage" in tail + or "parent" in tail + ): + metadata.append(tail.strip()) + _, sep, tail = line.partition("package-type") + if sep == "package-type": + metadata.append(f"package-type {tail.strip()}") + continue + + # if we hit a newline and the field has attributes, + # we've reached the end of the field. Save it. + if not any(line): + if any(field): + fields.append((field["name"], field)) + field = {} + continue + + # parse field attribute + key, _, value = line.partition(" ") + if key == "default_value": + key = "default" + field[key] = value + + # if this is the description attribute, substitute + # from common variable definitions if needed. drop + # backslashes too, TODO: generate/insert citations. 
+ if key == "description": + descr = value.replace("\\", "").replace("``", "'").replace("''", "'") + _, replace, tail = descr.strip().partition("REPLACE") + if replace: + key, _, subs = tail.strip().partition(" ") + subs = literal_eval(subs) + cmmn = common.get(key, None) + if cmmn is None: + warn( + "Can't substitute description text, " + f"common variable not found: {key}" + ) + else: + descr = cmmn["description"] + if any(subs): + descr = descr.replace("\\", "").replace("{#1}", subs["{#1}"]) # type: ignore + field["description"] = descr + + # Save the last field if needed. + if any(field): + fields.append((field["name"], field)) + + return OMD(fields), metadata diff --git a/modflow_devtools/dfn/schema/block.py b/modflow_devtools/dfn/schema/block.py new file mode 100644 index 00000000..ed0f32af --- /dev/null +++ b/modflow_devtools/dfn/schema/block.py @@ -0,0 +1,22 @@ +from collections.abc import Mapping + +from modflow_devtools.dfn.schema.field import Fields + +Block = Fields +Blocks = Mapping[str, Block] + + +def block_sort_key(item) -> int: + k, _ = item + if k == "options": + return 0 + elif k == "dimensions": + return 1 + elif k == "griddata": + return 2 + elif k == "packagedata": + return 3 + elif "period" in k: + return 4 + else: + return 5 diff --git a/modflow_devtools/dfn/schema/field.py b/modflow_devtools/dfn/schema/field.py new file mode 100644 index 00000000..d48bde68 --- /dev/null +++ b/modflow_devtools/dfn/schema/field.py @@ -0,0 +1,39 @@ +from collections.abc import Mapping +from dataclasses import dataclass +from typing import Any, Literal + +SCALAR_TYPES = ("keyword", "integer", "double precision", "string") + +Fields = Mapping[str, "Field"] + +FieldType = Literal[ + "keyword", + "integer", + "double precision", + "string", + "record", + "recarray", + "keystring", +] + + +Reader = Literal[ + "urword", + "u1ddbl", + "u2ddbl", + "readarray", +] + + +@dataclass(kw_only=True) +class Field: + name: str + type: str | None = None + block: str | None = 
None + default: Any | None = None + description: str | None = None + children: Fields | None = None + optional: bool | None = None + reader: Reader = "urword" + shape: str | None = None + valid: tuple[str, ...] | None = None diff --git a/modflow_devtools/dfn/schema/ref.py b/modflow_devtools/dfn/schema/ref.py new file mode 100644 index 00000000..e9306a2f --- /dev/null +++ b/modflow_devtools/dfn/schema/ref.py @@ -0,0 +1,13 @@ +from dataclasses import dataclass + + +@dataclass +class Ref: + """ + A foreign-key-like reference between a file input variable + in a referring input component and another input component + referenced by it. + """ + + key: str # name of file path field in referring component + tgt: str # name of target component diff --git a/modflow_devtools/dfn/schema/v1.py b/modflow_devtools/dfn/schema/v1.py new file mode 100644 index 00000000..5919881e --- /dev/null +++ b/modflow_devtools/dfn/schema/v1.py @@ -0,0 +1,23 @@ +from dataclasses import dataclass + +from modflow_devtools.dfn.schema.field import Field + + +@dataclass(kw_only=True) +class FieldV1(Field): + valid: tuple[str, ...] 
| None = None + tagged: bool | None = None + in_record: bool | None = None + layered: bool | None = None + longname: str | None = None + preserve_case: bool | None = None + numeric_index: bool | None = None + deprecated: bool = False + removed: bool = False + mf6internal: str | None = None + + @classmethod + def from_dict(cls, d: dict) -> "FieldV1": + """Create a FieldV1 instance from a dictionary.""" + keys = list(cls.__annotations__.keys()) + list(Field.__annotations__.keys()) + return cls(**{k: v for k, v in d.items() if k in keys}) diff --git a/modflow_devtools/dfn/schema/v2.py b/modflow_devtools/dfn/schema/v2.py new file mode 100644 index 00000000..e13846cb --- /dev/null +++ b/modflow_devtools/dfn/schema/v2.py @@ -0,0 +1,19 @@ +from dataclasses import dataclass +from typing import Literal + +from modflow_devtools.dfn.schema.field import Field + +FieldType = Literal[ + "keyword", "integer", "double", "string", "array", "record", "union" +] + + +@dataclass(kw_only=True) +class FieldV2(Field): + pass + + @classmethod + def from_dict(cls, d: dict) -> "FieldV2": + """Create a FieldV2 instance from a dictionary.""" + keys = list(cls.__annotations__.keys()) + list(Field.__annotations__.keys()) + return cls(**{k: v for k, v in d.items() if k in keys}) diff --git a/modflow_devtools/dfn2toml.py b/modflow_devtools/dfn2toml.py index 7d346e6d..db7d5eaf 100644 --- a/modflow_devtools/dfn2toml.py +++ b/modflow_devtools/dfn2toml.py @@ -1,74 +1,61 @@ """Convert DFNs to TOML.""" import argparse +from dataclasses import asdict from os import PathLike from pathlib import Path import tomli_w as tomli from boltons.iterutils import remap -from modflow_devtools.dfn import Dfn +from modflow_devtools.dfn import load_flat, map, to_flat, to_tree +from modflow_devtools.dfn.schema.block import block_sort_key +from modflow_devtools.misc import drop_none_or_empty # mypy: ignore-errors -def convert(indir: PathLike, outdir: PathLike): +def convert(indir: PathLike, outdir: PathLike, 
schema_version: str = "2") -> None: indir = Path(indir).expanduser().absolute() outdir = Path(outdir).expanduser().absolute() outdir.mkdir(exist_ok=True, parents=True) - for dfn in Dfn.load_all(indir).values(): - dfn_name = dfn["name"] - # Determine new filename and parent relationship - if dfn_name == "sim-nam": - filename = "sim.toml" - dfn = dfn.copy() - dfn["name"] = "sim" - # No parent - this is root - elif dfn_name.endswith("-nam"): - # Model name files: gwf-nam -> gwf.toml, parent = "sim" - model_type = dfn_name[:-4] # Remove "-nam" - filename = f"{model_type}.toml" - dfn = dfn.copy() - dfn["name"] = model_type - dfn["parent"] = "sim" - elif dfn_name.startswith("exg-"): - # Exchanges: parent = "sim" - filename = f"{dfn_name}.toml" - dfn = dfn.copy() - dfn["parent"] = "sim" - elif dfn_name.startswith("sln-"): - # Solutions: parent = "sim" - filename = f"{dfn_name}.toml" - dfn = dfn.copy() - dfn["parent"] = "sim" - elif dfn_name.startswith("utl-"): - # Utilities: parent = "sim" - filename = f"{dfn_name}.toml" - dfn = dfn.copy() - dfn["parent"] = "sim" - elif "-" in dfn_name: - # Packages: gwf-dis -> parent = "gwf" - model_type = dfn_name.split("-")[0] - filename = f"{dfn_name}.toml" - dfn = dfn.copy() - dfn["parent"] = model_type - else: - # Default case - filename = f"{dfn_name}.toml" + dfns = { + name: map(dfn, schema_version=schema_version) + for name, dfn in load_flat(indir).items() + } + tree = to_tree(dfns) + flat = to_flat(tree) + for dfn_name, dfn in flat.items(): + with Path.open(outdir / f"{dfn_name}.toml", "wb") as f: + # TODO if we start using c/attrs, swap out + # all this for a custom unstructuring hook + dfn_dict = asdict(dfn) + dfn_dict["schema_version"] = str(dfn_dict["schema_version"]) + if dfn_dict.get("blocks"): + blocks = dfn_dict.pop("blocks") + for block_name, block_fields in blocks.items(): + if block_name not in dfn_dict: + dfn_dict[block_name] = {} + for field_name, field_data in block_fields.items(): + 
dfn_dict[block_name][field_name] = field_data - with Path.open(outdir / filename, "wb") as f: - - def drop_none_or_empty(path, key, value): - if value is None or value == "" or value == [] or value == {}: - return False - return True - - tomli.dump(remap(dfn, visit=drop_none_or_empty), f) + tomli.dump( + dict( + sorted( + remap(dfn_dict, visit=drop_none_or_empty).items(), + key=block_sort_key, + ) + ), + f, + ) if __name__ == "__main__": - """Convert DFN files to TOML.""" + """ + Convert DFN files in the original format and schema version (1) + to TOML files with a new schema version. + """ parser = argparse.ArgumentParser(description="Convert DFN files to TOML.") parser.add_argument( @@ -82,5 +69,12 @@ def drop_none_or_empty(path, key, value): "-o", help="Output directory.", ) + parser.add_argument( + "--schema-version", + "-s", + type=str, + default="2", + help="Schema version to convert to.", + ) args = parser.parse_args() - convert(args.indir, args.outdir) + convert(args.indir, args.outdir, args.schema_version) From dc0d0b5b43193215d6f10f34c9ddf207f5c28b82 Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Sat, 11 Oct 2025 10:33:21 -0400 Subject: [PATCH 10/36] feat(dfn): add longname and developmode to field (#238) --- modflow_devtools/dfn/schema/field.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modflow_devtools/dfn/schema/field.py b/modflow_devtools/dfn/schema/field.py index d48bde68..713201aa 100644 --- a/modflow_devtools/dfn/schema/field.py +++ b/modflow_devtools/dfn/schema/field.py @@ -31,9 +31,11 @@ class Field: type: str | None = None block: str | None = None default: Any | None = None + longname: str | None = None description: str | None = None children: Fields | None = None optional: bool | None = None + developmode: bool = False reader: Reader = "urword" shape: str | None = None valid: tuple[str, ...] 
| None = None From 007532b908ca737ef7d431137bb90606d3986660 Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Mon, 13 Oct 2025 08:55:23 -0400 Subject: [PATCH 11/36] refactor(dfn): rename default -> default_value in schema map (#240) Any changes like this should happen in the v1 -> v2 schema map instead of in the DFN parser --- modflow_devtools/dfn/__init__.py | 2 +- modflow_devtools/dfn/parse.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/modflow_devtools/dfn/__init__.py b/modflow_devtools/dfn/__init__.py index c532d099..4233ef25 100644 --- a/modflow_devtools/dfn/__init__.py +++ b/modflow_devtools/dfn/__init__.py @@ -162,7 +162,7 @@ def _map_field(_field) -> Field: shape = field_dict.pop("shape", None) shape = None if shape == "" else shape block = field_dict.pop("block", None) - default = field_dict.pop("default", None) + default = field_dict.pop("default_value", None) default = try_literal_eval(default) if _type != "string" else default description = field_dict.pop("description", "") diff --git a/modflow_devtools/dfn/parse.py b/modflow_devtools/dfn/parse.py index 5716d9cf..dc7f5e62 100644 --- a/modflow_devtools/dfn/parse.py +++ b/modflow_devtools/dfn/parse.py @@ -28,7 +28,7 @@ def field_attr_sort_key(item) -> int: return 1 if k == "shape": return 2 - if k == "default": + if k == "default_value": return 3 if k == "reader": return 4 @@ -145,8 +145,6 @@ def parse_dfn(f, common: dict | None = None) -> tuple[OMD, list[str]]: # parse field attribute key, _, value = line.partition(" ") - if key == "default_value": - key = "default" field[key] = value # if this is the description attribute, substitute From dac768e81f8c8bbd90f83ce7a89d6be2fdcfb314 Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Wed, 15 Oct 2025 01:03:37 -0400 Subject: [PATCH 12/36] feat(dfn): add from_dict method (#241) Allows using Dfn/Field spec objects explicitly by passing a dictionary to __init__ via double star syntax, or via from_dict which handles field structuring 
automatically, and with strict=False ignores unrecognized keys (like pydantic's extra="ignore"), if strict=True unrecognized keys cause an error (like pydantic's extra="forbid") --- autotest/test_dfn.py | 236 +++++++++++++++++++++++++++++- modflow_devtools/dfn/__init__.py | 49 ++++++- modflow_devtools/dfn/schema/v1.py | 18 ++- modflow_devtools/dfn/schema/v2.py | 18 ++- 4 files changed, 315 insertions(+), 6 deletions(-) diff --git a/autotest/test_dfn.py b/autotest/test_dfn.py index 31573a68..b54090e5 100644 --- a/autotest/test_dfn.py +++ b/autotest/test_dfn.py @@ -1,9 +1,13 @@ +from dataclasses import asdict from pathlib import Path import pytest +from packaging.version import Version -from modflow_devtools.dfn import _load_common, load, load_flat +from modflow_devtools.dfn import Dfn, _load_common, load, load_flat from modflow_devtools.dfn.fetch import fetch_dfns +from modflow_devtools.dfn.schema.v1 import FieldV1 +from modflow_devtools.dfn.schema.v2 import FieldV2 from modflow_devtools.dfn2toml import convert from modflow_devtools.markers import requires_pkg @@ -115,3 +119,233 @@ def test_convert(function_tmpdir): assert dis.parent == "gwf" assert "options" in (dis.blocks or {}) assert "dimensions" in (dis.blocks or {}) + + +def test_dfn_from_dict_ignores_extra_keys(): + d = { + "schema_version": Version("2"), + "name": "test-dfn", + "extra_key": "should be allowed", + "another_extra": 123, + } + dfn = Dfn.from_dict(d) + assert dfn.name == "test-dfn" + assert dfn.schema_version == Version("2") + + +def test_dfn_from_dict_strict_mode(): + d = { + "schema_version": Version("2"), + "name": "test-dfn", + "extra_key": "should cause error", + } + with pytest.raises(ValueError, match="Unrecognized keys in DFN data"): + Dfn.from_dict(d, strict=True) + + +def test_dfn_from_dict_strict_mode_nested(): + d = { + "schema_version": Version("2"), + "name": "test-dfn", + "blocks": { + "options": { + "test_field": { + "name": "test_field", + "type": "keyword", + "extra_key": 
"should cause error", + }, + }, + }, + } + with pytest.raises(ValueError, match="Unrecognized keys in field data"): + Dfn.from_dict(d, strict=True) + + +def test_dfn_from_dict_roundtrip(): + original = Dfn( + schema_version=Version("2"), + name="gwf-nam", + parent="sim-nam", + advanced=False, + multi=True, + blocks={"options": {}}, + ) + d = asdict(original) + reconstructed = Dfn.from_dict(d) + assert reconstructed.name == original.name + assert reconstructed.schema_version == original.schema_version + assert reconstructed.parent == original.parent + assert reconstructed.advanced == original.advanced + assert reconstructed.multi == original.multi + assert reconstructed.blocks == original.blocks + + +def test_fieldv1_from_dict_ignores_extra_keys(): + d = { + "name": "test_field", + "type": "keyword", + "extra_key": "should be allowed", + "another_extra": 123, + } + field = FieldV1.from_dict(d) + assert field.name == "test_field" + assert field.type == "keyword" + + +def test_fieldv1_from_dict_strict_mode(): + d = { + "name": "test_field", + "type": "keyword", + "extra_key": "should cause error", + } + with pytest.raises(ValueError, match="Unrecognized keys in field data"): + FieldV1.from_dict(d, strict=True) + + +def test_fieldv1_from_dict_roundtrip(): + original = FieldV1( + name="maxbound", + type="integer", + block="dimensions", + description="maximum number of cells", + tagged=True, + ) + d = asdict(original) + reconstructed = FieldV1.from_dict(d) + assert reconstructed.name == original.name + assert reconstructed.type == original.type + assert reconstructed.block == original.block + assert reconstructed.description == original.description + assert reconstructed.tagged == original.tagged + + +def test_fieldv2_from_dict_ignores_extra_keys(): + d = { + "name": "test_field", + "type": "keyword", + "extra_key": "should be allowed", + "another_extra": 123, + } + field = FieldV2.from_dict(d) + assert field.name == "test_field" + assert field.type == "keyword" + + +def 
test_fieldv2_from_dict_strict_mode(): + d = { + "name": "test_field", + "type": "keyword", + "extra_key": "should cause error", + } + with pytest.raises(ValueError, match="Unrecognized keys in field data"): + FieldV2.from_dict(d, strict=True) + + +def test_fieldv2_from_dict_roundtrip(): + original = FieldV2( + name="nper", + type="integer", + block="dimensions", + description="number of stress periods", + optional=False, + ) + d = asdict(original) + reconstructed = FieldV2.from_dict(d) + assert reconstructed.name == original.name + assert reconstructed.type == original.type + assert reconstructed.block == original.block + assert reconstructed.description == original.description + assert reconstructed.optional == original.optional + + +def test_dfn_from_dict_with_v1_field_dicts(): + d = { + "schema_version": Version("1"), + "name": "test-dfn", + "blocks": { + "options": { + "save_flows": { + "name": "save_flows", + "type": "keyword", + "tagged": True, + "in_record": False, + }, + }, + }, + } + dfn = Dfn.from_dict(d) + assert dfn.schema_version == Version("1") + assert dfn.name == "test-dfn" + assert dfn.blocks is not None + assert "options" in dfn.blocks + assert "save_flows" in dfn.blocks["options"] + + field = dfn.blocks["options"]["save_flows"] + assert isinstance(field, FieldV1) + assert field.name == "save_flows" + assert field.type == "keyword" + assert field.tagged is True + assert field.in_record is False + + +def test_dfn_from_dict_with_v2_field_dicts(): + d = { + "schema_version": Version("2"), + "name": "test-dfn", + "blocks": { + "dimensions": { + "nper": { + "name": "nper", + "type": "integer", + "optional": False, + }, + }, + }, + } + dfn = Dfn.from_dict(d) + assert dfn.schema_version == Version("2") + assert dfn.name == "test-dfn" + assert dfn.blocks is not None + assert "dimensions" in dfn.blocks + assert "nper" in dfn.blocks["dimensions"] + + field = dfn.blocks["dimensions"]["nper"] + assert isinstance(field, FieldV2) + assert field.name == "nper" + 
assert field.type == "integer" + assert field.optional is False + + +def test_dfn_from_dict_defaults_to_v2_fields(): + d = { + "name": "test-dfn", + "blocks": { + "options": { + "some_field": { + "name": "some_field", + "type": "keyword", + }, + }, + }, + } + dfn = Dfn.from_dict(d) + assert dfn.blocks is not None + field = dfn.blocks["options"]["some_field"] + assert isinstance(field, FieldV2) + assert dfn.schema_version == Version("2") + + +def test_dfn_from_dict_with_already_deserialized_fields(): + field = FieldV2(name="test", type="keyword") + d = { + "schema_version": Version("2"), + "name": "test-dfn", + "blocks": { + "options": { + "test": field, + }, + }, + } + dfn = Dfn.from_dict(d) + assert dfn.blocks is not None + assert dfn.blocks["options"]["test"] is field diff --git a/modflow_devtools/dfn/__init__.py b/modflow_devtools/dfn/__init__.py index 4233ef25..1ad4f136 100644 --- a/modflow_devtools/dfn/__init__.py +++ b/modflow_devtools/dfn/__init__.py @@ -24,7 +24,7 @@ try_parse_bool, try_parse_parent, ) -from modflow_devtools.dfn.schema.block import Block, Blocks +from modflow_devtools.dfn.schema.block import Block, Blocks, block_sort_key from modflow_devtools.dfn.schema.field import SCALAR_TYPES, Field, Fields from modflow_devtools.dfn.schema.ref import Ref from modflow_devtools.dfn.schema.v1 import FieldV1 @@ -42,6 +42,7 @@ "FieldV2", "Fields", "Ref", + "block_sort_key", "load", "load_flat", "load_tree", @@ -90,6 +91,52 @@ def fields(self) -> Fields: # TODO: change to normal dict after deprecating v1 schema return OMD(fields) + @classmethod + def from_dict(cls, d: dict, strict: bool = False) -> "Dfn": + """ + Create a Dfn instance from a dictionary. + + Parameters + ---------- + d : dict + Dictionary containing DFN data + strict : bool, optional + If True, raise ValueError if dict contains unrecognized keys at the + top level or in nested field dicts. If False (default), ignore + unrecognized keys. 
+ """ + keys = list(cls.__annotations__.keys()) + if strict: + extra_keys = set(d.keys()) - set(keys) + if extra_keys: + raise ValueError(f"Unrecognized keys in DFN data: {extra_keys}") + data = {k: v for k, v in d.items() if k in keys} + schema_version = data.get("schema_version", Version("2")) + field_cls = FieldV1 if schema_version == Version("1") else FieldV2 + + def _fields(block_name, block_data): + fields = {} + for field_name, field_data in block_data.items(): + if isinstance(field_data, dict): + fields[field_name] = field_cls.from_dict(field_data, strict=strict) + elif isinstance(field_data, field_cls): + fields[field_name] = field_data + else: + raise TypeError( + f"Invalid field data for {field_name} in block {block_name}: " + f"expected dict or Field, got {type(field_data)}" + ) + return fields + + if blocks := data.get("blocks"): + data["schema_version"] = schema_version + data["blocks"] = { + block_name: _fields(block_name, block_data) + for block_name, block_data in blocks.items() + } + + return cls(**data) + class SchemaMap(ABC): @abstractmethod diff --git a/modflow_devtools/dfn/schema/v1.py b/modflow_devtools/dfn/schema/v1.py index 5919881e..7d142ea4 100644 --- a/modflow_devtools/dfn/schema/v1.py +++ b/modflow_devtools/dfn/schema/v1.py @@ -17,7 +17,21 @@ class FieldV1(Field): mf6internal: str | None = None @classmethod - def from_dict(cls, d: dict) -> "FieldV1": - """Create a FieldV1 instance from a dictionary.""" + def from_dict(cls, d: dict, strict: bool = False) -> "FieldV1": + """ + Create a FieldV1 instance from a dictionary. + + Parameters + ---------- + d : dict + Dictionary containing field data + strict : bool, optional + If True, raise ValueError if dict contains unrecognized keys. + If False (default), ignore unrecognized keys. 
+ """ keys = list(cls.__annotations__.keys()) + list(Field.__annotations__.keys()) + if strict: + extra_keys = set(d.keys()) - set(keys) + if extra_keys: + raise ValueError(f"Unrecognized keys in field data: {extra_keys}") return cls(**{k: v for k, v in d.items() if k in keys}) diff --git a/modflow_devtools/dfn/schema/v2.py b/modflow_devtools/dfn/schema/v2.py index e13846cb..2bdab02b 100644 --- a/modflow_devtools/dfn/schema/v2.py +++ b/modflow_devtools/dfn/schema/v2.py @@ -13,7 +13,21 @@ class FieldV2(Field): pass @classmethod - def from_dict(cls, d: dict) -> "FieldV2": - """Create a FieldV2 instance from a dictionary.""" + def from_dict(cls, d: dict, strict: bool = False) -> "FieldV2": + """ + Create a FieldV2 instance from a dictionary. + + Parameters + ---------- + d : dict + Dictionary containing field data + strict : bool, optional + If True, raise ValueError if dict contains unrecognized keys. + If False (default), ignore unrecognized keys. + """ keys = list(cls.__annotations__.keys()) + list(Field.__annotations__.keys()) + if strict: + extra_keys = set(d.keys()) - set(keys) + if extra_keys: + raise ValueError(f"Unrecognized keys in field data: {extra_keys}") return cls(**{k: v for k, v in d.items() if k in keys}) From 1d6bc8fefaae7cfc35c24d9f78243092a18c3295 Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Wed, 15 Oct 2025 01:55:22 -0400 Subject: [PATCH 13/36] feat(dfn2toml): support single files, add validate mode (#242) --- autotest/test_dfn.py | 37 +++++++++- docs/md/dfns.md | 6 +- modflow_devtools/dfn2toml.py | 132 ++++++++++++++++++++++++++--------- 3 files changed, 139 insertions(+), 36 deletions(-) diff --git a/autotest/test_dfn.py b/autotest/test_dfn.py index b54090e5..a5974c2f 100644 --- a/autotest/test_dfn.py +++ b/autotest/test_dfn.py @@ -8,7 +8,7 @@ from modflow_devtools.dfn.fetch import fetch_dfns from modflow_devtools.dfn.schema.v1 import FieldV1 from modflow_devtools.dfn.schema.v2 import FieldV2 -from modflow_devtools.dfn2toml import convert 
+from modflow_devtools.dfn2toml import convert, validate from modflow_devtools.markers import requires_pkg PROJ_ROOT = Path(__file__).parents[1] @@ -349,3 +349,38 @@ def test_dfn_from_dict_with_already_deserialized_fields(): dfn = Dfn.from_dict(d) assert dfn.blocks is not None assert dfn.blocks["options"]["test"] is field + + +@requires_pkg("boltons") +def test_validate_directory(): + """Test validation on a directory of DFN files.""" + assert validate(DFN_DIR) is True + + +@requires_pkg("boltons") +def test_validate_single_file(dfn_name): + """Test validation on a single DFN file.""" + if dfn_name == "common": + pytest.skip("common.dfn is handled separately") + assert validate(DFN_DIR / f"{dfn_name}.dfn") is True + + +@requires_pkg("boltons") +def test_validate_common_file(): + """Test validation on common.dfn.""" + assert validate(DFN_DIR / "common.dfn") is True + + +@requires_pkg("boltons") +def test_validate_invalid_file(function_tmpdir): + """Test validation on an invalid DFN file.""" + invalid_dfn = function_tmpdir / "invalid.dfn" + invalid_dfn.write_text("invalid content") + assert validate(invalid_dfn) is False + + +@requires_pkg("boltons") +def test_validate_nonexistent_file(function_tmpdir): + """Test validation on a nonexistent file.""" + nonexistent = function_tmpdir / "nonexistent.dfn" + assert validate(nonexistent) is False diff --git a/docs/md/dfns.md b/docs/md/dfns.md index b4e5ad7f..3bc07482 100644 --- a/docs/md/dfns.md +++ b/docs/md/dfns.md @@ -22,8 +22,12 @@ Where legacy DFNs are flat lists of variables, with comments demarcating blocks, The `dfn` dependency group is necessary to use the TOML conversion utility. -To convert definition files to TOML, use: +To convert legacy format definition files to TOML, use: ```shell python -m modflow_devtools.dfn.dfn2toml -i -o ``` + +The tool may also be used on individual files. + +To validate legacy format definition files, use the `--validate` flag. 
diff --git a/modflow_devtools/dfn2toml.py b/modflow_devtools/dfn2toml.py index db7d5eaf..c4de3bb9 100644 --- a/modflow_devtools/dfn2toml.py +++ b/modflow_devtools/dfn2toml.py @@ -1,6 +1,7 @@ """Convert DFNs to TOML.""" import argparse +import sys from dataclasses import asdict from os import PathLike from pathlib import Path @@ -8,47 +9,99 @@ import tomli_w as tomli from boltons.iterutils import remap -from modflow_devtools.dfn import load_flat, map, to_flat, to_tree +from modflow_devtools.dfn import Dfn, load, load_flat, map, parse_dfn, to_flat, to_tree from modflow_devtools.dfn.schema.block import block_sort_key from modflow_devtools.misc import drop_none_or_empty # mypy: ignore-errors -def convert(indir: PathLike, outdir: PathLike, schema_version: str = "2") -> None: - indir = Path(indir).expanduser().absolute() +def validate(path: str | PathLike) -> bool: + """Validate DFN file(s) by attempting to parse them.""" + path = Path(path).expanduser().absolute() + try: + if not path.exists(): + raise FileNotFoundError(f"Path does not exist: {path}") + + if path.is_file(): + if path.name == "common.dfn": + with path.open() as f: + parse_dfn(f) + else: + common_path = path.parent / "common.dfn" + if common_path.exists(): + with common_path.open() as f: + common, _ = parse_dfn(f) + else: + common = {} + with path.open() as f: + load(f, name=path.stem, common=common, format="dfn") + else: + load_flat(path) + return True + except Exception as e: + print(f"Validation failed: {e}") + return False + + +def convert(inpath: PathLike, outdir: PathLike, schema_version: str = "2") -> None: + inpath = Path(inpath).expanduser().absolute() outdir = Path(outdir).expanduser().absolute() outdir.mkdir(exist_ok=True, parents=True) - dfns = { - name: map(dfn, schema_version=schema_version) - for name, dfn in load_flat(indir).items() - } - tree = to_tree(dfns) - flat = to_flat(tree) - for dfn_name, dfn in flat.items(): - with Path.open(outdir / f"{dfn_name}.toml", "wb") as f: - # TODO if we 
start using c/attrs, swap out - # all this for a custom unstructuring hook - dfn_dict = asdict(dfn) - dfn_dict["schema_version"] = str(dfn_dict["schema_version"]) - if dfn_dict.get("blocks"): - blocks = dfn_dict.pop("blocks") - for block_name, block_fields in blocks.items(): - if block_name not in dfn_dict: - dfn_dict[block_name] = {} - for field_name, field_data in block_fields.items(): - dfn_dict[block_name][field_name] = field_data - - tomli.dump( - dict( - sorted( - remap(dfn_dict, visit=drop_none_or_empty).items(), - key=block_sort_key, - ) - ), - f, - ) + if inpath.is_file(): + if inpath.name == "common.dfn": + raise ValueError("Cannot convert common.dfn as a standalone file") + + common_path = inpath.parent / "common.dfn" + if common_path.exists(): + with common_path.open() as f: + from modflow_devtools.dfn import parse_dfn + + common, _ = parse_dfn(f) + else: + common = {} + + with inpath.open() as f: + dfn = load(f, name=inpath.stem, common=common, format="dfn") + + dfn = map(dfn, schema_version=schema_version) + _convert(outdir / f"{inpath.stem}.toml", dfn) + else: + dfns = { + name: map(dfn, schema_version=schema_version) + for name, dfn in load_flat(inpath).items() + } + tree = to_tree(dfns) + flat = to_flat(tree) + for dfn_name, dfn in flat.items(): + _convert(outdir / f"{dfn_name}.toml", dfn) + + +def _convert(outpath: Path, dfn: Dfn) -> None: + """Write a DFN object to a TOML file.""" + with Path.open(outpath, "wb") as f: + # TODO if we start using c/attrs, swap out + # all this for a custom unstructuring hook + dfn_dict = asdict(dfn) + dfn_dict["schema_version"] = str(dfn_dict["schema_version"]) + if dfn_dict.get("blocks"): + blocks = dfn_dict.pop("blocks") + for block_name, block_fields in blocks.items(): + if block_name not in dfn_dict: + dfn_dict[block_name] = {} + for field_name, field_data in block_fields.items(): + dfn_dict[block_name][field_name] = field_data + + tomli.dump( + dict( + sorted( + remap(dfn_dict, 
visit=drop_none_or_empty).items(), + key=block_sort_key, + ) + ), + f, + ) if __name__ == "__main__": @@ -62,7 +115,7 @@ def convert(indir: PathLike, outdir: PathLike, schema_version: str = "2") -> Non "--indir", "-i", type=str, - help="Directory containing DFN files.", + help="Directory containing DFN files, or a single DFN file.", ) parser.add_argument( "--outdir", @@ -76,5 +129,16 @@ def convert(indir: PathLike, outdir: PathLike, schema_version: str = "2") -> Non default="2", help="Schema version to convert to.", ) + parser.add_argument( + "--validate", + "-v", + action="store_true", + help="Validate DFN files without converting them.", + ) args = parser.parse_args() - convert(args.indir, args.outdir, args.schema_version) + + if args.validate: + if not validate(args.indir): + sys.exit(1) + else: + convert(args.indir, args.outdir, args.schema_version) From 301fe4946f04850e70a7a5f4d6908226c3b6bf02 Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Wed, 15 Oct 2025 15:57:28 -0400 Subject: [PATCH 14/36] fix(dfn): add missing field attributes, model type constant (#244) and move v1 FieldType and Reader to the v1 schema module where they belong --- modflow_devtools/dfn/schema/field.py | 23 ++--------------- modflow_devtools/dfn/schema/v1.py | 38 +++++++++++++++++++++++----- modflow_devtools/dfn/schema/v2.py | 7 ++--- 3 files changed, 37 insertions(+), 31 deletions(-) diff --git a/modflow_devtools/dfn/schema/field.py b/modflow_devtools/dfn/schema/field.py index 713201aa..1557731a 100644 --- a/modflow_devtools/dfn/schema/field.py +++ b/modflow_devtools/dfn/schema/field.py @@ -1,29 +1,11 @@ from collections.abc import Mapping from dataclasses import dataclass -from typing import Any, Literal +from typing import Any SCALAR_TYPES = ("keyword", "integer", "double precision", "string") Fields = Mapping[str, "Field"] -FieldType = Literal[ - "keyword", - "integer", - "double precision", - "string", - "record", - "recarray", - "keystring", -] - - -Reader = Literal[ - "urword", - 
"u1ddbl", - "u2ddbl", - "readarray", -] - @dataclass(kw_only=True) class Field: @@ -34,8 +16,7 @@ class Field: longname: str | None = None description: str | None = None children: Fields | None = None - optional: bool | None = None + optional: bool = False developmode: bool = False - reader: Reader = "urword" shape: str | None = None valid: tuple[str, ...] | None = None diff --git a/modflow_devtools/dfn/schema/v1.py b/modflow_devtools/dfn/schema/v1.py index 7d142ea4..7c88bea8 100644 --- a/modflow_devtools/dfn/schema/v1.py +++ b/modflow_devtools/dfn/schema/v1.py @@ -1,20 +1,43 @@ from dataclasses import dataclass +from typing import Literal from modflow_devtools.dfn.schema.field import Field +FieldType = Literal[ + "keyword", + "integer", + "double precision", + "string", + "record", + "recarray", + "keystring", +] + + +Reader = Literal[ + "urword", + "u1ddbl", + "u2ddbl", + "readarray", +] + @dataclass(kw_only=True) class FieldV1(Field): valid: tuple[str, ...] | None = None - tagged: bool | None = None - in_record: bool | None = None + reader: Reader = "urword" + tagged: bool = False + in_record: bool = False layered: bool | None = None longname: str | None = None - preserve_case: bool | None = None - numeric_index: bool | None = None + preserve_case: bool = False + numeric_index: bool = False deprecated: bool = False removed: bool = False mf6internal: str | None = None + netcdf: str | None = None + block_variable: bool = False + just_data: bool = False @classmethod def from_dict(cls, d: dict, strict: bool = False) -> "FieldV1": @@ -29,9 +52,10 @@ def from_dict(cls, d: dict, strict: bool = False) -> "FieldV1": If True, raise ValueError if dict contains unrecognized keys. If False (default), ignore unrecognized keys. 
""" - keys = list(cls.__annotations__.keys()) + list(Field.__annotations__.keys()) + keys = set( + list(cls.__annotations__.keys()) + list(Field.__annotations__.keys()) + ) if strict: - extra_keys = set(d.keys()) - set(keys) - if extra_keys: + if extra_keys := set(d.keys()) - keys: raise ValueError(f"Unrecognized keys in field data: {extra_keys}") return cls(**{k: v for k, v in d.items() if k in keys}) diff --git a/modflow_devtools/dfn/schema/v2.py b/modflow_devtools/dfn/schema/v2.py index 2bdab02b..d3cff00d 100644 --- a/modflow_devtools/dfn/schema/v2.py +++ b/modflow_devtools/dfn/schema/v2.py @@ -25,9 +25,10 @@ def from_dict(cls, d: dict, strict: bool = False) -> "FieldV2": If True, raise ValueError if dict contains unrecognized keys. If False (default), ignore unrecognized keys. """ - keys = list(cls.__annotations__.keys()) + list(Field.__annotations__.keys()) + keys = set( + list(cls.__annotations__.keys()) + list(Field.__annotations__.keys()) + ) if strict: - extra_keys = set(d.keys()) - set(keys) - if extra_keys: + if extra_keys := set(d.keys()) - keys: raise ValueError(f"Unrecognized keys in field data: {extra_keys}") return cls(**{k: v for k, v in d.items() if k in keys}) From 5e591d708e9118c97fc95ff45b51ce8862c1e4ab Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Wed, 15 Oct 2025 21:14:05 -0400 Subject: [PATCH 15/36] fix(dfn): add list to v2 FieldType, drop union (#245) --- modflow_devtools/dfn/schema/v2.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/modflow_devtools/dfn/schema/v2.py b/modflow_devtools/dfn/schema/v2.py index d3cff00d..b6afd680 100644 --- a/modflow_devtools/dfn/schema/v2.py +++ b/modflow_devtools/dfn/schema/v2.py @@ -3,9 +3,7 @@ from modflow_devtools.dfn.schema.field import Field -FieldType = Literal[ - "keyword", "integer", "double", "string", "array", "record", "union" -] +FieldType = Literal["keyword", "integer", "double", "string", "record", "array", "list"] @dataclass(kw_only=True) From 
dd90b715c6bfba013290114ca079e6630098244c Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Thu, 16 Oct 2025 12:23:53 -0400 Subject: [PATCH 16/36] fix(dfn): sort blocks, coerce schema_version to Version (#246) add a __post_init__ hook to Dfn --- modflow_devtools/dfn/__init__.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/modflow_devtools/dfn/__init__.py b/modflow_devtools/dfn/__init__.py index 1ad4f136..4f357f92 100644 --- a/modflow_devtools/dfn/__init__.py +++ b/modflow_devtools/dfn/__init__.py @@ -91,6 +91,12 @@ def fields(self) -> Fields: # TODO: change to normal dict after deprecating v1 schema return OMD(fields) + def __post_init__(self): + if not isinstance(self.schema_version, Version): + self.schema_version = Version(str(self.schema_version)) + if self.blocks: + self.blocks = dict(sorted(self.blocks.items(), key=block_sort_key)) + @classmethod def from_dict(cls, d: dict, strict: bool = False) -> "Dfn": """ From 485dadfaeab3aad890b783d69a2d9fb494bf24c0 Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Mon, 27 Oct 2025 08:43:59 -0400 Subject: [PATCH 17/36] chore(dfn): remove duplicate longname from v1 schema field (#249) longname is already an attribute on the base field --- modflow_devtools/dfn/schema/v1.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modflow_devtools/dfn/schema/v1.py b/modflow_devtools/dfn/schema/v1.py index 7c88bea8..5e885fa9 100644 --- a/modflow_devtools/dfn/schema/v1.py +++ b/modflow_devtools/dfn/schema/v1.py @@ -29,7 +29,6 @@ class FieldV1(Field): tagged: bool = False in_record: bool = False layered: bool | None = None - longname: str | None = None preserve_case: bool = False numeric_index: bool = False deprecated: bool = False From 94e167e6a1f04d85a890699268af1be01edd8de6 Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Mon, 27 Oct 2025 19:42:16 -0400 Subject: [PATCH 18/36] fix(dfn): move SCALAR_TYPES to schema-version-specific modules (#250) --- modflow_devtools/dfn/__init__.py | 8 ++++---- modflow_devtools/dfn/schema/field.py 
| 2 -- modflow_devtools/dfn/schema/v1.py | 2 ++ modflow_devtools/dfn/schema/v2.py | 2 ++ 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/modflow_devtools/dfn/__init__.py b/modflow_devtools/dfn/__init__.py index 4f357f92..323d75af 100644 --- a/modflow_devtools/dfn/__init__.py +++ b/modflow_devtools/dfn/__init__.py @@ -25,14 +25,14 @@ try_parse_parent, ) from modflow_devtools.dfn.schema.block import Block, Blocks, block_sort_key -from modflow_devtools.dfn.schema.field import SCALAR_TYPES, Field, Fields +from modflow_devtools.dfn.schema.field import Field, Fields from modflow_devtools.dfn.schema.ref import Ref +from modflow_devtools.dfn.schema.v1 import SCALAR_TYPES as V1_SCALAR_TYPES from modflow_devtools.dfn.schema.v1 import FieldV1 from modflow_devtools.dfn.schema.v2 import FieldV2 from modflow_devtools.misc import drop_none_or_empty, try_literal_eval __all__ = [ - "SCALAR_TYPES", "Block", "Blocks", "Dfn", @@ -241,7 +241,7 @@ def _row_field() -> Field: ) # implicit record with all scalar fields - if all(t in SCALAR_TYPES for t in item_types): + if all(t in V1_SCALAR_TYPES for t in item_types): children = _record_fields() return FieldV2.from_dict( { @@ -329,7 +329,7 @@ def _record_fields() -> Fields: # for now, we can tell a var is an array if its type # is scalar and it has a shape. once we have proper # typing, this can be read off the type itself. 
- elif shape is not None and _type not in SCALAR_TYPES: + elif shape is not None and _type not in V1_SCALAR_TYPES: raise TypeError(f"Unsupported array type: {_type}") else: diff --git a/modflow_devtools/dfn/schema/field.py b/modflow_devtools/dfn/schema/field.py index 1557731a..1eeef3c7 100644 --- a/modflow_devtools/dfn/schema/field.py +++ b/modflow_devtools/dfn/schema/field.py @@ -2,8 +2,6 @@ from dataclasses import dataclass from typing import Any -SCALAR_TYPES = ("keyword", "integer", "double precision", "string") - Fields = Mapping[str, "Field"] diff --git a/modflow_devtools/dfn/schema/v1.py b/modflow_devtools/dfn/schema/v1.py index 5e885fa9..803cf584 100644 --- a/modflow_devtools/dfn/schema/v1.py +++ b/modflow_devtools/dfn/schema/v1.py @@ -13,6 +13,8 @@ "keystring", ] +SCALAR_TYPES = ("keyword", "integer", "double precision", "string") + Reader = Literal[ "urword", diff --git a/modflow_devtools/dfn/schema/v2.py b/modflow_devtools/dfn/schema/v2.py index b6afd680..89a2fe45 100644 --- a/modflow_devtools/dfn/schema/v2.py +++ b/modflow_devtools/dfn/schema/v2.py @@ -5,6 +5,8 @@ FieldType = Literal["keyword", "integer", "double", "string", "record", "array", "list"] +SCALAR_TYPES = ("keyword", "integer", "double", "string") + @dataclass(kw_only=True) class FieldV2(Field): From fba155cb0f465127adfe1ed11aacc56f3f71356d Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Wed, 29 Oct 2025 10:38:46 -0400 Subject: [PATCH 19/36] refactor(dfn): rename validate to is_valid and move to dfn module (#257) * rename validate -> is_valid and move to dfn module * trigger ci * fix field type conversion, add tests * tests for hairier parts of conversion --- .github/workflows/ci.yml | 1 + autotest/test_dfn.py | 246 ++++++++++++++++++++++++++++++- modflow_devtools/dfn/__init__.py | 32 +++- modflow_devtools/dfn2toml.py | 61 +++----- 4 files changed, 294 insertions(+), 46 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2f7836db..bd9d92ba 100644 --- 
a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,6 +9,7 @@ on: branches: - main - develop + - dfn paths-ignore: - '**.md' - '.github/workflows/release.yml' diff --git a/autotest/test_dfn.py b/autotest/test_dfn.py index a5974c2f..dabcf066 100644 --- a/autotest/test_dfn.py +++ b/autotest/test_dfn.py @@ -8,7 +8,7 @@ from modflow_devtools.dfn.fetch import fetch_dfns from modflow_devtools.dfn.schema.v1 import FieldV1 from modflow_devtools.dfn.schema.v2 import FieldV2 -from modflow_devtools.dfn2toml import convert, validate +from modflow_devtools.dfn2toml import convert, is_valid from modflow_devtools.markers import requires_pkg PROJ_ROOT = Path(__file__).parents[1] @@ -354,7 +354,7 @@ def test_dfn_from_dict_with_already_deserialized_fields(): @requires_pkg("boltons") def test_validate_directory(): """Test validation on a directory of DFN files.""" - assert validate(DFN_DIR) is True + assert is_valid(DFN_DIR) @requires_pkg("boltons") @@ -362,13 +362,13 @@ def test_validate_single_file(dfn_name): """Test validation on a single DFN file.""" if dfn_name == "common": pytest.skip("common.dfn is handled separately") - assert validate(DFN_DIR / f"{dfn_name}.dfn") is True + assert is_valid(DFN_DIR / f"{dfn_name}.dfn") @requires_pkg("boltons") def test_validate_common_file(): """Test validation on common.dfn.""" - assert validate(DFN_DIR / "common.dfn") is True + assert is_valid(DFN_DIR / "common.dfn") @requires_pkg("boltons") @@ -376,11 +376,245 @@ def test_validate_invalid_file(function_tmpdir): """Test validation on an invalid DFN file.""" invalid_dfn = function_tmpdir / "invalid.dfn" invalid_dfn.write_text("invalid content") - assert validate(invalid_dfn) is False + assert not is_valid(invalid_dfn) @requires_pkg("boltons") def test_validate_nonexistent_file(function_tmpdir): """Test validation on a nonexistent file.""" nonexistent = function_tmpdir / "nonexistent.dfn" - assert validate(nonexistent) is False + assert not is_valid(nonexistent) + + +def 
test_fieldv1_to_fieldv2_conversion(): + """Test that FieldV1 instances are properly converted to FieldV2.""" + from modflow_devtools.dfn import map + + dfn_v1 = Dfn( + schema_version=Version("1"), + name="test-dfn", + blocks={ + "options": { + "save_flows": FieldV1( + name="save_flows", + type="keyword", + block="options", + description="save calculated flows", + tagged=True, + in_record=False, + reader="urword", + ), + "some_float": FieldV1( + name="some_float", + type="double precision", + block="options", + description="a floating point value", + ), + } + }, + ) + + dfn_v2 = map(dfn_v1, schema_version="2") + assert dfn_v2.schema_version == Version("2") + assert dfn_v2.blocks is not None + assert "options" in dfn_v2.blocks + assert "save_flows" in dfn_v2.blocks["options"] + + save_flows = dfn_v2.blocks["options"]["save_flows"] + assert isinstance(save_flows, FieldV2) + assert save_flows.name == "save_flows" + assert save_flows.type == "keyword" + assert save_flows.block == "options" + assert save_flows.description == "save calculated flows" + assert not hasattr(save_flows, "tagged") + assert not hasattr(save_flows, "in_record") + assert not hasattr(save_flows, "reader") + + some_float = dfn_v2.blocks["options"]["some_float"] + assert isinstance(some_float, FieldV2) + assert some_float.name == "some_float" + assert some_float.type == "double" + assert some_float.block == "options" + assert some_float.description == "a floating point value" + + +def test_fieldv1_to_fieldv2_conversion_with_children(): + """Test that FieldV1 with nested children are properly converted to FieldV2.""" + from modflow_devtools.dfn import map + + # Create nested fields for a record + child_field_v1 = FieldV1( + name="cellid", + type="integer", + block="period", + description="cell identifier", + in_record=True, + tagged=False, + ) + + parent_field_v1 = FieldV1( + name="stress_period_data", + type="recarray cellid", + block="period", + description="stress period data", + in_record=False, + 
) + + dfn_v1 = Dfn( + schema_version=Version("1"), + name="test-dfn", + blocks={ + "period": { + "stress_period_data": parent_field_v1, + "cellid": child_field_v1, + } + }, + ) + + # Convert to v2 + dfn_v2 = map(dfn_v1, schema_version="2") + + # Check that all fields are FieldV2 instances + assert dfn_v2.blocks is not None + for block_name, block_fields in dfn_v2.blocks.items(): + for field_name, field in block_fields.items(): + assert isinstance(field, FieldV2) + # Check nested children too + if field.children: + for child_name, child_field in field.children.items(): + assert isinstance(child_field, FieldV2) + + +def test_period_block_conversion(): + """Test period block recarray conversion to individual arrays.""" + from modflow_devtools.dfn import map + + dfn_v1 = Dfn( + schema_version=Version("1"), + name="test-pkg", + blocks={ + "period": { + "stress_period_data": FieldV1( + name="stress_period_data", + type="recarray cellid q", + block="period", + description="stress period data", + ), + "cellid": FieldV1( + name="cellid", + type="integer", + block="period", + shape="(ncelldim)", + in_record=True, + ), + "q": FieldV1( + name="q", + type="double precision", + block="period", + shape="(maxbound)", + in_record=True, + ), + } + }, + ) + + dfn_v2 = map(dfn_v1, schema_version="2") + + period_block = dfn_v2.blocks["period"] + assert "cellid" not in period_block # cellid removed + assert "q" in period_block + assert isinstance(period_block["q"], FieldV2) + # Shape should be transformed: maxbound removed, nper and nnodes added + assert "nper" in period_block["q"].shape + assert "nnodes" in period_block["q"].shape + assert "maxbound" not in period_block["q"].shape + + +def test_record_type_conversion(): + """Test record type with multiple scalar fields.""" + from modflow_devtools.dfn import map + + dfn_v1 = Dfn( + schema_version=Version("1"), + name="test-dfn", + blocks={ + "options": { + "auxrecord": FieldV1( + name="auxrecord", + type="record auxiliary auxname", + 
block="options", + in_record=False, + ), + "auxiliary": FieldV1( + name="auxiliary", + type="keyword", + block="options", + in_record=True, + ), + "auxname": FieldV1( + name="auxname", + type="string", + block="options", + in_record=True, + ), + } + }, + ) + + dfn_v2 = map(dfn_v1, schema_version="2") + + auxrecord = dfn_v2.blocks["options"]["auxrecord"] + assert isinstance(auxrecord, FieldV2) + assert auxrecord.type == "record" + assert auxrecord.children is not None + assert "auxiliary" in auxrecord.children + assert "auxname" in auxrecord.children + assert isinstance(auxrecord.children["auxiliary"], FieldV2) + assert isinstance(auxrecord.children["auxname"], FieldV2) + + +def test_keystring_type_conversion(): + """Test keystring type conversion.""" + from modflow_devtools.dfn import map + + dfn_v1 = Dfn( + schema_version=Version("1"), + name="test-dfn", + blocks={ + "options": { + "obs_filerecord": FieldV1( + name="obs_filerecord", + type="record obs6 filein obs6_filename", + block="options", + tagged=True, + ), + "obs6": FieldV1( + name="obs6", + type="keyword", + block="options", + in_record=True, + ), + "filein": FieldV1( + name="filein", + type="keyword", + block="options", + in_record=True, + ), + "obs6_filename": FieldV1( + name="obs6_filename", + type="string", + block="options", + in_record=True, + preserve_case=True, + ), + } + }, + ) + + dfn_v2 = map(dfn_v1, schema_version="2") + + obs_rec = dfn_v2.blocks["options"]["obs_filerecord"] + assert isinstance(obs_rec, FieldV2) + assert obs_rec.type == "record" + assert obs_rec.children is not None + assert all(isinstance(child, FieldV2) for child in obs_rec.children.values()) diff --git a/modflow_devtools/dfn/__init__.py b/modflow_devtools/dfn/__init__.py index 323d75af..743a7ea7 100644 --- a/modflow_devtools/dfn/__init__.py +++ b/modflow_devtools/dfn/__init__.py @@ -43,6 +43,7 @@ "Fields", "Ref", "block_sort_key", + "is_valid", "load", "load_flat", "load_tree", @@ -333,7 +334,11 @@ def _record_fields() -> 
Fields: raise TypeError(f"Unsupported array type: {_type}") else: - _field.type = _type + # Map v1 type names to v2 type names + type_map = { + "double precision": "double", + } + _field.type = type_map.get(_type, _type) return _field @@ -574,3 +579,28 @@ def _flatten(dfn: Dfn) -> Dfns: return dfns return _flatten(dfn) + + +def is_valid(path: str | PathLike, format: str = "dfn", verbose: bool = False) -> bool: + """Validate DFN file(s).""" + path = Path(path).expanduser().absolute() + try: + if not path.exists(): + raise FileNotFoundError(f"Path does not exist: {path}") + + if path.is_file(): + common = {} # type: ignore + if (common_path := path.parent / "common.dfn").exists(): + with common_path.open() as f: + common, _ = parse_dfn(f) + if path.name == "common.dfn": + return True + with path.open() as f: + load(f, name=path.stem, common=common, format=format) + else: + load_flat(path) + return True + except Exception as e: + if verbose: + print(f"Validation failed: {e}") + return False diff --git a/modflow_devtools/dfn2toml.py b/modflow_devtools/dfn2toml.py index c4de3bb9..90104b95 100644 --- a/modflow_devtools/dfn2toml.py +++ b/modflow_devtools/dfn2toml.py @@ -2,6 +2,7 @@ import argparse import sys +import textwrap from dataclasses import asdict from os import PathLike from pathlib import Path @@ -9,42 +10,18 @@ import tomli_w as tomli from boltons.iterutils import remap -from modflow_devtools.dfn import Dfn, load, load_flat, map, parse_dfn, to_flat, to_tree +from modflow_devtools.dfn import Dfn, is_valid, load, load_flat, map, to_flat, to_tree from modflow_devtools.dfn.schema.block import block_sort_key from modflow_devtools.misc import drop_none_or_empty # mypy: ignore-errors -def validate(path: str | PathLike) -> bool: - """Validate DFN file(s) by attempting to parse them.""" - path = Path(path).expanduser().absolute() - try: - if not path.exists(): - raise FileNotFoundError(f"Path does not exist: {path}") - - if path.is_file(): - if path.name == 
"common.dfn": - with path.open() as f: - parse_dfn(f) - else: - common_path = path.parent / "common.dfn" - if common_path.exists(): - with common_path.open() as f: - common, _ = parse_dfn(f) - else: - common = {} - with path.open() as f: - load(f, name=path.stem, common=common, format="dfn") - else: - load_flat(path) - return True - except Exception as e: - print(f"Validation failed: {e}") - return False - - def convert(inpath: PathLike, outdir: PathLike, schema_version: str = "2") -> None: + """ + Convert DFN files in `inpath` to TOML files in `outdir`. + By default, convert the definitions to schema version 2. + """ inpath = Path(inpath).expanduser().absolute() outdir = Path(outdir).expanduser().absolute() outdir.mkdir(exist_ok=True, parents=True) @@ -66,7 +43,7 @@ def convert(inpath: PathLike, outdir: PathLike, schema_version: str = "2") -> No dfn = load(f, name=inpath.stem, common=common, format="dfn") dfn = map(dfn, schema_version=schema_version) - _convert(outdir / f"{inpath.stem}.toml", dfn) + _convert(dfn, outdir / f"{inpath.stem}.toml") else: dfns = { name: map(dfn, schema_version=schema_version) @@ -75,18 +52,16 @@ def convert(inpath: PathLike, outdir: PathLike, schema_version: str = "2") -> No tree = to_tree(dfns) flat = to_flat(tree) for dfn_name, dfn in flat.items(): - _convert(outdir / f"{dfn_name}.toml", dfn) + _convert(dfn, outdir / f"{dfn_name}.toml") -def _convert(outpath: Path, dfn: Dfn) -> None: - """Write a DFN object to a TOML file.""" +def _convert(dfn: Dfn, outpath: Path) -> None: with Path.open(outpath, "wb") as f: # TODO if we start using c/attrs, swap out # all this for a custom unstructuring hook dfn_dict = asdict(dfn) dfn_dict["schema_version"] = str(dfn_dict["schema_version"]) - if dfn_dict.get("blocks"): - blocks = dfn_dict.pop("blocks") + if blocks := dfn_dict.pop("blocks", None): for block_name, block_fields in blocks.items(): if block_name not in dfn_dict: dfn_dict[block_name] = {} @@ -106,11 +81,19 @@ def _convert(outpath: Path, 
dfn: Dfn) -> None: if __name__ == "__main__": """ - Convert DFN files in the original format and schema version (1) - to TOML files with a new schema version. + Convert DFN files in the original format and schema version 1 + to TOML files, by default also converting to schema version 2. """ - parser = argparse.ArgumentParser(description="Convert DFN files to TOML.") + parser = argparse.ArgumentParser( + description="Convert DFN files to TOML.", + epilog=textwrap.dedent( + """\ +Convert DFN files in the original format and schema version 1 +to TOML files, by default also converting to schema version 2. +""" + ), + ) parser.add_argument( "--indir", "-i", @@ -138,7 +121,7 @@ def _convert(outpath: Path, dfn: Dfn) -> None: args = parser.parse_args() if args.validate: - if not validate(args.indir): + if not is_valid(args.indir): sys.exit(1) else: convert(args.indir, args.outdir, args.schema_version) From d48c5f7edd536132b362e78f1592bef80c9c5125 Mon Sep 17 00:00:00 2001 From: w-bonelli Date: Sat, 1 Nov 2025 19:23:01 -0400 Subject: [PATCH 20/36] fix(dfn): fix record subfield order --- modflow_devtools/dfn/__init__.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/modflow_devtools/dfn/__init__.py b/modflow_devtools/dfn/__init__.py index 743a7ea7..488387ad 100644 --- a/modflow_devtools/dfn/__init__.py +++ b/modflow_devtools/dfn/__init__.py @@ -295,13 +295,18 @@ def _union_fields() -> Fields: def _record_fields() -> Fields: """Parse a record's fields""" names = _type.split()[1:] - return { - f.name: _map_field(f) - for f in fields.values(multi=True) - if f.name in names - and f.in_record - and not f.type.startswith("record") - } + result = {} + for name in names: + matching = [ + f + for f in fields.values(multi=True) + if f.name == name + and f.in_record + and not f.type.startswith("record") + ] + if matching: + result[name] = _map_field(matching[0]) + return result _field = FieldV2.from_dict( { From 
5660d38d3b12d8ddd9e12b66c548f9093dd0dbae Mon Sep 17 00:00:00 2001 From: w-bonelli Date: Sat, 1 Nov 2025 19:26:47 -0400 Subject: [PATCH 21/36] fix(dfn): fix v1 -> v2 field type names recarray -> list, keystring -> union --- modflow_devtools/dfn/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modflow_devtools/dfn/__init__.py b/modflow_devtools/dfn/__init__.py index 488387ad..760267cd 100644 --- a/modflow_devtools/dfn/__init__.py +++ b/modflow_devtools/dfn/__init__.py @@ -163,7 +163,7 @@ def map_period_block(dfn: Dfn, block: Block) -> Block: block = dict(block) fields = list(block.values()) - if fields[0].type == "recarray": + if fields[0].type == "list": assert len(fields) == 1 recarray_name = fields[0].name block.pop(recarray_name, None) @@ -322,11 +322,11 @@ def _record_fields() -> Fields: if _type.startswith("recarray"): child = _row_field() _field.children = {child.name: child} - _field.type = "recarray" + _field.type = "list" elif _type.startswith("keystring"): _field.children = _union_fields() - _field.type = "keystring" + _field.type = "union" elif _type.startswith("record"): _field.children = _record_fields() From 9adb9221842619a6644d9e2a9422975bc12c1034 Mon Sep 17 00:00:00 2001 From: mjreno Date: Mon, 8 Dec 2025 07:57:04 -0500 Subject: [PATCH 22/36] dfn: add netcdf to v2 field (#265) --- modflow_devtools/dfn/schema/field.py | 1 + modflow_devtools/dfn/schema/v1.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/modflow_devtools/dfn/schema/field.py b/modflow_devtools/dfn/schema/field.py index 1eeef3c7..4730f48f 100644 --- a/modflow_devtools/dfn/schema/field.py +++ b/modflow_devtools/dfn/schema/field.py @@ -18,3 +18,4 @@ class Field: developmode: bool = False shape: str | None = None valid: tuple[str, ...] 
| None = None + netcdf: bool = False diff --git a/modflow_devtools/dfn/schema/v1.py b/modflow_devtools/dfn/schema/v1.py index 803cf584..7e36bc4b 100644 --- a/modflow_devtools/dfn/schema/v1.py +++ b/modflow_devtools/dfn/schema/v1.py @@ -36,7 +36,6 @@ class FieldV1(Field): deprecated: bool = False removed: bool = False mf6internal: str | None = None - netcdf: str | None = None block_variable: bool = False just_data: bool = False From 603f5ec8c55125a8c28b1dd3453563f84db720e4 Mon Sep 17 00:00:00 2001 From: w-bonelli Date: Tue, 20 Jan 2026 10:25:17 -0500 Subject: [PATCH 23/36] implement per plan --- autotest/test_dfn.py | 6 +- autotest/test_dfn_registry.py | 656 +++++++++++++++++++++++ modflow_devtools/dfn/__init__.py | 327 ++++++++++-- modflow_devtools/dfn/__main__.py | 267 ++++++++++ modflow_devtools/dfn/dfns.toml | 24 + modflow_devtools/dfn/fetch.py | 8 +- modflow_devtools/dfn/make_registry.py | 184 +++++++ modflow_devtools/dfn/parse.py | 5 +- modflow_devtools/dfn/registry.py | 741 ++++++++++++++++++++++++++ modflow_devtools/dfn/schema/v1.py | 4 +- modflow_devtools/dfn/schema/v2.py | 4 +- modflow_devtools/dfn2toml.py | 3 +- pyproject.toml | 5 + 13 files changed, 2178 insertions(+), 56 deletions(-) create mode 100644 autotest/test_dfn_registry.py create mode 100644 modflow_devtools/dfn/__main__.py create mode 100644 modflow_devtools/dfn/dfns.toml create mode 100644 modflow_devtools/dfn/make_registry.py create mode 100644 modflow_devtools/dfn/registry.py diff --git a/autotest/test_dfn.py b/autotest/test_dfn.py index dabcf066..9eb0f401 100644 --- a/autotest/test_dfn.py +++ b/autotest/test_dfn.py @@ -108,11 +108,7 @@ def test_convert(function_tmpdir): if gwf := models.get("gwf-nam", None): pkgs = gwf.children or {} - pkgs = { - k: v - for k, v in pkgs.items() - if k.startswith("gwf-") and isinstance(v, dict) - } + pkgs = {k: v for k, v in pkgs.items() if k.startswith("gwf-") and isinstance(v, dict)} assert len(pkgs) > 0 if dis := pkgs.get("gwf-dis", None): assert 
dis.name == "gwf-dis" diff --git a/autotest/test_dfn_registry.py b/autotest/test_dfn_registry.py new file mode 100644 index 00000000..9644d86b --- /dev/null +++ b/autotest/test_dfn_registry.py @@ -0,0 +1,656 @@ +"""Tests for the DFNs API registry infrastructure.""" + +from __future__ import annotations + +from pathlib import Path +from unittest.mock import patch + +import pytest +from packaging.version import Version + +from modflow_devtools.dfn.fetch import fetch_dfns +from modflow_devtools.markers import requires_pkg + +PROJ_ROOT = Path(__file__).parents[1] +DFN_DIR = PROJ_ROOT / "autotest" / "temp" / "dfn" +MF6_OWNER = "MODFLOW-ORG" +MF6_REPO = "modflow6" +MF6_REF = "develop" + + +@pytest.fixture(scope="module") +def dfn_dir(): + """Ensure DFN files are downloaded for testing.""" + if not any(DFN_DIR.glob("*.dfn")): + fetch_dfns(MF6_OWNER, MF6_REPO, MF6_REF, DFN_DIR, verbose=True) + return DFN_DIR + + +# ============================================================================= +# DfnSpec Tests +# ============================================================================= + + +@requires_pkg("boltons") +class TestDfnSpec: + """Tests for the DfnSpec class.""" + + def test_load_from_directory(self, dfn_dir): + """Test loading a DfnSpec from a directory of DFN files.""" + from modflow_devtools.dfn import DfnSpec + + spec = DfnSpec.load(dfn_dir) + + # Should have loaded and mapped to v2 + assert spec.schema_version == Version("2") + assert spec.root is not None + assert spec.root.name == "sim-nam" + + def test_load_with_explicit_schema_version(self, dfn_dir): + """Test loading with explicit schema version.""" + from modflow_devtools.dfn import DfnSpec + + spec = DfnSpec.load(dfn_dir, schema_version="2") + + assert spec.schema_version == Version("2") + + def test_mapping_protocol(self, dfn_dir): + """Test that DfnSpec implements the Mapping protocol.""" + from modflow_devtools.dfn import DfnSpec + + spec = DfnSpec.load(dfn_dir) + + # Test __len__ + assert 
len(spec) > 100 # Should have many components + + # Test __iter__ + names = list(spec) + assert "sim-nam" in names + assert "gwf-nam" in names + assert "gwf-chd" in names + + # Test __getitem__ + gwf_chd = spec["gwf-chd"] + assert gwf_chd.name == "gwf-chd" + assert gwf_chd.parent == "gwf-nam" + + # Test __contains__ + assert "gwf-chd" in spec + assert "nonexistent" not in spec + + # Test keys(), values(), items() + assert "gwf-wel" in spec.keys() + assert any(d.name == "gwf-wel" for d in spec.values()) + assert any(n == "gwf-wel" for n, d in spec.items()) + + def test_getitem_raises_keyerror(self, dfn_dir): + """Test that __getitem__ raises KeyError for missing components.""" + from modflow_devtools.dfn import DfnSpec + + spec = DfnSpec.load(dfn_dir) + + with pytest.raises(KeyError, match="nonexistent"): + _ = spec["nonexistent"] + + def test_hierarchical_access(self, dfn_dir): + """Test accessing components through the hierarchical tree.""" + from modflow_devtools.dfn import DfnSpec + + spec = DfnSpec.load(dfn_dir) + + # Root should be sim-nam + assert spec.root.name == "sim-nam" + + # Root should have children + assert spec.root.children is not None + assert "gwf-nam" in spec.root.children + + # gwf-nam should have its own children + gwf_nam = spec.root.children["gwf-nam"] + assert gwf_nam.children is not None + assert "gwf-chd" in gwf_nam.children + + def test_load_empty_directory_raises(self, tmp_path): + """Test that loading from empty directory raises ValueError.""" + from modflow_devtools.dfn import DfnSpec + + with pytest.raises(ValueError, match="No DFN files found"): + DfnSpec.load(tmp_path) + + +# ============================================================================= +# Bootstrap and Registry Schema Tests +# ============================================================================= + + +@requires_pkg("pydantic") +class TestBootstrapConfig: + """Tests for bootstrap configuration schemas.""" + + def test_source_config_defaults(self): + """Test 
SourceConfig default values.""" + from modflow_devtools.dfn.registry import SourceConfig + + config = SourceConfig(repo="owner/repo") + + assert config.repo == "owner/repo" + assert config.dfn_path == "doc/mf6io/mf6ivar/dfn" + assert config.registry_path == ".registry/dfns.toml" + assert config.refs == [] + + def test_source_config_custom_values(self): + """Test SourceConfig with custom values.""" + from modflow_devtools.dfn.registry import SourceConfig + + config = SourceConfig( + repo="custom/repo", + dfn_path="custom/path", + registry_path="custom/registry.toml", + refs=["main", "v1.0"], + ) + + assert config.repo == "custom/repo" + assert config.dfn_path == "custom/path" + assert config.registry_path == "custom/registry.toml" + assert config.refs == ["main", "v1.0"] + + def test_bootstrap_config_load(self, tmp_path): + """Test loading BootstrapConfig from TOML file.""" + from modflow_devtools.dfn.registry import BootstrapConfig + + config_file = tmp_path / "dfns.toml" + config_file.write_text(""" +[sources.test] +repo = "test/repo" +refs = ["main"] +""") + + config = BootstrapConfig.load(config_file) + + assert "test" in config.sources + assert config.sources["test"].repo == "test/repo" + assert config.sources["test"].refs == ["main"] + + def test_bootstrap_config_load_nonexistent(self, tmp_path): + """Test loading from nonexistent file returns empty config.""" + from modflow_devtools.dfn.registry import BootstrapConfig + + config = BootstrapConfig.load(tmp_path / "nonexistent.toml") + + assert config.sources == {} + + def test_bootstrap_config_merge(self): + """Test merging two bootstrap configs.""" + from modflow_devtools.dfn.registry import BootstrapConfig, SourceConfig + + base = BootstrapConfig( + sources={ + "source1": SourceConfig(repo="base/source1", refs=["v1"]), + "source2": SourceConfig(repo="base/source2"), + } + ) + overlay = BootstrapConfig( + sources={ + "source1": SourceConfig(repo="overlay/source1", refs=["v2"]), + "source3": 
SourceConfig(repo="overlay/source3"), + } + ) + + merged = BootstrapConfig.merge(base, overlay) + + # overlay overrides base for source1 + assert merged.sources["source1"].repo == "overlay/source1" + assert merged.sources["source1"].refs == ["v2"] + # source2 from base preserved + assert merged.sources["source2"].repo == "base/source2" + # source3 from overlay added + assert merged.sources["source3"].repo == "overlay/source3" + + def test_get_bootstrap_config(self): + """Test loading bundled bootstrap config.""" + from modflow_devtools.dfn.registry import get_bootstrap_config + + config = get_bootstrap_config() + + assert "modflow6" in config.sources + assert config.sources["modflow6"].repo == "MODFLOW-ORG/modflow6" + + +@requires_pkg("pydantic") +class TestRegistryMeta: + """Tests for registry metadata schemas.""" + + def test_dfn_registry_file(self): + """Test DfnRegistryFile schema.""" + from modflow_devtools.dfn.registry import DfnRegistryFile + + file_entry = DfnRegistryFile(hash="sha256:abc123") + assert file_entry.hash == "sha256:abc123" + + def test_dfn_registry_meta_defaults(self): + """Test DfnRegistryMeta default values.""" + from modflow_devtools.dfn.registry import DfnRegistryMeta + + meta = DfnRegistryMeta() + + assert meta.schema_version == "1.0" + assert meta.generated_at is None + assert meta.devtools_version is None + assert meta.ref is None + assert meta.files == {} + + def test_dfn_registry_meta_load(self, tmp_path): + """Test loading DfnRegistryMeta from TOML file.""" + from modflow_devtools.dfn.registry import DfnRegistryMeta + + registry_file = tmp_path / "dfns.toml" + registry_file.write_text(""" +schema_version = "1.0" + +[metadata] +ref = "6.6.0" + +[files."gwf-chd.dfn"] +hash = "sha256:abc123" + +[files."gwf-wel.dfn"] +hash = "sha256:def456" +""") + + meta = DfnRegistryMeta.load(registry_file) + + assert meta.schema_version == "1.0" + assert meta.ref == "6.6.0" + assert len(meta.files) == 2 + assert meta.files["gwf-chd.dfn"].hash == 
"sha256:abc123" + assert meta.files["gwf-wel.dfn"].hash == "sha256:def456" + + def test_dfn_registry_meta_save(self, tmp_path): + """Test saving DfnRegistryMeta to TOML file.""" + import tomli + + from modflow_devtools.dfn.registry import DfnRegistryFile, DfnRegistryMeta + + meta = DfnRegistryMeta( + schema_version="1.0", + ref="test-ref", + files={ + "test.dfn": DfnRegistryFile(hash="sha256:abc123"), + }, + ) + + output_path = tmp_path / "output.toml" + meta.save(output_path) + + assert output_path.exists() + + with output_path.open("rb") as f: + data = tomli.load(f) + + assert data["schema_version"] == "1.0" + assert data["metadata"]["ref"] == "test-ref" + assert data["files"]["test.dfn"]["hash"] == "sha256:abc123" + + +# ============================================================================= +# LocalDfnRegistry Tests +# ============================================================================= + + +@requires_pkg("boltons", "pydantic") +class TestLocalDfnRegistry: + """Tests for LocalDfnRegistry class.""" + + def test_init(self, dfn_dir): + """Test LocalDfnRegistry initialization.""" + from modflow_devtools.dfn import LocalDfnRegistry + + registry = LocalDfnRegistry(path=dfn_dir, ref="local") + + assert registry.source == "modflow6" + assert registry.ref == "local" + assert registry.path == dfn_dir.resolve() + + def test_spec_property(self, dfn_dir): + """Test accessing spec through registry.""" + from modflow_devtools.dfn import LocalDfnRegistry + + registry = LocalDfnRegistry(path=dfn_dir) + + spec = registry.spec + + assert spec.schema_version == Version("2") + assert len(spec) > 100 + + def test_get_dfn(self, dfn_dir): + """Test getting a DFN by name.""" + from modflow_devtools.dfn import LocalDfnRegistry + + registry = LocalDfnRegistry(path=dfn_dir) + + dfn = registry.get_dfn("gwf-chd") + + assert dfn.name == "gwf-chd" + assert dfn.parent == "gwf-nam" + + def test_get_dfn_path(self, dfn_dir): + """Test getting file path for a component.""" + from 
modflow_devtools.dfn import LocalDfnRegistry + + registry = LocalDfnRegistry(path=dfn_dir) + + path = registry.get_dfn_path("gwf-chd") + + assert path.exists() + assert path.name == "gwf-chd.dfn" + + def test_get_dfn_path_not_found(self, dfn_dir): + """Test getting path for nonexistent component raises FileNotFoundError.""" + from modflow_devtools.dfn import LocalDfnRegistry + + registry = LocalDfnRegistry(path=dfn_dir) + + with pytest.raises(FileNotFoundError, match="nonexistent"): + registry.get_dfn_path("nonexistent") + + def test_schema_version_property(self, dfn_dir): + """Test schema_version property.""" + from modflow_devtools.dfn import LocalDfnRegistry + + registry = LocalDfnRegistry(path=dfn_dir) + + assert registry.schema_version == Version("2") + + def test_components_property(self, dfn_dir): + """Test components property returns flat dict.""" + from modflow_devtools.dfn import LocalDfnRegistry + + registry = LocalDfnRegistry(path=dfn_dir) + + components = registry.components + + assert isinstance(components, dict) + assert "gwf-chd" in components + assert components["gwf-chd"].name == "gwf-chd" + + +# ============================================================================= +# Cache Utilities Tests +# ============================================================================= + + +@requires_pkg("pydantic") +class TestCacheUtilities: + """Tests for cache and config utilities.""" + + def test_get_cache_dir(self): + """Test getting cache directory path.""" + from modflow_devtools.dfn.registry import get_cache_dir + + cache_dir = get_cache_dir("dfn") + + assert cache_dir.name == "dfn" + assert "modflow-devtools" in str(cache_dir) + + def test_get_user_config_path(self): + """Test getting user config path.""" + from modflow_devtools.dfn.registry import get_user_config_path + + config_path = get_user_config_path("dfn") + + assert config_path.name == "dfns.toml" + assert "modflow-devtools" in str(config_path) + + def 
test_get_cache_dir_custom_subdir(self): + """Test cache dir with custom subdirectory.""" + from modflow_devtools.dfn.registry import get_cache_dir + + cache_dir = get_cache_dir("custom") + + assert cache_dir.name == "custom" + + +# ============================================================================= +# make_registry Tool Tests +# ============================================================================= + + +@requires_pkg("tomli", "tomli_w") +class TestMakeRegistry: + """Tests for the registry generation tool.""" + + def test_compute_file_hash(self, tmp_path): + """Test computing file hash.""" + from modflow_devtools.dfn.make_registry import compute_file_hash + + test_file = tmp_path / "test.txt" + test_file.write_text("hello world") + + hash_value = compute_file_hash(test_file) + + assert hash_value.startswith("sha256:") + # Known hash for "hello world" + assert "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9" in hash_value + + def test_scan_dfn_directory(self, dfn_dir): + """Test scanning a DFN directory.""" + from modflow_devtools.dfn.make_registry import scan_dfn_directory + + files = scan_dfn_directory(dfn_dir) + + assert len(files) > 100 + assert "gwf-chd.dfn" in files + assert "common.dfn" in files + assert all(h.startswith("sha256:") for h in files.values()) + + def test_generate_registry(self, dfn_dir, tmp_path): + """Test generating a registry file.""" + import tomli + + from modflow_devtools.dfn.make_registry import generate_registry + + output_path = tmp_path / "dfns.toml" + + generate_registry( + dfn_path=dfn_dir, + output_path=output_path, + ref="test-ref", + ) + + assert output_path.exists() + + with output_path.open("rb") as f: + data = tomli.load(f) + + assert data["schema_version"] == "1.0" + assert "generated_at" in data + assert data["metadata"]["ref"] == "test-ref" + assert "gwf-chd.dfn" in data["files"] + + def test_generate_registry_empty_dir(self, tmp_path): + """Test generating registry from empty directory 
raises ValueError.""" + from modflow_devtools.dfn.make_registry import generate_registry + + with pytest.raises(ValueError, match="No DFN files found"): + generate_registry( + dfn_path=tmp_path, + output_path=tmp_path / "dfns.toml", + ) + + def test_cli_help(self): + """Test CLI help output.""" + from modflow_devtools.dfn.make_registry import main + + # --help should exit with 0 + with pytest.raises(SystemExit) as exc_info: + main(["--help"]) + assert exc_info.value.code == 0 + + def test_cli_generate(self, dfn_dir, tmp_path): + """Test CLI generate command.""" + from modflow_devtools.dfn.make_registry import main + + output_path = tmp_path / "dfns.toml" + + result = main( + [ + "--dfn-path", + str(dfn_dir), + "--output", + str(output_path), + "--ref", + "test-ref", + ] + ) + + assert result == 0 + assert output_path.exists() + + +# ============================================================================= +# CLI Tests +# ============================================================================= + + +@requires_pkg("pydantic") +class TestCLI: + """Tests for the dfn CLI.""" + + def test_main_help(self): + """Test CLI help output.""" + from modflow_devtools.dfn.__main__ import main + + result = main([]) + assert result == 0 + + def test_info_command(self): + """Test info command.""" + from modflow_devtools.dfn.__main__ import main + + result = main(["info"]) + assert result == 0 + + def test_clean_command_no_cache(self, tmp_path): + """Test clean command when cache doesn't exist.""" + from modflow_devtools.dfn.__main__ import main + + # Patch get_cache_dir to return nonexistent directory + with patch("modflow_devtools.dfn.__main__.get_cache_dir") as mock_cache_dir: + mock_cache_dir.return_value = tmp_path / "nonexistent" + result = main(["clean"]) + + assert result == 0 + + def test_sync_command_no_registry(self): + """Test sync command when registry doesn't exist (expected to fail).""" + from modflow_devtools.dfn.__main__ import main + + # This should fail 
because MODFLOW 6 repo doesn't have the registry yet + result = main(["sync", "--ref", "nonexistent-ref"]) + assert result == 1 + + +# ============================================================================= +# RemoteDfnRegistry Tests (Mocked) +# ============================================================================= + + +@requires_pkg("pydantic", "pooch") +class TestRemoteDfnRegistry: + """Tests for RemoteDfnRegistry with mocked network calls.""" + + def test_init(self): + """Test RemoteDfnRegistry initialization.""" + from modflow_devtools.dfn import RemoteDfnRegistry + + registry = RemoteDfnRegistry(source="modflow6", ref="develop") + + assert registry.source == "modflow6" + assert registry.ref == "develop" + + def test_unknown_source_raises(self): + """Test that unknown source raises ValueError.""" + from modflow_devtools.dfn import RemoteDfnRegistry + + with pytest.raises(ValueError, match="Unknown source"): + RemoteDfnRegistry(source="nonexistent", ref="develop") + + def test_construct_raw_url(self): + """Test URL construction.""" + from modflow_devtools.dfn.registry import RemoteDfnRegistry + + registry = RemoteDfnRegistry(source="modflow6", ref="6.6.0") + + url = registry._construct_raw_url("doc/mf6io/mf6ivar/dfn") + + assert "raw.githubusercontent.com" in url + assert "MODFLOW-ORG/modflow6" in url + assert "6.6.0" in url + + def test_get_registry_cache_path(self): + """Test getting registry cache path.""" + from modflow_devtools.dfn.registry import RemoteDfnRegistry + + registry = RemoteDfnRegistry(source="modflow6", ref="6.6.0") + + path = registry._get_registry_cache_path() + + assert "registries" in str(path) + assert "modflow6" in str(path) + assert "6.6.0" in str(path) + assert path.name == "dfns.toml" + + def test_get_files_cache_dir(self): + """Test getting files cache directory.""" + from modflow_devtools.dfn.registry import RemoteDfnRegistry + + registry = RemoteDfnRegistry(source="modflow6", ref="6.6.0") + + path = 
registry._get_files_cache_dir() + + assert "files" in str(path) + assert "modflow6" in str(path) + assert "6.6.0" in str(path) + + def test_fetch_registry_not_found(self): + """Test that fetching nonexistent registry raises appropriate error.""" + from modflow_devtools.dfn.registry import ( + DfnRegistryNotFoundError, + RemoteDfnRegistry, + ) + + registry = RemoteDfnRegistry(source="modflow6", ref="nonexistent-ref-12345") + + with pytest.raises(DfnRegistryNotFoundError): + registry._fetch_registry(force=True) + + +# ============================================================================= +# Module-level Convenience Functions Tests +# ============================================================================= + + +@requires_pkg("boltons", "pydantic") +class TestModuleFunctions: + """Tests for module-level convenience functions.""" + + def test_list_components_local(self, dfn_dir): + """Test list_components with local registry.""" + from modflow_devtools.dfn import LocalDfnRegistry + + registry = LocalDfnRegistry(path=dfn_dir) + components = list(registry.spec.keys()) + + assert len(components) > 100 + assert "gwf-chd" in components + assert "sim-nam" in components + + def test_get_sync_status(self): + """Test get_sync_status function.""" + from modflow_devtools.dfn.registry import get_sync_status + + status = get_sync_status() + + assert isinstance(status, dict) + # All refs should be either True or False + assert all(isinstance(v, bool) for v in status.values()) diff --git a/modflow_devtools/dfn/__init__.py b/modflow_devtools/dfn/__init__.py index 760267cd..83ad73ce 100644 --- a/modflow_devtools/dfn/__init__.py +++ b/modflow_devtools/dfn/__init__.py @@ -3,7 +3,8 @@ """ from abc import ABC, abstractmethod -from dataclasses import asdict, dataclass, replace +from collections.abc import Iterator, Mapping +from dataclasses import asdict, dataclass, field, replace from itertools import groupby from os import PathLike from pathlib import Path @@ -33,15 +34,25 @@ 
from modflow_devtools.misc import drop_none_or_empty, try_literal_eval __all__ = [ + # Core data models "Block", "Blocks", "Dfn", "Dfns", + "DfnSpec", "Field", "FieldV1", "FieldV2", "Fields", "Ref", + # Registry classes + "DfnRegistry", + "DfnRegistryDiscoveryError", + "DfnRegistryError", + "DfnRegistryNotFoundError", + "LocalDfnRegistry", + "RemoteDfnRegistry", + # Loading and mapping functions "block_sort_key", "is_valid", "load", @@ -50,6 +61,13 @@ "map", "to_flat", "to_tree", + # Registry functions + "get_dfn", + "get_dfn_path", + "get_registry", + "get_sync_status", + "list_components", + "sync_dfns", ] @@ -145,6 +163,134 @@ def _fields(block_name, block_data): return cls(**data) +@dataclass +class DfnSpec(Mapping): + """ + Full MODFLOW 6 input specification with hierarchical structure and flat dict access. + + The specification maintains a single canonical hierarchical representation via + the `root` property (simulation component with nested children), while also + providing flat dict-like access to any component by name via the Mapping protocol. + + Parameters + ---------- + schema_version : Version + The schema version of the specification (e.g., "1", "1.1", "2"). + root : Dfn + The root component (simulation) with hierarchical children populated. + + Examples + -------- + >>> spec = DfnSpec.load("/path/to/dfns") + >>> spec.schema_version + Version('2') + >>> spec.root.name + 'sim-nam' + >>> spec["gwf-chd"] # Flat access by component name + Dfn(name='gwf-chd', ...) 
+ >>> list(spec.keys())[:3] + ['sim-nam', 'sim-tdis', 'gwf-nam'] + """ + + schema_version: Version + root: "Dfn" + _flat: Dfns = field(default_factory=dict, repr=False, compare=False) + + def __post_init__(self): + if not isinstance(self.schema_version, Version): + self.schema_version = Version(str(self.schema_version)) + # Build flat index if not already populated + if not self._flat: + self._flat = to_flat(self.root) + + def __getitem__(self, name: str) -> "Dfn": + """Get a component by name (flattened lookup).""" + if name not in self._flat: + raise KeyError(f"Component '{name}' not found in specification") + return self._flat[name] + + def __iter__(self) -> Iterator[str]: + """Iterate over all component names.""" + return iter(self._flat) + + def __len__(self) -> int: + """Total number of components in the specification.""" + return len(self._flat) + + def __contains__(self, name: object) -> bool: + """Check if a component exists by name.""" + return name in self._flat + + @classmethod + def load( + cls, + path: str | PathLike, + schema_version: str | Version | None = None, + ) -> "DfnSpec": + """ + Load a specification from a directory of DFN files. + + The specification is always loaded as a hierarchical tree, + with flat access available via the Mapping protocol. + + Parameters + ---------- + path : str or PathLike + Path to directory containing DFN files. + schema_version : str or Version, optional + Target schema version. If provided and different from the native + schema version, DFNs will be mapped to the target version. + If not provided, uses the native schema version from the files. + + Returns + ------- + DfnSpec + The loaded specification with hierarchical structure. + + Examples + -------- + >>> spec = DfnSpec.load("/path/to/dfns") + >>> spec.root.name + 'sim-nam' + >>> spec["gwf-dis"] + Dfn(name='gwf-dis', ...) 
+ """ + path = Path(path).expanduser().resolve() + + # Load flat DFNs from directory + dfns = load_flat(path) + + if not dfns: + raise ValueError(f"No DFN files found in {path}") + + # Determine native schema version from first DFN + first_dfn = next(iter(dfns.values())) + native_version = first_dfn.schema_version + + # Determine target version: + # - If explicitly specified, use that + # - If native is v1, default to v2 (since to_tree only works with v2) + # - Otherwise use native version + if schema_version: + target_version = Version(str(schema_version)) + elif native_version == Version("1"): + target_version = Version("2") + else: + target_version = native_version + + if target_version != native_version: + # Map DFNs to target schema version + dfns = {name: map(dfn, target_version) for name, dfn in dfns.items()} + + # Build hierarchical tree + root = to_tree(dfns) + + return cls( + schema_version=target_version, + root=root, + ) + + class SchemaMap(ABC): @abstractmethod def map(self, dfn: Dfn) -> Dfn: ... 
@@ -234,12 +380,9 @@ def _row_field() -> Field: # explicit record or keystring if n_item_names == 1 and ( - item_types[0].startswith("record") - or item_types[0].startswith("keystring") + item_types[0].startswith("record") or item_types[0].startswith("keystring") ): - return MapV1To2.map_field( - dfn, next(iter(fields.getlist(item_names[0]))) - ) + return MapV1To2.map_field(dfn, next(iter(fields.getlist(item_names[0])))) # implicit record with all scalar fields if all(t in V1_SCALAR_TYPES for t in item_types): @@ -267,9 +410,7 @@ def _row_field() -> Field: if not first.type: raise ValueError(f"Missing type for field: {first.name}") single = len(children) == 1 - item_type = ( - "keystring" if single and "keystring" in first.type else "record" - ) + item_type = "keystring" if single and "keystring" in first.type else "record" return FieldV2.from_dict( { "name": first.name if single else _name, @@ -300,9 +441,7 @@ def _record_fields() -> Fields: matching = [ f for f in fields.values(multi=True) - if f.name == name - and f.in_record - and not f.type.startswith("record") + if f.name == name and f.in_record and not f.type.startswith("record") ] if matching: result[name] = _map_field(matching[0]) @@ -414,9 +553,7 @@ def load(f, format: str = "dfn", **kwargs) -> Dfn: fields, meta = parse_dfn(f, **kwargs) blocks = { block_name: {field["name"]: FieldV1.from_dict(field) for field in block} - for block_name, block in groupby( - fields.values(), lambda field: field["block"] - ) + for block_name, block in groupby(fields.values(), lambda field: field["block"]) } return Dfn( name=name, @@ -441,9 +578,7 @@ def load(f, format: str = "dfn", **kwargs) -> Dfn: if (expected_name := kwargs.pop("name", None)) is not None: if dfn_fields["name"] != expected_name: - raise ValueError( - f"DFN name mismatch: {expected_name} != {dfn_fields['name']}" - ) + raise ValueError(f"DFN name mismatch: {expected_name} != {dfn_fields['name']}") blocks = {} for section_name, section_data in data.items(): 
@@ -536,37 +671,27 @@ def set_parent(dfn): dfns = {name: set_parent(dfn) for name, dfn in dfns.items()} first_dfn = next(iter(dfns.values()), None) - match schema_version := str( - first_dfn.schema_version if first_dfn else Version("1") - ): + match schema_version := str(first_dfn.schema_version if first_dfn else Version("1")): case "1": raise NotImplementedError("Tree inference from v1 schema not implemented") case "2": if ( nroots := len( - roots := { - name: dfn for name, dfn in dfns.items() if dfn.parent is None - } + roots := {name: dfn for name, dfn in dfns.items() if dfn.parent is None} ) ) != 1: raise ValueError(f"Expected one root component, found {nroots}") def _build_tree(node_name: str) -> Dfn: node = dfns[node_name] - children = { - name: dfn for name, dfn in dfns.items() if dfn.parent == node_name - } + children = {name: dfn for name, dfn in dfns.items() if dfn.parent == node_name} if any(children): - node.children = { - name: _build_tree(name) for name in children.keys() - } + node.children = {name: _build_tree(name) for name in children.keys()} return node return _build_tree(next(iter(roots.keys()))) case _: - raise ValueError( - f"Unsupported schema version: {schema_version}. Expected 1 or 2." - ) + raise ValueError(f"Unsupported schema version: {schema_version}. 
Expected 1 or 2.") def to_flat(dfn: Dfn) -> Dfns: @@ -609,3 +734,139 @@ def is_valid(path: str | PathLike, format: str = "dfn", verbose: bool = False) - if verbose: print(f"Validation failed: {e}") return False + + +# ============================================================================= +# Registry imports and convenience functions +# ============================================================================= + +# Import registry classes and functions (lazy to avoid circular imports) +# These are re-exported for convenience + + +def _get_registry_module(): + """Lazy import of registry module to avoid circular imports.""" + from modflow_devtools.dfn import registry + + return registry + + +# Re-export registry classes +def __getattr__(name: str): + """Lazy attribute access for registry classes.""" + registry_exports = { + "DfnRegistry", + "DfnRegistryDiscoveryError", + "DfnRegistryError", + "DfnRegistryNotFoundError", + "LocalDfnRegistry", + "RemoteDfnRegistry", + "get_registry", + "get_sync_status", + "sync_dfns", + } + if name in registry_exports: + registry = _get_registry_module() + return getattr(registry, name) + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + + +# ============================================================================= +# Module-level convenience functions +# ============================================================================= + + +def get_dfn( + component: str, + ref: str = "develop", + source: str = "modflow6", +) -> "Dfn": + """ + Get a DFN by component name from the registry. + + This is a convenience function that gets the registry and retrieves + the specified component. + + Parameters + ---------- + component : str + Component name (e.g., "gwf-chd", "sim-nam"). + ref : str, optional + Git ref (branch, tag, or commit hash). Default is "develop". + source : str, optional + Source repository name. Default is "modflow6". + + Returns + ------- + Dfn + The requested component definition. 
+ + Examples + -------- + >>> dfn = get_dfn("gwf-chd") + >>> dfn = get_dfn("gwf-chd", ref="6.6.0") + """ + registry = _get_registry_module() + reg = registry.get_registry(source=source, ref=ref) + return reg.get_dfn(component) + + +def get_dfn_path( + component: str, + ref: str = "develop", + source: str = "modflow6", +) -> Path: + """ + Get the local cached file path for a DFN component. + + Parameters + ---------- + component : str + Component name (e.g., "gwf-chd", "sim-nam"). + ref : str, optional + Git ref (branch, tag, or commit hash). Default is "develop". + source : str, optional + Source repository name. Default is "modflow6". + + Returns + ------- + Path + Path to the local cached DFN file. + + Examples + -------- + >>> path = get_dfn_path("gwf-chd", ref="6.6.0") + """ + registry = _get_registry_module() + reg = registry.get_registry(source=source, ref=ref) + return reg.get_dfn_path(component) + + +def list_components( + ref: str = "develop", + source: str = "modflow6", +) -> list[str]: + """ + List available components for a registry. + + Parameters + ---------- + ref : str, optional + Git ref (branch, tag, or commit hash). Default is "develop". + source : str, optional + Source repository name. Default is "modflow6". + + Returns + ------- + list[str] + List of component names available in the registry. + + Examples + -------- + >>> components = list_components(ref="6.6.0") + >>> "gwf-chd" in components + True + """ + registry = _get_registry_module() + reg = registry.get_registry(source=source, ref=ref) + return list(reg.spec.keys()) diff --git a/modflow_devtools/dfn/__main__.py b/modflow_devtools/dfn/__main__.py new file mode 100644 index 00000000..41d602d7 --- /dev/null +++ b/modflow_devtools/dfn/__main__.py @@ -0,0 +1,267 @@ +""" +Command-line interface for the DFNs API. 
+ +Usage: + python -m modflow_devtools.dfn sync [--ref REF] [--force] + python -m modflow_devtools.dfn info + python -m modflow_devtools.dfn list [--ref REF] + python -m modflow_devtools.dfn clean [--all] +""" + +from __future__ import annotations + +import argparse +import shutil +import sys + +from modflow_devtools.dfn.registry import ( + DfnRegistryDiscoveryError, + DfnRegistryNotFoundError, + get_bootstrap_config, + get_cache_dir, + get_registry, + get_sync_status, + sync_dfns, +) + + +def cmd_sync(args: argparse.Namespace) -> int: + """Sync DFN registries from remote sources.""" + source = args.source + ref = args.ref + force = args.force + + try: + if ref: + print(f"Syncing {source}@{ref}...") + registries = sync_dfns(source=source, ref=ref, force=force) + else: + print(f"Syncing all configured refs for {source}...") + registries = sync_dfns(source=source, force=force) + + for registry in registries: + meta = registry.registry_meta + print(f" {registry.ref}: {len(meta.files)} files") + + print(f"Synced {len(registries)} registry(ies)") + return 0 + + except DfnRegistryNotFoundError as e: + print(f"Error: {e}", file=sys.stderr) + return 1 + except DfnRegistryDiscoveryError as e: + print(f"Error: {e}", file=sys.stderr) + return 1 + except Exception as e: + print(f"Unexpected error: {e}", file=sys.stderr) + return 1 + + +def cmd_info(args: argparse.Namespace) -> int: + """Show sync status and cache information.""" + source = args.source + + try: + config = get_bootstrap_config() + + if source not in config.sources: + print(f"Unknown source: {source}", file=sys.stderr) + print(f"Available sources: {list(config.sources.keys())}", file=sys.stderr) + return 1 + + source_config = config.sources[source] + print(f"Source: {source}") + print(f" Repository: {source_config.repo}") + print(f" DFN path: {source_config.dfn_path}") + print(f" Registry path: {source_config.registry_path}") + print() + + # Show sync status + status = get_sync_status(source=source) + 
print("Configured refs:") + for ref, synced in status.items(): + status_str = "synced" if synced else "not synced" + print(f" {ref}: {status_str}") + print() + + # Show cache info + cache_dir = get_cache_dir("dfn") + if cache_dir.exists(): + # Count cached files + registries_dir = cache_dir / "registries" / source + files_dir = cache_dir / "files" / source + + registry_count = 0 + file_count = 0 + total_size = 0 + + if registries_dir.exists(): + for p in registries_dir.rglob("*"): + if p.is_file(): + registry_count += 1 + total_size += p.stat().st_size + + if files_dir.exists(): + for p in files_dir.rglob("*"): + if p.is_file(): + file_count += 1 + total_size += p.stat().st_size + + print(f"Cache directory: {cache_dir}") + print(f" Registries: {registry_count}") + print(f" DFN files: {file_count}") + print(f" Total size: {_format_size(total_size)}") + else: + print("Cache directory: (not created)") + + return 0 + + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + return 1 + + +def cmd_list(args: argparse.Namespace) -> int: + """List available components.""" + source = args.source + ref = args.ref + + try: + registry = get_registry(source=source, ref=ref, auto_sync=True) + components = list(registry.spec.keys()) + + print(f"Components in {source}@{ref} ({len(components)} total):") + for component in sorted(components): + print(f" {component}") + + return 0 + + except DfnRegistryNotFoundError as e: + print(f"Error: {e}", file=sys.stderr) + print("Try running 'python -m modflow_devtools.dfn sync' first.", file=sys.stderr) + return 1 + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + return 1 + + +def cmd_clean(args: argparse.Namespace) -> int: + """Clean the cache directory.""" + source = args.source + clean_all = args.all + + cache_dir = get_cache_dir("dfn") + + if not cache_dir.exists(): + print("Cache directory does not exist.") + return 0 + + if clean_all: + # Clean entire cache + print(f"Removing entire cache directory: 
{cache_dir}") + shutil.rmtree(cache_dir) + print("Cache cleaned.") + else: + # Clean only the specified source + registries_dir = cache_dir / "registries" / source + files_dir = cache_dir / "files" / source + + removed = False + if registries_dir.exists(): + print(f"Removing registries for {source}: {registries_dir}") + shutil.rmtree(registries_dir) + removed = True + + if files_dir.exists(): + print(f"Removing files for {source}: {files_dir}") + shutil.rmtree(files_dir) + removed = True + + if removed: + print(f"Cache cleaned for {source}.") + else: + print(f"No cache found for {source}.") + + return 0 + + +def _format_size(size_bytes: int) -> str: + """Format size in bytes to human-readable string.""" + for unit in ["B", "KB", "MB", "GB"]: + if size_bytes < 1024: + return f"{size_bytes:.1f} {unit}" + size_bytes /= 1024 + return f"{size_bytes:.1f} TB" + + +def main(argv: list[str] | None = None) -> int: + """Main entry point for the CLI.""" + parser = argparse.ArgumentParser( + prog="python -m modflow_devtools.dfn", + description="MODFLOW 6 definition file tools", + ) + parser.add_argument( + "--source", + "-s", + default="modflow6", + help="Source repository name (default: modflow6)", + ) + + subparsers = parser.add_subparsers(dest="command", help="Available commands") + + # sync command + sync_parser = subparsers.add_parser("sync", help="Sync DFN registries from remote") + sync_parser.add_argument( + "--ref", + "-r", + help="Specific ref to sync (default: all configured refs)", + ) + sync_parser.add_argument( + "--force", + "-f", + action="store_true", + help="Force re-sync even if already cached", + ) + + # info command + subparsers.add_parser("info", help="Show sync status and cache info") + + # list command + list_parser = subparsers.add_parser("list", help="List available components") + list_parser.add_argument( + "--ref", + "-r", + default="develop", + help="Git ref to list components from (default: develop)", + ) + + # clean command + clean_parser = 
subparsers.add_parser("clean", help="Clean the cache") + clean_parser.add_argument( + "--all", + "-a", + action="store_true", + help="Clean entire cache, not just the specified source", + ) + + args = parser.parse_args(argv) + + if args.command is None: + parser.print_help() + return 0 + + if args.command == "sync": + return cmd_sync(args) + elif args.command == "info": + return cmd_info(args) + elif args.command == "list": + return cmd_list(args) + elif args.command == "clean": + return cmd_clean(args) + else: + parser.print_help() + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/modflow_devtools/dfn/dfns.toml b/modflow_devtools/dfn/dfns.toml new file mode 100644 index 00000000..4a84ae67 --- /dev/null +++ b/modflow_devtools/dfn/dfns.toml @@ -0,0 +1,24 @@ +# DFNs API bootstrap configuration +# +# This file tells modflow-devtools where to find DFN registries. +# Users can override or extend this by creating a config file at: +# - Linux/macOS: ~/.config/modflow-devtools/dfns.toml +# - Windows: %APPDATA%/modflow-devtools/dfns.toml + +[sources.modflow6] +# GitHub repository containing DFN files +repo = "MODFLOW-ORG/modflow6" + +# Path within the repository to the DFN files directory +dfn_path = "doc/mf6io/mf6ivar/dfn" + +# Path within the repository to the registry metadata file +registry_path = ".registry/dfns.toml" + +# Git refs (branches, tags, commit hashes) to sync by default +refs = [ + "develop", + "6.6.0", + "6.5.0", + "6.4.4", +] diff --git a/modflow_devtools/dfn/fetch.py b/modflow_devtools/dfn/fetch.py index 34cdfa76..ecbb7b28 100644 --- a/modflow_devtools/dfn/fetch.py +++ b/modflow_devtools/dfn/fetch.py @@ -6,9 +6,7 @@ from modflow_devtools.download import download_and_unzip -def fetch_dfns( - owner: str, repo: str, ref: str, outdir: str | PathLike, verbose: bool = False -): +def fetch_dfns(owner: str, repo: str, ref: str, outdir: str | PathLike, verbose: bool = False): """Fetch definition files from the MODFLOW 6 repository.""" url 
= f"https://github.com/{owner}/{repo}/archive/{ref}.zip" if verbose: @@ -21,9 +19,7 @@ def fetch_dfns( raise ValueError(f"Missing proj dir in {dl_path}, found {contents}") if verbose: print("Copying dfns from download dir to output dir") - copytree( - proj_path / "doc" / "mf6io" / "mf6ivar" / "dfn", outdir, dirs_exist_ok=True - ) + copytree(proj_path / "doc" / "mf6io" / "mf6ivar" / "dfn", outdir, dirs_exist_ok=True) get_dfns = fetch_dfns # alias for backward compatibility diff --git a/modflow_devtools/dfn/make_registry.py b/modflow_devtools/dfn/make_registry.py new file mode 100644 index 00000000..bd510aa0 --- /dev/null +++ b/modflow_devtools/dfn/make_registry.py @@ -0,0 +1,184 @@ +""" +Registry generation tool for DFN files. + +This tool scans a directory of DFN files and generates a registry file +that can be used by the DFNs API for discovery and verification. + +Usage: + python -m modflow_devtools.dfn.make_registry --dfn-path PATH --output FILE [--ref REF] + +Example (for MODFLOW 6 CI): + python -m modflow_devtools.dfn.make_registry \\ + --dfn-path doc/mf6io/mf6ivar/dfn \\ + --output .registry/dfns.toml \\ + --ref ${{ github.ref_name }} +""" + +from __future__ import annotations + +import argparse +import hashlib +import sys +from datetime import datetime, timezone +from pathlib import Path + +import tomli_w + + +def compute_file_hash(path: Path) -> str: + """Compute SHA256 hash of a file.""" + sha256 = hashlib.sha256() + with path.open("rb") as f: + for chunk in iter(lambda: f.read(8192), b""): + sha256.update(chunk) + return f"sha256:{sha256.hexdigest()}" + + +def scan_dfn_directory(dfn_path: Path) -> dict[str, str]: + """ + Scan a directory for DFN files and compute their hashes. + + Parameters + ---------- + dfn_path : Path + Path to directory containing DFN files. + + Returns + ------- + dict[str, str] + Map of filename to SHA256 hash. 
+ """ + files = {} + + # Find all .dfn files + for p in sorted(dfn_path.glob("*.dfn")): + files[p.name] = compute_file_hash(p) + + # Find all .toml files (spec.toml and/or component files) + for p in sorted(dfn_path.glob("*.toml")): + files[p.name] = compute_file_hash(p) + + return files + + +def generate_registry( + dfn_path: Path, + output_path: Path, + ref: str | None = None, + devtools_version: str | None = None, +) -> None: + """ + Generate a DFN registry file. + + Parameters + ---------- + dfn_path : Path + Path to directory containing DFN files. + output_path : Path + Path to write the registry file. + ref : str, optional + Git ref this registry is being generated for. + devtools_version : str, optional + Version of modflow-devtools generating this registry. + """ + # Scan directory for files + files = scan_dfn_directory(dfn_path) + + if not files: + raise ValueError(f"No DFN files found in {dfn_path}") + + # Get devtools version if not provided + if devtools_version is None: + try: + from modflow_devtools import __version__ + + devtools_version = __version__ + except ImportError: + devtools_version = "unknown" + + # Build registry structure + registry: dict = { + "schema_version": "1.0", + "generated_at": datetime.now(timezone.utc).isoformat(), + "devtools_version": devtools_version, + } + + if ref: + registry["metadata"] = {"ref": ref} + + # Add files section + registry["files"] = {filename: {"hash": file_hash} for filename, file_hash in files.items()} + + # Write registry file + output_path.parent.mkdir(parents=True, exist_ok=True) + with output_path.open("wb") as f: + tomli_w.dump(registry, f) + + +def main(argv: list[str] | None = None) -> int: + """Main entry point.""" + parser = argparse.ArgumentParser( + prog="python -m modflow_devtools.dfn.make_registry", + description="Generate a DFN registry file", + ) + parser.add_argument( + "--dfn-path", + "-d", + type=Path, + required=True, + help="Path to directory containing DFN files", + ) + 
parser.add_argument( + "--output", + "-o", + type=Path, + required=True, + help="Output path for registry file", + ) + parser.add_argument( + "--ref", + "-r", + help="Git ref this registry is being generated for", + ) + parser.add_argument( + "--devtools-version", + help="Version of modflow-devtools (default: auto-detect)", + ) + + args = parser.parse_args(argv) + + dfn_path = args.dfn_path.expanduser().resolve() + output_path = args.output.expanduser().resolve() + + if not dfn_path.exists(): + print(f"Error: DFN path does not exist: {dfn_path}", file=sys.stderr) + return 1 + + if not dfn_path.is_dir(): + print(f"Error: DFN path is not a directory: {dfn_path}", file=sys.stderr) + return 1 + + try: + generate_registry( + dfn_path=dfn_path, + output_path=output_path, + ref=args.ref, + devtools_version=args.devtools_version, + ) + + # Report results + files = scan_dfn_directory(dfn_path) + print(f"Generated registry: {output_path}") + print(f" Files: {len(files)}") + if args.ref: + print(f" Ref: {args.ref}") + + return 0 + + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/modflow_devtools/dfn/parse.py b/modflow_devtools/dfn/parse.py index dc7f5e62..9485d48b 100644 --- a/modflow_devtools/dfn/parse.py +++ b/modflow_devtools/dfn/parse.py @@ -158,10 +158,7 @@ def parse_dfn(f, common: dict | None = None) -> tuple[OMD, list[str]]: subs = literal_eval(subs) cmmn = common.get(key, None) if cmmn is None: - warn( - "Can't substitute description text, " - f"common variable not found: {key}" - ) + warn(f"Can't substitute description text, common variable not found: {key}") else: descr = cmmn["description"] if any(subs): diff --git a/modflow_devtools/dfn/registry.py b/modflow_devtools/dfn/registry.py new file mode 100644 index 00000000..6e1da2da --- /dev/null +++ b/modflow_devtools/dfn/registry.py @@ -0,0 +1,741 @@ +""" +DFN registry infrastructure for discovery, caching, and synchronization. 
+ +This module provides: +- Pydantic schemas for registry and bootstrap configuration +- Cache management for registries and DFN files +- Registry classes for local and remote DFN access +""" + +from __future__ import annotations + +import os +import sys +from datetime import datetime +from os import PathLike +from pathlib import Path +from typing import TYPE_CHECKING + +from packaging.version import Version +from pydantic import BaseModel, Field + +if TYPE_CHECKING: + from modflow_devtools.dfn import Dfn, DfnSpec + +__all__ = [ + "BootstrapConfig", + "DfnRegistry", + "DfnRegistryDiscoveryError", + "DfnRegistryError", + "DfnRegistryFile", + "DfnRegistryMeta", + "DfnRegistryNotFoundError", + "LocalDfnRegistry", + "RemoteDfnRegistry", + "SourceConfig", + "get_bootstrap_config", + "get_cache_dir", + "get_registry", + "get_sync_status", + "get_user_config_path", + "sync_dfns", +] + + +# ============================================================================= +# Pydantic Schemas for Bootstrap Configuration +# ============================================================================= + + +class SourceConfig(BaseModel): + """Configuration for a DFN source repository.""" + + repo: str = Field(description="GitHub repository identifier (owner/name)") + dfn_path: str = Field( + default="doc/mf6io/mf6ivar/dfn", + description="Path within the repository to the DFN files directory", + ) + registry_path: str = Field( + default=".registry/dfns.toml", + description="Path within the repository to the registry metadata file", + ) + refs: list[str] = Field( + default_factory=list, + description="Git refs (branches, tags, commit hashes) to sync by default", + ) + + +class BootstrapConfig(BaseModel): + """Bootstrap configuration for DFN sources.""" + + sources: dict[str, SourceConfig] = Field( + default_factory=dict, + description="Map of source names to their configurations", + ) + + @classmethod + def load(cls, path: str | PathLike) -> BootstrapConfig: + """Load bootstrap 
configuration from a TOML file.""" + import tomli + + path = Path(path) + if not path.exists(): + return cls() + + with path.open("rb") as f: + data = tomli.load(f) + + # Convert sources dict to SourceConfig instances + sources = {} + for name, config in data.get("sources", {}).items(): + sources[name] = SourceConfig(**config) + + return cls(sources=sources) + + @classmethod + def merge(cls, base: BootstrapConfig, overlay: BootstrapConfig) -> BootstrapConfig: + """Merge two bootstrap configs, with overlay taking precedence.""" + merged_sources = dict(base.sources) + merged_sources.update(overlay.sources) + return cls(sources=merged_sources) + + +# ============================================================================= +# Pydantic Schemas for Registry Files +# ============================================================================= + + +class DfnRegistryFile(BaseModel): + """Entry for a single file in the registry.""" + + hash: str = Field(description="SHA256 hash of the file (sha256:...)") + + +class DfnRegistryMeta(BaseModel): + """ + Registry metadata and file listings. + + This represents the contents of a dfns.toml registry file. 
+ """ + + schema_version: str = Field( + default="1.0", + description="Registry schema version", + ) + generated_at: datetime | None = Field( + default=None, + description="When the registry was generated", + ) + devtools_version: str | None = Field( + default=None, + description="Version of modflow-devtools that generated this registry", + ) + ref: str | None = Field( + default=None, + description="Git ref this registry was generated from", + ) + files: dict[str, DfnRegistryFile] = Field( + default_factory=dict, + description="Map of filenames to file metadata", + ) + + @classmethod + def load(cls, path: str | PathLike) -> DfnRegistryMeta: + """Load registry metadata from a TOML file.""" + import tomli + + path = Path(path) + with path.open("rb") as f: + data = tomli.load(f) + + # Handle nested structure: files section contains filename -> {hash: ...} + files_data = data.pop("files", {}) + files = {} + for filename, file_info in files_data.items(): + if isinstance(file_info, dict): + files[filename] = DfnRegistryFile(**file_info) + elif isinstance(file_info, str): + # Support shorthand: filename = "hash" + files[filename] = DfnRegistryFile(hash=file_info) + + # Handle metadata section if present + metadata = data.pop("metadata", {}) + ref = metadata.get("ref") or data.pop("ref", None) + + return cls( + schema_version=data.get("schema_version", "1.0"), + generated_at=data.get("generated_at"), + devtools_version=data.get("devtools_version"), + ref=ref, + files=files, + ) + + def save(self, path: str | PathLike) -> None: + """Save registry metadata to a TOML file.""" + import tomli_w + + path = Path(path) + path.parent.mkdir(parents=True, exist_ok=True) + + data: dict = { + "schema_version": self.schema_version, + } + + if self.generated_at: + data["generated_at"] = self.generated_at.isoformat() + if self.devtools_version: + data["devtools_version"] = self.devtools_version + + if self.ref: + data["metadata"] = {"ref": self.ref} + + # Write files section + 
data["files"] = { + filename: {"hash": file_info.hash} for filename, file_info in self.files.items() + } + + with path.open("wb") as f: + tomli_w.dump(data, f) + + +# ============================================================================= +# Cache and Configuration Utilities +# ============================================================================= + + +def get_user_config_path(subdir: str = "dfn") -> Path: + """ + Get the user configuration directory path. + + Parameters + ---------- + subdir : str + Subdirectory name (e.g., "dfn", "models", "programs"). + + Returns + ------- + Path + Path to user config file (e.g., ~/.config/modflow-devtools/dfns.toml). + """ + if sys.platform == "win32": + base = Path(os.environ.get("APPDATA", Path.home() / "AppData" / "Roaming")) + else: + base = Path(os.environ.get("XDG_CONFIG_HOME", Path.home() / ".config")) + + return base / "modflow-devtools" / f"{subdir}s.toml" + + +def get_cache_dir(subdir: str = "dfn") -> Path: + """ + Get the cache directory path. + + Parameters + ---------- + subdir : str + Subdirectory name (e.g., "dfn", "models", "programs"). + + Returns + ------- + Path + Path to cache directory (e.g., ~/.cache/modflow-devtools/dfn/). + """ + if sys.platform == "win32": + base = Path(os.environ.get("LOCALAPPDATA", Path.home() / "AppData" / "Local")) + else: + base = Path(os.environ.get("XDG_CACHE_HOME", Path.home() / ".cache")) + + return base / "modflow-devtools" / subdir + + +def get_bootstrap_config() -> BootstrapConfig: + """ + Load and merge bootstrap configuration. + + Loads the bundled bootstrap file and merges with user config if present. + + Returns + ------- + BootstrapConfig + Merged bootstrap configuration. 
+ """ + # Load bundled bootstrap config + bundled_path = Path(__file__).parent / "dfns.toml" + bundled_config = BootstrapConfig.load(bundled_path) + + # Load user config if present + user_path = get_user_config_path("dfn") + if user_path.exists(): + user_config = BootstrapConfig.load(user_path) + return BootstrapConfig.merge(bundled_config, user_config) + + return bundled_config + + +# ============================================================================= +# Registry Classes +# ============================================================================= + + +class DfnRegistry(BaseModel): + """ + Base class for DFN registries. + + A registry provides access to DFN files and the parsed DfnSpec. + This is a Pydantic model that can be used directly for data-only use cases. + """ + + model_config = {"arbitrary_types_allowed": True} + + source: str = Field(default="modflow6", description="Source repository name") + ref: str = Field(default="develop", description="Git ref (branch, tag, or commit hash)") + + _spec: DfnSpec | None = None + + @property + def spec(self) -> DfnSpec: + """ + Get the full DFN specification. + + Returns + ------- + DfnSpec + The parsed specification with hierarchical structure. + """ + raise NotImplementedError("Subclasses must implement spec property") + + @property + def schema_version(self) -> Version: + """Get the schema version of the specification.""" + return self.spec.schema_version + + @property + def components(self) -> dict[str, Dfn]: + """Get all components as a flat dictionary.""" + return dict(self.spec.items()) + + def get_dfn(self, component: str) -> Dfn: + """ + Get a DFN by component name. + + Parameters + ---------- + component : str + Component name (e.g., "gwf-chd", "sim-nam"). + + Returns + ------- + Dfn + The requested component definition. + """ + return self.spec[component] + + def get_dfn_path(self, component: str) -> Path: + """ + Get the local file path for a DFN. 
+ + Parameters + ---------- + component : str + Component name (e.g., "gwf-chd", "sim-nam"). + + Returns + ------- + Path + Path to the local DFN file. + """ + raise NotImplementedError("Subclasses must implement get_dfn_path") + + +class LocalDfnRegistry(DfnRegistry): + """ + Registry for local DFN files. + + Use this for working with DFN files on the local filesystem, + e.g., during development or with a local clone of the MODFLOW 6 repository. + """ + + path: Path = Field(description="Path to directory containing DFN files") + + def model_post_init(self, __context) -> None: + """Validate and resolve path after initialization.""" + if isinstance(self.path, str): + object.__setattr__(self, "path", Path(self.path)) + object.__setattr__(self, "path", self.path.expanduser().resolve()) + + @property + def spec(self) -> DfnSpec: + """Load and return the DFN specification from local files.""" + if self._spec is None: + from modflow_devtools.dfn import DfnSpec + + self._spec = DfnSpec.load(self.path) + return self._spec + + def get_dfn_path(self, component: str) -> Path: + """Get the local file path for a DFN component.""" + # Look for both .dfn and .toml extensions + for ext in [".dfn", ".toml"]: + p = self.path / f"{component}{ext}" + if p.exists(): + return p + raise FileNotFoundError(f"Component '{component}' not found in {self.path}") + + +class RemoteDfnRegistry(DfnRegistry): + """ + Registry for remote DFN files with Pooch-based caching. + + Handles remote registry discovery, caching, and DFN file fetching. + URLs are constructed dynamically from bootstrap metadata. 
+ + Examples + -------- + >>> registry = RemoteDfnRegistry(source="modflow6", ref="6.6.0") + >>> dfn = registry.get_dfn("gwf-chd") + >>> path = registry.get_dfn_path("gwf-chd") + """ + + _registry_meta: DfnRegistryMeta | None = None + _source_config: SourceConfig | None = None + _pooch: pooch.Pooch | None = None + _files_dir: Path | None = None + + def model_post_init(self, __context) -> None: + """Initialize registry after model creation.""" + self._ensure_source_config() + + def _ensure_source_config(self) -> SourceConfig: + """Load and cache source configuration from bootstrap.""" + if self._source_config is None: + config = get_bootstrap_config() + if self.source not in config.sources: + raise ValueError( + f"Unknown source '{self.source}'. " + f"Available sources: {list(config.sources.keys())}" + ) + self._source_config = config.sources[self.source] + return self._source_config + + def _get_registry_cache_path(self) -> Path: + """Get path to cached registry file.""" + cache_dir = get_cache_dir("dfn") + return cache_dir / "registries" / self.source / self.ref / "dfns.toml" + + def _get_files_cache_dir(self) -> Path: + """Get directory for cached DFN files.""" + cache_dir = get_cache_dir("dfn") + return cache_dir / "files" / self.source / self.ref + + def _construct_raw_url(self, path: str) -> str: + """Construct GitHub raw content URL for a file.""" + source_config = self._ensure_source_config() + return f"https://raw.githubusercontent.com/{source_config.repo}/{self.ref}/{path}" + + def _fetch_registry(self, force: bool = False) -> DfnRegistryMeta: + """Fetch registry metadata from remote or cache.""" + cache_path = self._get_registry_cache_path() + + # Use cached registry if available and not forcing refresh + if cache_path.exists() and not force: + return DfnRegistryMeta.load(cache_path) + + # Fetch from remote + source_config = self._ensure_source_config() + registry_url = self._construct_raw_url(source_config.registry_path) + + import urllib.error + import 
urllib.request + + try: + with urllib.request.urlopen(registry_url, timeout=30) as response: + content = response.read() + except urllib.error.HTTPError as e: + if e.code == 404: + raise DfnRegistryNotFoundError( + f"Registry not found at {registry_url} for '{self.source}@{self.ref}'. " + f"The registry file may not exist for this ref." + ) from e + raise DfnRegistryDiscoveryError( + f"Failed to fetch registry from {registry_url}: {e}" + ) from e + except urllib.error.URLError as e: + raise DfnRegistryDiscoveryError( + f"Network error fetching registry from {registry_url}: {e}" + ) from e + + # Parse and cache + import tomli + + data = tomli.loads(content.decode("utf-8")) + + # Build registry meta from parsed data + files_data = data.pop("files", {}) + files = {} + for filename, file_info in files_data.items(): + if isinstance(file_info, dict): + files[filename] = DfnRegistryFile(**file_info) + elif isinstance(file_info, str): + files[filename] = DfnRegistryFile(hash=file_info) + + metadata = data.pop("metadata", {}) + registry_meta = DfnRegistryMeta( + schema_version=data.get("schema_version", "1.0"), + generated_at=data.get("generated_at"), + devtools_version=data.get("devtools_version"), + ref=metadata.get("ref") or data.get("ref") or self.ref, + files=files, + ) + + # Cache the registry + cache_path.parent.mkdir(parents=True, exist_ok=True) + registry_meta.save(cache_path) + + return registry_meta + + def _ensure_registry_meta(self, force: bool = False) -> DfnRegistryMeta: + """Ensure registry metadata is loaded.""" + if self._registry_meta is None or force: + self._registry_meta = self._fetch_registry(force=force) + return self._registry_meta + + def _setup_pooch(self) -> pooch.Pooch: + """Set up Pooch for DFN file fetching.""" + if self._pooch is not None: + return self._pooch + + import pooch + + registry_meta = self._ensure_registry_meta() + source_config = self._ensure_source_config() + + # Construct base URL for DFN files + base_url = 
self._construct_raw_url(source_config.dfn_path) + "/" + + # Build registry dict for Pooch (filename -> hash) + pooch_registry = {} + for filename, file_info in registry_meta.files.items(): + # Pooch expects hash without "sha256:" prefix for sha256 + hash_value = file_info.hash + if hash_value.startswith("sha256:"): + hash_value = hash_value[7:] + pooch_registry[filename] = f"sha256:{hash_value}" + + self._files_dir = self._get_files_cache_dir() + self._pooch = pooch.create( + path=self._files_dir, + base_url=base_url, + registry=pooch_registry, + ) + + return self._pooch + + def sync(self, force: bool = False) -> None: + """ + Synchronize registry and optionally pre-fetch all DFN files. + + Parameters + ---------- + force : bool, optional + If True, re-fetch registry even if cached. Default is False. + """ + self._ensure_registry_meta(force=force) + self._setup_pooch() + + @property + def registry_meta(self) -> DfnRegistryMeta: + """Get the registry metadata.""" + return self._ensure_registry_meta() + + @property + def spec(self) -> DfnSpec: + """Load and return the DFN specification from cached files.""" + if self._spec is None: + from modflow_devtools.dfn import DfnSpec + + # Ensure all files are fetched + self._fetch_all_files() + + # Load from cache directory + self._spec = DfnSpec.load(self._get_files_cache_dir()) + return self._spec + + def _fetch_all_files(self) -> None: + """Fetch all DFN files to cache.""" + p = self._setup_pooch() + registry_meta = self._ensure_registry_meta() + + for filename in registry_meta.files: + # Skip non-DFN files (like spec.toml) + if filename.endswith(".dfn") or filename.endswith(".toml"): + p.fetch(filename) + + def get_dfn_path(self, component: str) -> Path: + """Get the local cached file path for a DFN component.""" + p = self._setup_pooch() + registry_meta = self._ensure_registry_meta() + + # Look for both .dfn and .toml extensions + for ext in [".dfn", ".toml"]: + filename = f"{component}{ext}" + if filename in 
registry_meta.files: + return Path(p.fetch(filename)) + + raise FileNotFoundError( + f"Component '{component}' not found in registry for '{self.source}@{self.ref}'" + ) + + +# ============================================================================= +# Exceptions +# ============================================================================= + + +class DfnRegistryError(Exception): + """Base exception for DFN registry errors.""" + + pass + + +class DfnRegistryNotFoundError(DfnRegistryError): + """Registry file not found for the specified ref.""" + + pass + + +class DfnRegistryDiscoveryError(DfnRegistryError): + """Error during registry discovery.""" + + pass + + +# ============================================================================= +# Sync Functions +# ============================================================================= + + +def sync_dfns( + source: str = "modflow6", + ref: str | None = None, + force: bool = False, +) -> list[RemoteDfnRegistry]: + """ + Synchronize DFN registries from remote sources. + + Parameters + ---------- + source : str, optional + Source repository name. Default is "modflow6". + ref : str, optional + Specific git ref to sync. If not provided, syncs all configured refs. + force : bool, optional + If True, re-fetch registries even if cached. Default is False. + + Returns + ------- + list[RemoteDfnRegistry] + List of synced registries. + + Examples + -------- + >>> # Sync all configured refs + >>> registries = sync_dfns() + + >>> # Sync specific ref + >>> registries = sync_dfns(ref="6.6.0") + + >>> # Force re-sync + >>> registries = sync_dfns(force=True) + """ + config = get_bootstrap_config() + + if source not in config.sources: + raise ValueError( + f"Unknown source '{source}'. 
Available sources: {list(config.sources.keys())}" + ) + + source_config = config.sources[source] + + # Determine which refs to sync + refs_to_sync = [ref] if ref else source_config.refs + + registries = [] + for r in refs_to_sync: + registry = RemoteDfnRegistry(source=source, ref=r) + registry.sync(force=force) + registries.append(registry) + + return registries + + +def get_sync_status(source: str = "modflow6") -> dict[str, bool]: + """ + Check which refs have cached registries. + + Parameters + ---------- + source : str, optional + Source repository name. Default is "modflow6". + + Returns + ------- + dict[str, bool] + Map of ref names to whether they have a cached registry. + """ + config = get_bootstrap_config() + + if source not in config.sources: + raise ValueError( + f"Unknown source '{source}'. Available sources: {list(config.sources.keys())}" + ) + + source_config = config.sources[source] + cache_dir = get_cache_dir("dfn") + + status = {} + for ref in source_config.refs: + registry_path = cache_dir / "registries" / source / ref / "dfns.toml" + status[ref] = registry_path.exists() + + return status + + +def get_registry( + source: str = "modflow6", + ref: str = "develop", + auto_sync: bool = True, +) -> RemoteDfnRegistry: + """ + Get a registry for the specified source and ref. + + Parameters + ---------- + source : str, optional + Source repository name. Default is "modflow6". + ref : str, optional + Git ref (branch, tag, or commit hash). Default is "develop". + auto_sync : bool, optional + If True and registry is not cached, automatically sync. Default is True. + Can be disabled via MODFLOW_DEVTOOLS_NO_AUTO_SYNC environment variable. + + Returns + ------- + RemoteDfnRegistry + Registry for the specified source and ref. 
+ + Examples + -------- + >>> registry = get_registry(ref="6.6.0") + >>> dfn = registry.get_dfn("gwf-chd") + """ + # Check for auto-sync opt-out + if os.environ.get("MODFLOW_DEVTOOLS_NO_AUTO_SYNC", "").lower() in ("1", "true", "yes"): + auto_sync = False + + registry = RemoteDfnRegistry(source=source, ref=ref) + + # Check if registry is cached + cache_path = registry._get_registry_cache_path() + if not cache_path.exists() and auto_sync: + registry.sync() + + return registry diff --git a/modflow_devtools/dfn/schema/v1.py b/modflow_devtools/dfn/schema/v1.py index 7e36bc4b..c1669f52 100644 --- a/modflow_devtools/dfn/schema/v1.py +++ b/modflow_devtools/dfn/schema/v1.py @@ -52,9 +52,7 @@ def from_dict(cls, d: dict, strict: bool = False) -> "FieldV1": If True, raise ValueError if dict contains unrecognized keys. If False (default), ignore unrecognized keys. """ - keys = set( - list(cls.__annotations__.keys()) + list(Field.__annotations__.keys()) - ) + keys = set(list(cls.__annotations__.keys()) + list(Field.__annotations__.keys())) if strict: if extra_keys := set(d.keys()) - keys: raise ValueError(f"Unrecognized keys in field data: {extra_keys}") diff --git a/modflow_devtools/dfn/schema/v2.py b/modflow_devtools/dfn/schema/v2.py index 89a2fe45..c26c25c6 100644 --- a/modflow_devtools/dfn/schema/v2.py +++ b/modflow_devtools/dfn/schema/v2.py @@ -25,9 +25,7 @@ def from_dict(cls, d: dict, strict: bool = False) -> "FieldV2": If True, raise ValueError if dict contains unrecognized keys. If False (default), ignore unrecognized keys. 
""" - keys = set( - list(cls.__annotations__.keys()) + list(Field.__annotations__.keys()) - ) + keys = set(list(cls.__annotations__.keys()) + list(Field.__annotations__.keys())) if strict: if extra_keys := set(d.keys()) - keys: raise ValueError(f"Unrecognized keys in field data: {extra_keys}") diff --git a/modflow_devtools/dfn2toml.py b/modflow_devtools/dfn2toml.py index 90104b95..0313d799 100644 --- a/modflow_devtools/dfn2toml.py +++ b/modflow_devtools/dfn2toml.py @@ -46,8 +46,7 @@ def convert(inpath: PathLike, outdir: PathLike, schema_version: str = "2") -> No _convert(dfn, outdir / f"{inpath.stem}.toml") else: dfns = { - name: map(dfn, schema_version=schema_version) - for name, dfn in load_flat(inpath).items() + name: map(dfn, schema_version=schema_version) for name, dfn in load_flat(inpath).items() } tree = to_tree(dfns) flat = to_flat(tree) diff --git a/pyproject.toml b/pyproject.toml index 57fdafc6..8e621884 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,6 +72,8 @@ docs = [ # deprecated dfn = [ "boltons", + "pooch", + "pydantic", "tomli", "tomli-w" ] @@ -122,6 +124,8 @@ docs = [ # deprecated dfn = [ "boltons", + "pooch", + "pydantic", "tomli", "tomli-w" ] @@ -164,6 +168,7 @@ packages = ["modflow_devtools"] [tool.hatch.build] include = [ + "modflow_devtools/dfn/*.toml", "modflow_devtools/registry/*" ] From b51daec92f5175fdd3670e84a84ddc63a3ec0730 Mon Sep 17 00:00:00 2001 From: w-bonelli Date: Tue, 20 Jan 2026 10:39:34 -0500 Subject: [PATCH 24/36] appease mypy --- modflow_devtools/dfn/__init__.py | 32 ++++++++++++++------------------ modflow_devtools/dfn/__main__.py | 9 +++++---- modflow_devtools/dfn/registry.py | 6 +++--- 3 files changed, 22 insertions(+), 25 deletions(-) diff --git a/modflow_devtools/dfn/__init__.py b/modflow_devtools/dfn/__init__.py index 83ad73ce..1ee34e1f 100644 --- a/modflow_devtools/dfn/__init__.py +++ b/modflow_devtools/dfn/__init__.py @@ -34,40 +34,36 @@ from modflow_devtools.misc import drop_none_or_empty, try_literal_eval 
__all__ = [ - # Core data models "Block", "Blocks", "Dfn", - "Dfns", + "DfnRegistry", + "DfnRegistryDiscoveryError", + "DfnRegistryError", + "DfnRegistryNotFoundError", "DfnSpec", + "Dfns", "Field", "FieldV1", "FieldV2", "Fields", - "Ref", - # Registry classes - "DfnRegistry", - "DfnRegistryDiscoveryError", - "DfnRegistryError", - "DfnRegistryNotFoundError", "LocalDfnRegistry", + "Ref", "RemoteDfnRegistry", - # Loading and mapping functions "block_sort_key", + "get_dfn", + "get_dfn_path", + "get_registry", + "get_sync_status", "is_valid", + "list_components", "load", "load_flat", "load_tree", "map", + "sync_dfns", "to_flat", "to_tree", - # Registry functions - "get_dfn", - "get_dfn_path", - "get_registry", - "get_sync_status", - "list_components", - "sync_dfns", ] @@ -103,8 +99,8 @@ def fields(self) -> Fields: """ fields = [] for block in (self.blocks or {}).values(): - for field in block.values(): - fields.append((field.name, field)) + for f in block.values(): + fields.append((f.name, f)) # for now return a multidict to support duplicate field names. 
# TODO: change to normal dict after deprecating v1 schema diff --git a/modflow_devtools/dfn/__main__.py b/modflow_devtools/dfn/__main__.py index 41d602d7..d9a216f9 100644 --- a/modflow_devtools/dfn/__main__.py +++ b/modflow_devtools/dfn/__main__.py @@ -187,11 +187,12 @@ def cmd_clean(args: argparse.Namespace) -> int: def _format_size(size_bytes: int) -> str: """Format size in bytes to human-readable string.""" + size = float(size_bytes) for unit in ["B", "KB", "MB", "GB"]: - if size_bytes < 1024: - return f"{size_bytes:.1f} {unit}" - size_bytes /= 1024 - return f"{size_bytes:.1f} TB" + if size < 1024: + return f"{size:.1f} {unit}" + size /= 1024 + return f"{size:.1f} TB" def main(argv: list[str] | None = None) -> int: diff --git a/modflow_devtools/dfn/registry.py b/modflow_devtools/dfn/registry.py index 6e1da2da..4decdd17 100644 --- a/modflow_devtools/dfn/registry.py +++ b/modflow_devtools/dfn/registry.py @@ -20,6 +20,8 @@ from pydantic import BaseModel, Field if TYPE_CHECKING: + import pooch + from modflow_devtools.dfn import Dfn, DfnSpec __all__ = [ @@ -358,9 +360,7 @@ class LocalDfnRegistry(DfnRegistry): def model_post_init(self, __context) -> None: """Validate and resolve path after initialization.""" - if isinstance(self.path, str): - object.__setattr__(self, "path", Path(self.path)) - object.__setattr__(self, "path", self.path.expanduser().resolve()) + object.__setattr__(self, "path", Path(self.path).expanduser().resolve()) @property def spec(self) -> DfnSpec: From 61e6928bd0e3aa8689dece19f865909e520bbe36 Mon Sep 17 00:00:00 2001 From: w-bonelli Date: Tue, 20 Jan 2026 13:38:33 -0500 Subject: [PATCH 25/36] testing --- .github/workflows/ci.yml | 24 +- autotest/models.toml | 7 + autotest/test_dfn.py | 18 +- autotest/test_dfn_registry.py | 249 ++++++++++++------ autotest/test_models.py | 221 +++++++--------- modflow_devtools/{dfn => dfns}/__init__.py | 28 +- modflow_devtools/{dfn => dfns}/__main__.py | 10 +- modflow_devtools/{dfn => dfns}/dfns.toml | 0 
modflow_devtools/{dfn => dfns}/fetch.py | 0 .../{dfn => dfns}/make_registry.py | 0 modflow_devtools/{dfn => dfns}/parse.py | 0 modflow_devtools/{dfn => dfns}/registry.py | 56 +++- .../{dfn => dfns}/schema/block.py | 2 +- .../{dfn => dfns}/schema/field.py | 0 modflow_devtools/{dfn => dfns}/schema/ref.py | 0 modflow_devtools/{dfn => dfns}/schema/v1.py | 2 +- modflow_devtools/{dfn => dfns}/schema/v2.py | 2 +- 17 files changed, 365 insertions(+), 254 deletions(-) create mode 100644 autotest/models.toml rename modflow_devtools/{dfn => dfns}/__init__.py (97%) rename modflow_devtools/{dfn => dfns}/__main__.py (96%) rename modflow_devtools/{dfn => dfns}/dfns.toml (100%) rename modflow_devtools/{dfn => dfns}/fetch.py (100%) rename modflow_devtools/{dfn => dfns}/make_registry.py (100%) rename modflow_devtools/{dfn => dfns}/parse.py (100%) rename modflow_devtools/{dfn => dfns}/registry.py (92%) rename modflow_devtools/{dfn => dfns}/schema/block.py (87%) rename modflow_devtools/{dfn => dfns}/schema/field.py (100%) rename modflow_devtools/{dfn => dfns}/schema/ref.py (100%) rename modflow_devtools/{dfn => dfns}/schema/v1.py (96%) rename modflow_devtools/{dfn => dfns}/schema/v2.py (94%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bd9d92ba..29683872 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -107,8 +107,8 @@ jobs: REPOS_PATH: ${{ github.workspace }} MODFLOW_DEVTOOLS_NO_AUTO_SYNC: 1 # use --dist loadfile to so tests requiring pytest-virtualenv run on the same worker - run: uv run pytest -v -n auto --dist loadfile --durations 0 --ignore test_download.py --ignore test_models.py - + run: uv run pytest -v -n auto --dist loadfile --durations 0 --ignore test_download.py --ignore test_models.py --ignore test_dfn_registry.py + - name: Run network-dependent tests # only invoke the GH API on one OS and Python version # to avoid rate limits (1000 rqs / hour / repository) @@ -118,12 +118,20 @@ jobs: env: REPOS_PATH: ${{ github.workspace 
}} GITHUB_TOKEN: ${{ github.token }} - TEST_REPO: MODFLOW-ORG/modflow6-testmodels - TEST_REF: develop - TEST_SOURCE: modflow6-testmodels - TEST_SOURCE_NAME: mf6/test - MODFLOW_DEVTOOLS_NO_AUTO_SYNC: 1 - run: uv run pytest -v -n auto --dist loadgroup --durations 0 test_download.py test_models.py + # DFNs API + TEST_DFNS_REPO: wpbonelli/modflow6 + TEST_DFNS_REF: registry + TEST_DFNS_SOURCE: modflow6 + # Models API + TEST_MODELS_REPO: MODFLOW-ORG/modflow6-testmodels + TEST_MODELS_REF: develop + TEST_MODELS_SOURCE: modflow6-testmodels + TEST_MODELS_SOURCE_NAME: mf6/test + # Programs API + TEST_PROGRAMS_REPO: MODFLOW-ORG/modflow6 + TEST_PROGRAMS_REF: develop + TEST_PROGRAMS_SOURCE: modflow6 + run: uv run pytest -v -n auto --dist loadgroup --durations 0 test_download.py test_models.py test_dfn_registry.py rtd: name: Docs diff --git a/autotest/models.toml b/autotest/models.toml new file mode 100644 index 00000000..80a81a54 --- /dev/null +++ b/autotest/models.toml @@ -0,0 +1,7 @@ +schema_version = "1.0" + +[files] + +[models] + +[examples] diff --git a/autotest/test_dfn.py b/autotest/test_dfn.py index 9eb0f401..14ef4af6 100644 --- a/autotest/test_dfn.py +++ b/autotest/test_dfn.py @@ -4,10 +4,10 @@ import pytest from packaging.version import Version -from modflow_devtools.dfn import Dfn, _load_common, load, load_flat -from modflow_devtools.dfn.fetch import fetch_dfns -from modflow_devtools.dfn.schema.v1 import FieldV1 -from modflow_devtools.dfn.schema.v2 import FieldV2 +from modflow_devtools.dfns import Dfn, _load_common, load, load_flat +from modflow_devtools.dfns.fetch import fetch_dfns +from modflow_devtools.dfns.schema.v1 import FieldV1 +from modflow_devtools.dfns.schema.v2 import FieldV2 from modflow_devtools.dfn2toml import convert, is_valid from modflow_devtools.markers import requires_pkg @@ -384,7 +384,7 @@ def test_validate_nonexistent_file(function_tmpdir): def test_fieldv1_to_fieldv2_conversion(): """Test that FieldV1 instances are properly converted to 
FieldV2.""" - from modflow_devtools.dfn import map + from modflow_devtools.dfns import map dfn_v1 = Dfn( schema_version=Version("1"), @@ -436,7 +436,7 @@ def test_fieldv1_to_fieldv2_conversion(): def test_fieldv1_to_fieldv2_conversion_with_children(): """Test that FieldV1 with nested children are properly converted to FieldV2.""" - from modflow_devtools.dfn import map + from modflow_devtools.dfns import map # Create nested fields for a record child_field_v1 = FieldV1( @@ -483,7 +483,7 @@ def test_fieldv1_to_fieldv2_conversion_with_children(): def test_period_block_conversion(): """Test period block recarray conversion to individual arrays.""" - from modflow_devtools.dfn import map + from modflow_devtools.dfns import map dfn_v1 = Dfn( schema_version=Version("1"), @@ -528,7 +528,7 @@ def test_period_block_conversion(): def test_record_type_conversion(): """Test record type with multiple scalar fields.""" - from modflow_devtools.dfn import map + from modflow_devtools.dfns import map dfn_v1 = Dfn( schema_version=Version("1"), @@ -571,7 +571,7 @@ def test_record_type_conversion(): def test_keystring_type_conversion(): """Test keystring type conversion.""" - from modflow_devtools.dfn import map + from modflow_devtools.dfns import map dfn_v1 = Dfn( schema_version=Version("1"), diff --git a/autotest/test_dfn_registry.py b/autotest/test_dfn_registry.py index 9644d86b..fc2dbdd7 100644 --- a/autotest/test_dfn_registry.py +++ b/autotest/test_dfn_registry.py @@ -1,21 +1,33 @@ -"""Tests for the DFNs API registry infrastructure.""" +""" +Tests for the DFNs API registry infrastructure. + +Tests can be configured via environment variables (loaded from .env file). 
+""" from __future__ import annotations +import os from pathlib import Path from unittest.mock import patch import pytest from packaging.version import Version -from modflow_devtools.dfn.fetch import fetch_dfns +from modflow_devtools.dfns.fetch import fetch_dfns from modflow_devtools.markers import requires_pkg PROJ_ROOT = Path(__file__).parents[1] DFN_DIR = PROJ_ROOT / "autotest" / "temp" / "dfn" -MF6_OWNER = "MODFLOW-ORG" -MF6_REPO = "modflow6" -MF6_REF = "develop" + +# Test configuration (loaded from .env file via pytest-dotenv plugin) +TEST_DFN_REPO = os.getenv("TEST_DFNS_REPO", "MODFLOW-ORG/modflow6") +TEST_DFN_REF = os.getenv("TEST_DFNS_REF", "develop") +TEST_DFN_SOURCE = os.getenv("TEST_DFNS_SOURCE", "modflow6") + +# For fetching DFN files directly (legacy tests) +MF6_OWNER = TEST_DFN_REPO.split("/")[0] +MF6_REPO = TEST_DFN_REPO.split("/")[1] +MF6_REF = TEST_DFN_REF @pytest.fixture(scope="module") @@ -26,18 +38,13 @@ def dfn_dir(): return DFN_DIR -# ============================================================================= -# DfnSpec Tests -# ============================================================================= - - @requires_pkg("boltons") class TestDfnSpec: """Tests for the DfnSpec class.""" def test_load_from_directory(self, dfn_dir): """Test loading a DfnSpec from a directory of DFN files.""" - from modflow_devtools.dfn import DfnSpec + from modflow_devtools.dfns import DfnSpec spec = DfnSpec.load(dfn_dir) @@ -48,7 +55,7 @@ def test_load_from_directory(self, dfn_dir): def test_load_with_explicit_schema_version(self, dfn_dir): """Test loading with explicit schema version.""" - from modflow_devtools.dfn import DfnSpec + from modflow_devtools.dfns import DfnSpec spec = DfnSpec.load(dfn_dir, schema_version="2") @@ -56,7 +63,7 @@ def test_load_with_explicit_schema_version(self, dfn_dir): def test_mapping_protocol(self, dfn_dir): """Test that DfnSpec implements the Mapping protocol.""" - from modflow_devtools.dfn import DfnSpec + from 
modflow_devtools.dfns import DfnSpec spec = DfnSpec.load(dfn_dir) @@ -85,7 +92,7 @@ def test_mapping_protocol(self, dfn_dir): def test_getitem_raises_keyerror(self, dfn_dir): """Test that __getitem__ raises KeyError for missing components.""" - from modflow_devtools.dfn import DfnSpec + from modflow_devtools.dfns import DfnSpec spec = DfnSpec.load(dfn_dir) @@ -94,7 +101,7 @@ def test_getitem_raises_keyerror(self, dfn_dir): def test_hierarchical_access(self, dfn_dir): """Test accessing components through the hierarchical tree.""" - from modflow_devtools.dfn import DfnSpec + from modflow_devtools.dfns import DfnSpec spec = DfnSpec.load(dfn_dir) @@ -112,24 +119,19 @@ def test_hierarchical_access(self, dfn_dir): def test_load_empty_directory_raises(self, tmp_path): """Test that loading from empty directory raises ValueError.""" - from modflow_devtools.dfn import DfnSpec + from modflow_devtools.dfns import DfnSpec with pytest.raises(ValueError, match="No DFN files found"): DfnSpec.load(tmp_path) -# ============================================================================= -# Bootstrap and Registry Schema Tests -# ============================================================================= - - @requires_pkg("pydantic") class TestBootstrapConfig: """Tests for bootstrap configuration schemas.""" def test_source_config_defaults(self): """Test SourceConfig default values.""" - from modflow_devtools.dfn.registry import SourceConfig + from modflow_devtools.dfns.registry import SourceConfig config = SourceConfig(repo="owner/repo") @@ -140,7 +142,7 @@ def test_source_config_defaults(self): def test_source_config_custom_values(self): """Test SourceConfig with custom values.""" - from modflow_devtools.dfn.registry import SourceConfig + from modflow_devtools.dfns.registry import SourceConfig config = SourceConfig( repo="custom/repo", @@ -156,7 +158,7 @@ def test_source_config_custom_values(self): def test_bootstrap_config_load(self, tmp_path): """Test loading BootstrapConfig 
from TOML file.""" - from modflow_devtools.dfn.registry import BootstrapConfig + from modflow_devtools.dfns.registry import BootstrapConfig config_file = tmp_path / "dfns.toml" config_file.write_text(""" @@ -173,7 +175,7 @@ def test_bootstrap_config_load(self, tmp_path): def test_bootstrap_config_load_nonexistent(self, tmp_path): """Test loading from nonexistent file returns empty config.""" - from modflow_devtools.dfn.registry import BootstrapConfig + from modflow_devtools.dfns.registry import BootstrapConfig config = BootstrapConfig.load(tmp_path / "nonexistent.toml") @@ -181,7 +183,7 @@ def test_bootstrap_config_load_nonexistent(self, tmp_path): def test_bootstrap_config_merge(self): """Test merging two bootstrap configs.""" - from modflow_devtools.dfn.registry import BootstrapConfig, SourceConfig + from modflow_devtools.dfns.registry import BootstrapConfig, SourceConfig base = BootstrapConfig( sources={ @@ -208,7 +210,7 @@ def test_bootstrap_config_merge(self): def test_get_bootstrap_config(self): """Test loading bundled bootstrap config.""" - from modflow_devtools.dfn.registry import get_bootstrap_config + from modflow_devtools.dfns.registry import get_bootstrap_config config = get_bootstrap_config() @@ -220,28 +222,9 @@ def test_get_bootstrap_config(self): class TestRegistryMeta: """Tests for registry metadata schemas.""" - def test_dfn_registry_file(self): - """Test DfnRegistryFile schema.""" - from modflow_devtools.dfn.registry import DfnRegistryFile - - file_entry = DfnRegistryFile(hash="sha256:abc123") - assert file_entry.hash == "sha256:abc123" - - def test_dfn_registry_meta_defaults(self): - """Test DfnRegistryMeta default values.""" - from modflow_devtools.dfn.registry import DfnRegistryMeta - - meta = DfnRegistryMeta() - - assert meta.schema_version == "1.0" - assert meta.generated_at is None - assert meta.devtools_version is None - assert meta.ref is None - assert meta.files == {} - def test_dfn_registry_meta_load(self, tmp_path): """Test loading 
DfnRegistryMeta from TOML file.""" - from modflow_devtools.dfn.registry import DfnRegistryMeta + from modflow_devtools.dfns.registry import DfnRegistryMeta registry_file = tmp_path / "dfns.toml" registry_file.write_text(""" @@ -269,7 +252,7 @@ def test_dfn_registry_meta_save(self, tmp_path): """Test saving DfnRegistryMeta to TOML file.""" import tomli - from modflow_devtools.dfn.registry import DfnRegistryFile, DfnRegistryMeta + from modflow_devtools.dfns.registry import DfnRegistryFile, DfnRegistryMeta meta = DfnRegistryMeta( schema_version="1.0", @@ -292,11 +275,6 @@ def test_dfn_registry_meta_save(self, tmp_path): assert data["files"]["test.dfn"]["hash"] == "sha256:abc123" -# ============================================================================= -# LocalDfnRegistry Tests -# ============================================================================= - - @requires_pkg("boltons", "pydantic") class TestLocalDfnRegistry: """Tests for LocalDfnRegistry class.""" @@ -374,18 +352,13 @@ def test_components_property(self, dfn_dir): assert components["gwf-chd"].name == "gwf-chd" -# ============================================================================= -# Cache Utilities Tests -# ============================================================================= - - @requires_pkg("pydantic") class TestCacheUtilities: """Tests for cache and config utilities.""" def test_get_cache_dir(self): """Test getting cache directory path.""" - from modflow_devtools.dfn.registry import get_cache_dir + from modflow_devtools.dfns.registry import get_cache_dir cache_dir = get_cache_dir("dfn") @@ -394,7 +367,7 @@ def test_get_cache_dir(self): def test_get_user_config_path(self): """Test getting user config path.""" - from modflow_devtools.dfn.registry import get_user_config_path + from modflow_devtools.dfns.registry import get_user_config_path config_path = get_user_config_path("dfn") @@ -403,18 +376,13 @@ def test_get_user_config_path(self): def 
test_get_cache_dir_custom_subdir(self): """Test cache dir with custom subdirectory.""" - from modflow_devtools.dfn.registry import get_cache_dir + from modflow_devtools.dfns.registry import get_cache_dir cache_dir = get_cache_dir("custom") assert cache_dir.name == "custom" -# ============================================================================= -# make_registry Tool Tests -# ============================================================================= - - @requires_pkg("tomli", "tomli_w") class TestMakeRegistry: """Tests for the registry generation tool.""" @@ -507,14 +475,9 @@ def test_cli_generate(self, dfn_dir, tmp_path): assert output_path.exists() -# ============================================================================= -# CLI Tests -# ============================================================================= - - @requires_pkg("pydantic") class TestCLI: - """Tests for the dfn CLI.""" + """Tests for the DFNs CLI.""" def test_main_help(self): """Test CLI help output.""" @@ -550,12 +513,7 @@ def test_sync_command_no_registry(self): assert result == 1 -# ============================================================================= -# RemoteDfnRegistry Tests (Mocked) -# ============================================================================= - - -@requires_pkg("pydantic", "pooch") +@requires_pkg("pydantic", "pooch", "boltons") class TestRemoteDfnRegistry: """Tests for RemoteDfnRegistry with mocked network calls.""" @@ -577,7 +535,7 @@ def test_unknown_source_raises(self): def test_construct_raw_url(self): """Test URL construction.""" - from modflow_devtools.dfn.registry import RemoteDfnRegistry + from modflow_devtools.dfns.registry import RemoteDfnRegistry registry = RemoteDfnRegistry(source="modflow6", ref="6.6.0") @@ -589,7 +547,7 @@ def test_construct_raw_url(self): def test_get_registry_cache_path(self): """Test getting registry cache path.""" - from modflow_devtools.dfn.registry import RemoteDfnRegistry + from 
modflow_devtools.dfns.registry import RemoteDfnRegistry registry = RemoteDfnRegistry(source="modflow6", ref="6.6.0") @@ -602,7 +560,7 @@ def test_get_registry_cache_path(self): def test_get_files_cache_dir(self): """Test getting files cache directory.""" - from modflow_devtools.dfn.registry import RemoteDfnRegistry + from modflow_devtools.dfns.registry import RemoteDfnRegistry registry = RemoteDfnRegistry(source="modflow6", ref="6.6.0") @@ -614,7 +572,7 @@ def test_get_files_cache_dir(self): def test_fetch_registry_not_found(self): """Test that fetching nonexistent registry raises appropriate error.""" - from modflow_devtools.dfn.registry import ( + from modflow_devtools.dfns.registry import ( DfnRegistryNotFoundError, RemoteDfnRegistry, ) @@ -624,10 +582,129 @@ def test_fetch_registry_not_found(self): with pytest.raises(DfnRegistryNotFoundError): registry._fetch_registry(force=True) + def test_init_with_repo_override(self): + """Test RemoteDfnRegistry with repo override.""" + from modflow_devtools.dfn import RemoteDfnRegistry + + registry = RemoteDfnRegistry( + source=TEST_DFN_SOURCE, + ref=TEST_DFN_REF, + repo=TEST_DFN_REPO, + ) + + assert registry.source == TEST_DFN_SOURCE + assert registry.ref == TEST_DFN_REF + assert registry.repo == TEST_DFN_REPO -# ============================================================================= -# Module-level Convenience Functions Tests -# ============================================================================= + def test_construct_raw_url_with_repo_override(self): + """Test URL construction with repo override.""" + from modflow_devtools.dfns.registry import RemoteDfnRegistry + + registry = RemoteDfnRegistry( + source=TEST_DFN_SOURCE, + ref=TEST_DFN_REF, + repo=TEST_DFN_REPO, + ) + + url = registry._construct_raw_url("doc/mf6io/mf6ivar/dfn") + + assert "raw.githubusercontent.com" in url + assert TEST_DFN_REPO in url + assert TEST_DFN_REF in url + + def test_fetch_registry(self): + """Test fetching registry from the test 
repository.""" + from modflow_devtools.dfns.registry import RemoteDfnRegistry + + registry = RemoteDfnRegistry( + source=TEST_DFN_SOURCE, + ref=TEST_DFN_REF, + repo=TEST_DFN_REPO, + ) + + meta = registry._fetch_registry(force=True) + + assert meta is not None + assert len(meta.files) > 0 + # Registry file may have a different ref than what we requested + # (e.g., generated from develop branch but accessed on registry branch) + assert meta.ref is not None + + def test_sync_files(self): + """Test syncing DFN files from the test repository.""" + from modflow_devtools.dfns.registry import RemoteDfnRegistry + + registry = RemoteDfnRegistry( + source=TEST_DFN_SOURCE, + ref=TEST_DFN_REF, + repo=TEST_DFN_REPO, + ) + + # Sync should succeed (fetches registry and sets up pooch) + registry.sync(force=True) + + # Should be able to fetch a DFN file + path = registry.get_dfn_path("gwf-chd") + assert path.exists() + + def test_get_dfn(self): + """Test getting a DFN from the test repository.""" + from modflow_devtools.dfn import Dfn + from modflow_devtools.dfns.registry import RemoteDfnRegistry + + registry = RemoteDfnRegistry( + source=TEST_DFN_SOURCE, + ref=TEST_DFN_REF, + repo=TEST_DFN_REPO, + ) + + # Ensure synced + registry.sync() + + dfn = registry.get_dfn("gwf-chd") + + assert isinstance(dfn, Dfn) + assert dfn.name == "gwf-chd" + + def test_get_spec(self): + """Test getting the full spec from the test repository.""" + from modflow_devtools.dfns import DfnSpec + from modflow_devtools.dfns.registry import RemoteDfnRegistry + + registry = RemoteDfnRegistry( + source=TEST_DFN_SOURCE, + ref=TEST_DFN_REF, + repo=TEST_DFN_REPO, + ) + + # Ensure synced + registry.sync() + + spec = registry.spec + + assert isinstance(spec, DfnSpec) + assert "gwf-chd" in spec + assert "sim-nam" in spec + + def test_list_components(self): + """Test listing available components from the test repository.""" + from modflow_devtools.dfns.registry import RemoteDfnRegistry + + registry = RemoteDfnRegistry( 
+ source=TEST_DFN_SOURCE, + ref=TEST_DFN_REF, + repo=TEST_DFN_REPO, + ) + + # Ensure synced + registry.sync() + + # Use spec.keys() to list components + components = list(registry.spec.keys()) + + assert len(components) > 100 + assert "gwf-chd" in components + assert "sim-nam" in components @requires_pkg("boltons", "pydantic") @@ -647,7 +724,7 @@ def test_list_components_local(self, dfn_dir): def test_get_sync_status(self): """Test get_sync_status function.""" - from modflow_devtools.dfn.registry import get_sync_status + from modflow_devtools.dfns.registry import get_sync_status status = get_sync_status() diff --git a/autotest/test_models.py b/autotest/test_models.py index 14cd99c6..a8a7e637 100644 --- a/autotest/test_models.py +++ b/autotest/test_models.py @@ -21,16 +21,10 @@ get_user_config_path, ) -# Test configuration (loaded from .env file via pytest-dotenv plugin) -TEST_REPO = os.getenv("TEST_REPO", "wpbonelli/modflow6-testmodels") -TEST_REF = os.getenv("TEST_REF", "registry") -TEST_SOURCE = os.getenv("TEST_SOURCE", "modflow6-testmodels") -TEST_SOURCE_NAME = os.getenv("TEST_SOURCE_NAME", "mf6/test") - - -# ============================================================================ -# Tests (Dynamic Registry) -# ============================================================================ +TEST_MODELS_REPO = os.getenv("TEST_MODELS_REPO", "wpbonelli/modflow6-testmodels") +TEST_MODELS_REF = os.getenv("TEST_MODELS_REF", "registry") +TEST_MODELS_SOURCE = os.getenv("TEST_MODELS_SOURCE", "modflow6-testmodels") +TEST_MODELS_SOURCE_NAME = os.getenv("TEST_MODELS_SOURCE_NAME", "mf6/test") class TestBootstrap: @@ -45,15 +39,14 @@ def test_load_bootstrap(self): def test_bootstrap_has_testmodels(self): """Test that testmodels is configured.""" bootstrap = ModelSourceConfig.load() - assert TEST_SOURCE in bootstrap.sources + assert TEST_MODELS_SOURCE in bootstrap.sources def test_bootstrap_testmodels_config(self): """Test testmodels configuration in bundled config (without 
user overlay).""" - # Load bundled config explicitly (no user config overlay) bundled_path = Path(__file__).parent.parent / "modflow_devtools" / "models" / "models.toml" bootstrap = ModelSourceConfig.load(bootstrap_path=bundled_path) - testmodels = bootstrap.sources[TEST_SOURCE] - # Bundled config should point to MODFLOW-ORG + testmodels = bootstrap.sources[TEST_MODELS_SOURCE] + assert "MODFLOW-ORG/modflow6-testmodels" in testmodels.repo assert "develop" in testmodels.refs or "master" in testmodels.refs @@ -134,8 +127,8 @@ def test_load_bootstrap_with_user_config(self, tmp_path): assert bootstrap.sources["custom-models"].repo == "user/custom-models" # Check that user config overrode bundled config for testmodels - if TEST_SOURCE in bootstrap.sources: - assert bootstrap.sources[TEST_SOURCE].repo == "user/modflow6-testmodels-fork" + if TEST_MODELS_SOURCE in bootstrap.sources: + assert bootstrap.sources[TEST_MODELS_SOURCE].repo == "user/modflow6-testmodels-fork" def test_load_bootstrap_explicit_path_no_overlay(self, tmp_path): """Test that explicit bootstrap path doesn't default to user config overlay.""" @@ -206,7 +199,7 @@ class TestBootstrapSourceMethods: def test_source_has_sync_method(self): """Test that ModelSourceRepo has sync method.""" bootstrap = ModelSourceConfig.load() - source = bootstrap.sources[TEST_SOURCE] + source = bootstrap.sources[TEST_MODELS_SOURCE] assert hasattr(source, "sync") assert callable(source.sync) @@ -224,13 +217,14 @@ def test_get_cache_root(self): def test_get_registry_cache_dir(self): """Test getting registry cache directory for a source/ref.""" - cache_dir = _DEFAULT_CACHE.get_registry_cache_dir(TEST_SOURCE_NAME, TEST_REF) + cache_dir = _DEFAULT_CACHE.get_registry_cache_dir(TEST_MODELS_SOURCE_NAME, TEST_MODELS_REF) # Normalize path separators for comparison (Windows uses \, Unix uses /) cache_dir_str = str(cache_dir).replace("\\", "/") assert ( - TEST_SOURCE_NAME in cache_dir_str or TEST_SOURCE_NAME.replace("/", "-") in 
cache_dir_str + TEST_MODELS_SOURCE_NAME in cache_dir_str + or TEST_MODELS_SOURCE_NAME.replace("/", "-") in cache_dir_str ) - assert TEST_REF in str(cache_dir) + assert TEST_MODELS_REF in str(cache_dir) assert "registries" in str(cache_dir) @@ -241,24 +235,24 @@ def test_discover_registry(self): """Test discovering registry for test repo.""" # Use test repo/ref from environment source = ModelSourceRepo( - repo=TEST_REPO, - name=TEST_SOURCE_NAME, - refs=[TEST_REF], + repo=TEST_MODELS_REPO, + name=TEST_MODELS_SOURCE_NAME, + refs=[TEST_MODELS_REF], ) - discovered = source.discover(ref=TEST_REF) + discovered = source.discover(ref=TEST_MODELS_REF) assert isinstance(discovered, DiscoveredModelRegistry) - assert discovered.source == TEST_SOURCE_NAME - assert discovered.ref == TEST_REF + assert discovered.source == TEST_MODELS_SOURCE_NAME + assert discovered.ref == TEST_MODELS_REF assert discovered.mode == "version_controlled" assert isinstance(discovered.registry, ModelRegistry) def test_discover_registry_nonexistent_ref(self): """Test that discovery fails gracefully for nonexistent ref.""" source = ModelSourceRepo( - repo=TEST_REPO, - name=TEST_SOURCE_NAME, + repo=TEST_MODELS_REPO, + name=TEST_MODELS_SOURCE_NAME, refs=["nonexistent-branch-12345"], ) @@ -272,125 +266,115 @@ class TestSync: def test_sync_single_source_single_ref(self): """Test syncing a single source/ref.""" - _DEFAULT_CACHE.clear(source=TEST_SOURCE_NAME, ref=TEST_REF) + _DEFAULT_CACHE.clear(source=TEST_MODELS_SOURCE_NAME, ref=TEST_MODELS_REF) source = ModelSourceRepo( - repo=TEST_REPO, - name=TEST_SOURCE_NAME, - refs=[TEST_REF], + repo=TEST_MODELS_REPO, + name=TEST_MODELS_SOURCE_NAME, + refs=[TEST_MODELS_REF], + verbose=True, ) - result = source.sync(ref=TEST_REF, verbose=True) + result = source.sync(ref=TEST_MODELS_REF, verbose=True) assert len(result.synced) == 1 assert len(result.failed) == 0 - assert (TEST_SOURCE_NAME, TEST_REF) in result.synced + assert (TEST_MODELS_SOURCE_NAME, TEST_MODELS_REF) in 
result.synced def test_sync_creates_cache(self): """Test that sync creates cached registry.""" - _DEFAULT_CACHE.clear(source=TEST_SOURCE_NAME, ref=TEST_REF) - assert not _DEFAULT_CACHE.has(TEST_SOURCE_NAME, TEST_REF) + _DEFAULT_CACHE.clear(source=TEST_MODELS_SOURCE_NAME, ref=TEST_MODELS_REF) + assert not _DEFAULT_CACHE.has(TEST_MODELS_SOURCE_NAME, TEST_MODELS_REF) source = ModelSourceRepo( - repo=TEST_REPO, - name=TEST_SOURCE_NAME, - refs=[TEST_REF], + repo=TEST_MODELS_REPO, + name=TEST_MODELS_SOURCE_NAME, + refs=[TEST_MODELS_REF], ) - source.sync(ref=TEST_REF) - - assert _DEFAULT_CACHE.has(TEST_SOURCE_NAME, TEST_REF) + source.sync(ref=TEST_MODELS_REF) + assert _DEFAULT_CACHE.has(TEST_MODELS_SOURCE_NAME, TEST_MODELS_REF) def test_sync_skip_cached(self): """Test that sync skips already-cached registries.""" - _DEFAULT_CACHE.clear(source=TEST_SOURCE_NAME, ref=TEST_REF) + _DEFAULT_CACHE.clear(source=TEST_MODELS_SOURCE_NAME, ref=TEST_MODELS_REF) source = ModelSourceRepo( - repo=TEST_REPO, - name=TEST_SOURCE_NAME, - refs=[TEST_REF], + repo=TEST_MODELS_REPO, + name=TEST_MODELS_SOURCE_NAME, + refs=[TEST_MODELS_REF], ) # First sync - result1 = source.sync(ref=TEST_REF) + result1 = source.sync(ref=TEST_MODELS_REF) assert len(result1.synced) == 1 # Second sync should skip - result2 = source.sync(ref=TEST_REF) + result2 = source.sync(ref=TEST_MODELS_REF) assert len(result2.synced) == 0 assert len(result2.skipped) == 1 def test_sync_force(self): """Test that force flag re-syncs cached registries.""" - _DEFAULT_CACHE.clear(source=TEST_SOURCE_NAME, ref=TEST_REF) + _DEFAULT_CACHE.clear(source=TEST_MODELS_SOURCE_NAME, ref=TEST_MODELS_REF) source = ModelSourceRepo( - repo=TEST_REPO, - name=TEST_SOURCE_NAME, - refs=[TEST_REF], + repo=TEST_MODELS_REPO, + name=TEST_MODELS_SOURCE_NAME, + refs=[TEST_MODELS_REF], ) # First sync - result_initial = source.sync(ref=TEST_REF) + result_initial = source.sync(ref=TEST_MODELS_REF) assert len(result_initial.failed) == 0, f"Initial sync failed: 
{result_initial.failed}" # Force sync - result = source.sync(ref=TEST_REF, force=True) + result = source.sync(ref=TEST_MODELS_REF, force=True) assert len(result.synced) == 1 assert len(result.skipped) == 0 def test_sync_via_source_method(self): """Test syncing via ModelSourceRepo.sync() method.""" - _DEFAULT_CACHE.clear(source=TEST_SOURCE_NAME, ref=TEST_REF) + _DEFAULT_CACHE.clear(source=TEST_MODELS_SOURCE_NAME, ref=TEST_MODELS_REF) # Create source with test repo override source = ModelSourceRepo( - repo=TEST_REPO, - name=TEST_SOURCE_NAME, - refs=[TEST_REF], + repo=TEST_MODELS_REPO, + name=TEST_MODELS_SOURCE_NAME, + refs=[TEST_MODELS_REF], ) # Sync via source method - result = source.sync(ref=TEST_REF, verbose=True) + result = source.sync(ref=TEST_MODELS_REF, verbose=True) assert len(result.synced) == 1 - assert (TEST_SOURCE_NAME, TEST_REF) in result.synced + assert (TEST_MODELS_SOURCE_NAME, TEST_MODELS_REF) in result.synced def test_source_is_synced_method(self): """Test ModelSourceRepo.is_synced() method.""" - _DEFAULT_CACHE.clear(source=TEST_SOURCE_NAME, ref=TEST_REF) + _DEFAULT_CACHE.clear(source=TEST_MODELS_SOURCE_NAME, ref=TEST_MODELS_REF) source = ModelSourceRepo( - repo=TEST_REPO, - name=TEST_SOURCE_NAME, - refs=[TEST_REF], + repo=TEST_MODELS_REPO, + name=TEST_MODELS_SOURCE_NAME, + refs=[TEST_MODELS_REF], ) - # Should not be synced initially - assert not source.is_synced(TEST_REF) - - # Sync - source.sync(ref=TEST_REF) - - # Should be synced now - assert source.is_synced(TEST_REF) + assert not source.is_synced(TEST_MODELS_REF) + source.sync(ref=TEST_MODELS_REF) + assert source.is_synced(TEST_MODELS_REF) def test_source_list_synced_refs_method(self): """Test ModelSourceRepo.list_synced_refs() method.""" - _DEFAULT_CACHE.clear(source=TEST_SOURCE_NAME, ref=TEST_REF) + _DEFAULT_CACHE.clear(source=TEST_MODELS_SOURCE_NAME, ref=TEST_MODELS_REF) source = ModelSourceRepo( - repo=TEST_REPO, - name=TEST_SOURCE_NAME, - refs=[TEST_REF], + repo=TEST_MODELS_REPO, + 
name=TEST_MODELS_SOURCE_NAME, + refs=[TEST_MODELS_REF], ) - # Should have no synced refs initially - assert TEST_REF not in source.list_synced_refs() - - # Sync - source.sync(ref=TEST_REF) - - # Should show in synced refs - assert TEST_REF in source.list_synced_refs() + assert TEST_MODELS_REF not in source.list_synced_refs() + source.sync(ref=TEST_MODELS_REF) + assert TEST_MODELS_REF in source.list_synced_refs() @pytest.mark.xdist_group("registry_cache") @@ -400,15 +384,15 @@ class TestRegistry: @pytest.fixture(scope="class") def synced_registry(self): """Fixture that syncs and loads a registry once for all tests.""" - _DEFAULT_CACHE.clear(source=TEST_SOURCE_NAME, ref=TEST_REF) + _DEFAULT_CACHE.clear(source=TEST_MODELS_SOURCE_NAME, ref=TEST_MODELS_REF) source = ModelSourceRepo( - repo=TEST_REPO, - name=TEST_SOURCE_NAME, - refs=[TEST_REF], + repo=TEST_MODELS_REPO, + name=TEST_MODELS_SOURCE_NAME, + refs=[TEST_MODELS_REF], ) - result = source.sync(ref=TEST_REF) + result = source.sync(ref=TEST_MODELS_REF) assert len(result.failed) == 0, f"Fixture sync failed: {result.failed}" - registry = _DEFAULT_CACHE.load(TEST_SOURCE_NAME, TEST_REF) + registry = _DEFAULT_CACHE.load(TEST_MODELS_SOURCE_NAME, TEST_MODELS_REF) return registry def test_registry_has_metadata(self, synced_registry): @@ -419,15 +403,12 @@ def test_registry_has_metadata(self, synced_registry): def test_registry_has_files(self, synced_registry): """Test that registry has files.""" assert len(synced_registry.files) > 0 - # Check file structure first_file = next(iter(synced_registry.files.values())) assert hasattr(first_file, "hash") - # Note: url field removed in v2 (dynamic URL construction) def test_registry_has_models(self, synced_registry): """Test that registry has models.""" assert len(synced_registry.models) > 0 - # Check model structure first_model_files = next(iter(synced_registry.models.values())) assert isinstance(first_model_files, list) assert len(first_model_files) > 0 @@ -452,7 +433,7 @@ def 
test_cli_info(self, capsys): cmd_info(args) captured = capsys.readouterr() - assert TEST_SOURCE in captured.out or TEST_SOURCE_NAME in captured.out + assert TEST_MODELS_SOURCE in captured.out or TEST_MODELS_SOURCE_NAME in captured.out def test_cli_list_empty(self, capsys): """Test 'list' command with no cached registries.""" @@ -470,18 +451,18 @@ def test_cli_list_empty(self, capsys): def test_cli_list_with_cache(self, capsys): """Test 'list' command with cached registries.""" - _DEFAULT_CACHE.clear(source=TEST_SOURCE_NAME, ref=TEST_REF) + _DEFAULT_CACHE.clear(source=TEST_MODELS_SOURCE_NAME, ref=TEST_MODELS_REF) source = ModelSourceRepo( - repo=TEST_REPO, - name=TEST_SOURCE_NAME, - refs=[TEST_REF], + repo=TEST_MODELS_REPO, + name=TEST_MODELS_SOURCE_NAME, + refs=[TEST_MODELS_REF], ) - result = source.sync(ref=TEST_REF) + result = source.sync(ref=TEST_MODELS_REF) # Verify sync succeeded before testing list command assert len(result.failed) == 0, f"Sync failed: {result.failed}" assert len(result.synced) == 1, f"Expected 1 synced registry, got {len(result.synced)}" - assert (TEST_SOURCE_NAME, TEST_REF) in result.synced + assert (TEST_MODELS_SOURCE_NAME, TEST_MODELS_REF) in result.synced import argparse @@ -491,7 +472,7 @@ def test_cli_list_with_cache(self, capsys): cmd_list(args) captured = capsys.readouterr() - assert f"{TEST_SOURCE_NAME}@{TEST_REF}" in captured.out + assert f"{TEST_MODELS_SOURCE_NAME}@{TEST_MODELS_REF}" in captured.out assert "Models:" in captured.out def test_cli_clear(self, capsys): @@ -530,49 +511,43 @@ class TestIntegration: def test_full_workflow(self): """Test complete workflow: discover -> cache -> load.""" - # Clear cache - _DEFAULT_CACHE.clear(source=TEST_SOURCE_NAME, ref=TEST_REF) + _DEFAULT_CACHE.clear(source=TEST_MODELS_SOURCE_NAME, ref=TEST_MODELS_REF) - # Create test source source = ModelSourceRepo( - repo=TEST_REPO, - name=TEST_SOURCE_NAME, - refs=[TEST_REF], + repo=TEST_MODELS_REPO, + name=TEST_MODELS_SOURCE_NAME, + 
refs=[TEST_MODELS_REF], ) - # Discover registry - discovered = source.discover(ref=TEST_REF) + discovered = source.discover(ref=TEST_MODELS_REF) assert isinstance(discovered.registry, ModelRegistry) - # Cache registry - cache_path = _DEFAULT_CACHE.save(discovered.registry, TEST_SOURCE_NAME, TEST_REF) + cache_path = _DEFAULT_CACHE.save( + discovered.registry, TEST_MODELS_SOURCE_NAME, TEST_MODELS_REF + ) assert cache_path.exists() - # Load from cache - loaded = _DEFAULT_CACHE.load(TEST_SOURCE_NAME, TEST_REF) + loaded = _DEFAULT_CACHE.load(TEST_MODELS_SOURCE_NAME, TEST_MODELS_REF) assert loaded is not None assert len(loaded.models) == len(discovered.registry.models) def test_sync_and_list_models(self): """Test syncing and listing available models.""" - _DEFAULT_CACHE.clear(source=TEST_SOURCE_NAME, ref=TEST_REF) + _DEFAULT_CACHE.clear(source=TEST_MODELS_SOURCE_NAME, ref=TEST_MODELS_REF) - # Sync source = ModelSourceRepo( - repo=TEST_REPO, - name=TEST_SOURCE_NAME, - refs=[TEST_REF], + repo=TEST_MODELS_REPO, + name=TEST_MODELS_SOURCE_NAME, + refs=[TEST_MODELS_REF], ) - result = source.sync(ref=TEST_REF) + result = source.sync(ref=TEST_MODELS_REF) assert len(result.synced) == 1 - # List cached registries cached = _DEFAULT_CACHE.list() assert len(cached) >= 1 - assert (TEST_SOURCE_NAME, TEST_REF) in cached + assert (TEST_MODELS_SOURCE_NAME, TEST_MODELS_REF) in cached - # Load and check models - registry = _DEFAULT_CACHE.load(TEST_SOURCE_NAME, TEST_REF) + registry = _DEFAULT_CACHE.load(TEST_MODELS_SOURCE_NAME, TEST_MODELS_REF) assert len(registry.models) > 0 diff --git a/modflow_devtools/dfn/__init__.py b/modflow_devtools/dfns/__init__.py similarity index 97% rename from modflow_devtools/dfn/__init__.py rename to modflow_devtools/dfns/__init__.py index 1ee34e1f..75214dd2 100644 --- a/modflow_devtools/dfn/__init__.py +++ b/modflow_devtools/dfns/__init__.py @@ -2,6 +2,7 @@ MODFLOW 6 definition file tools. 
""" +import warnings from abc import ABC, abstractmethod from collections.abc import Iterator, Mapping from dataclasses import asdict, dataclass, field, replace @@ -18,19 +19,30 @@ from boltons.iterutils import remap from packaging.version import Version -from modflow_devtools.dfn.parse import ( +# Experimental API warning +warnings.warn( + "The modflow_devtools.dfns API is experimental and may change or be " + "removed in future versions without following normal deprecation procedures. " + "Use at your own risk. To suppress this warning, use:\n" + " warnings.filterwarnings('ignore', " + "message='.*modflow_devtools.dfns.*experimental.*')", + FutureWarning, + stacklevel=2, +) + +from modflow_devtools.dfns.parse import ( is_advanced_package, is_multi_package, parse_dfn, try_parse_bool, try_parse_parent, ) -from modflow_devtools.dfn.schema.block import Block, Blocks, block_sort_key -from modflow_devtools.dfn.schema.field import Field, Fields -from modflow_devtools.dfn.schema.ref import Ref -from modflow_devtools.dfn.schema.v1 import SCALAR_TYPES as V1_SCALAR_TYPES -from modflow_devtools.dfn.schema.v1 import FieldV1 -from modflow_devtools.dfn.schema.v2 import FieldV2 +from modflow_devtools.dfns.schema.block import Block, Blocks, block_sort_key +from modflow_devtools.dfns.schema.field import Field, Fields +from modflow_devtools.dfns.schema.ref import Ref +from modflow_devtools.dfns.schema.v1 import SCALAR_TYPES as V1_SCALAR_TYPES +from modflow_devtools.dfns.schema.v1 import FieldV1 +from modflow_devtools.dfns.schema.v2 import FieldV2 from modflow_devtools.misc import drop_none_or_empty, try_literal_eval __all__ = [ @@ -742,7 +754,7 @@ def is_valid(path: str | PathLike, format: str = "dfn", verbose: bool = False) - def _get_registry_module(): """Lazy import of registry module to avoid circular imports.""" - from modflow_devtools.dfn import registry + from modflow_devtools.dfns import registry return registry diff --git a/modflow_devtools/dfn/__main__.py 
b/modflow_devtools/dfns/__main__.py similarity index 96% rename from modflow_devtools/dfn/__main__.py rename to modflow_devtools/dfns/__main__.py index d9a216f9..7a39ad41 100644 --- a/modflow_devtools/dfn/__main__.py +++ b/modflow_devtools/dfns/__main__.py @@ -2,10 +2,10 @@ Command-line interface for the DFNs API. Usage: - python -m modflow_devtools.dfn sync [--ref REF] [--force] - python -m modflow_devtools.dfn info - python -m modflow_devtools.dfn list [--ref REF] - python -m modflow_devtools.dfn clean [--all] + python -m modflow_devtools.dfns sync [--ref REF] [--force] + python -m modflow_devtools.dfns info + python -m modflow_devtools.dfns list [--ref REF] + python -m modflow_devtools.dfns clean [--all] """ from __future__ import annotations @@ -14,7 +14,7 @@ import shutil import sys -from modflow_devtools.dfn.registry import ( +from modflow_devtools.dfns.registry import ( DfnRegistryDiscoveryError, DfnRegistryNotFoundError, get_bootstrap_config, diff --git a/modflow_devtools/dfn/dfns.toml b/modflow_devtools/dfns/dfns.toml similarity index 100% rename from modflow_devtools/dfn/dfns.toml rename to modflow_devtools/dfns/dfns.toml diff --git a/modflow_devtools/dfn/fetch.py b/modflow_devtools/dfns/fetch.py similarity index 100% rename from modflow_devtools/dfn/fetch.py rename to modflow_devtools/dfns/fetch.py diff --git a/modflow_devtools/dfn/make_registry.py b/modflow_devtools/dfns/make_registry.py similarity index 100% rename from modflow_devtools/dfn/make_registry.py rename to modflow_devtools/dfns/make_registry.py diff --git a/modflow_devtools/dfn/parse.py b/modflow_devtools/dfns/parse.py similarity index 100% rename from modflow_devtools/dfn/parse.py rename to modflow_devtools/dfns/parse.py diff --git a/modflow_devtools/dfn/registry.py b/modflow_devtools/dfns/registry.py similarity index 92% rename from modflow_devtools/dfn/registry.py rename to modflow_devtools/dfns/registry.py index 4decdd17..bb8bebfc 100644 --- a/modflow_devtools/dfn/registry.py +++ 
b/modflow_devtools/dfns/registry.py @@ -22,7 +22,7 @@ if TYPE_CHECKING: import pooch - from modflow_devtools.dfn import Dfn, DfnSpec + from modflow_devtools.dfns import Dfn, DfnSpec __all__ = [ "BootstrapConfig", @@ -366,7 +366,7 @@ def model_post_init(self, __context) -> None: def spec(self) -> DfnSpec: """Load and return the DFN specification from local files.""" if self._spec is None: - from modflow_devtools.dfn import DfnSpec + from modflow_devtools.dfns import DfnSpec self._spec = DfnSpec.load(self.path) return self._spec @@ -386,15 +386,37 @@ class RemoteDfnRegistry(DfnRegistry): Registry for remote DFN files with Pooch-based caching. Handles remote registry discovery, caching, and DFN file fetching. - URLs are constructed dynamically from bootstrap metadata. + URLs are constructed dynamically from bootstrap metadata, or can be + overridden by providing explicit repo/dfn_path/registry_path values. Examples -------- + >>> # Use bootstrap config >>> registry = RemoteDfnRegistry(source="modflow6", ref="6.6.0") >>> dfn = registry.get_dfn("gwf-chd") - >>> path = registry.get_dfn_path("gwf-chd") + + >>> # Override repo directly (useful for testing) + >>> registry = RemoteDfnRegistry( + ... source="modflow6", + ... ref="registry", + ... repo="wpbonelli/modflow6", + ... ) """ + # Optional overrides (bypass bootstrap config when provided) + repo: str | None = Field( + default=None, + description="GitHub repository (owner/repo). Overrides bootstrap config.", + ) + dfn_path: str | None = Field( + default=None, + description="Path to DFN files in repo. Overrides bootstrap config.", + ) + registry_path: str | None = Field( + default=None, + description="Path to registry file in repo. 
Overrides bootstrap config.", + ) + _registry_meta: DfnRegistryMeta | None = None _source_config: SourceConfig | None = None _pooch: pooch.Pooch | None = None @@ -405,15 +427,25 @@ def model_post_init(self, __context) -> None: self._ensure_source_config() def _ensure_source_config(self) -> SourceConfig: - """Load and cache source configuration from bootstrap.""" + """Load and cache source configuration from bootstrap or overrides.""" if self._source_config is None: - config = get_bootstrap_config() - if self.source not in config.sources: - raise ValueError( - f"Unknown source '{self.source}'. " - f"Available sources: {list(config.sources.keys())}" + # If repo is provided, construct config from overrides + if self.repo is not None: + self._source_config = SourceConfig( + repo=self.repo, + dfn_path=self.dfn_path or "doc/mf6io/mf6ivar/dfn", + registry_path=self.registry_path or ".registry/dfns.toml", + refs=[self.ref], ) - self._source_config = config.sources[self.source] + else: + # Load from bootstrap config + config = get_bootstrap_config() + if self.source not in config.sources: + raise ValueError( + f"Unknown source '{self.source}'. 
" + f"Available sources: {list(config.sources.keys())}" + ) + self._source_config = config.sources[self.source] return self._source_config def _get_registry_cache_path(self) -> Path: @@ -550,7 +582,7 @@ def registry_meta(self) -> DfnRegistryMeta: def spec(self) -> DfnSpec: """Load and return the DFN specification from cached files.""" if self._spec is None: - from modflow_devtools.dfn import DfnSpec + from modflow_devtools.dfns import DfnSpec # Ensure all files are fetched self._fetch_all_files() diff --git a/modflow_devtools/dfn/schema/block.py b/modflow_devtools/dfns/schema/block.py similarity index 87% rename from modflow_devtools/dfn/schema/block.py rename to modflow_devtools/dfns/schema/block.py index ed0f32af..f2c3f78b 100644 --- a/modflow_devtools/dfn/schema/block.py +++ b/modflow_devtools/dfns/schema/block.py @@ -1,6 +1,6 @@ from collections.abc import Mapping -from modflow_devtools.dfn.schema.field import Fields +from modflow_devtools.dfns.schema.field import Fields Block = Fields Blocks = Mapping[str, Block] diff --git a/modflow_devtools/dfn/schema/field.py b/modflow_devtools/dfns/schema/field.py similarity index 100% rename from modflow_devtools/dfn/schema/field.py rename to modflow_devtools/dfns/schema/field.py diff --git a/modflow_devtools/dfn/schema/ref.py b/modflow_devtools/dfns/schema/ref.py similarity index 100% rename from modflow_devtools/dfn/schema/ref.py rename to modflow_devtools/dfns/schema/ref.py diff --git a/modflow_devtools/dfn/schema/v1.py b/modflow_devtools/dfns/schema/v1.py similarity index 96% rename from modflow_devtools/dfn/schema/v1.py rename to modflow_devtools/dfns/schema/v1.py index c1669f52..c57d9daf 100644 --- a/modflow_devtools/dfn/schema/v1.py +++ b/modflow_devtools/dfns/schema/v1.py @@ -1,7 +1,7 @@ from dataclasses import dataclass from typing import Literal -from modflow_devtools.dfn.schema.field import Field +from modflow_devtools.dfns.schema.field import Field FieldType = Literal[ "keyword", diff --git 
a/modflow_devtools/dfn/schema/v2.py b/modflow_devtools/dfns/schema/v2.py similarity index 94% rename from modflow_devtools/dfn/schema/v2.py rename to modflow_devtools/dfns/schema/v2.py index c26c25c6..bf676e16 100644 --- a/modflow_devtools/dfn/schema/v2.py +++ b/modflow_devtools/dfns/schema/v2.py @@ -1,7 +1,7 @@ from dataclasses import dataclass from typing import Literal -from modflow_devtools.dfn.schema.field import Field +from modflow_devtools.dfns.schema.field import Field FieldType = Literal["keyword", "integer", "double", "string", "record", "array", "list"] From bd0ee7f1a1e7d040b22b2f05a9259ee1b3899dfb Mon Sep 17 00:00:00 2001 From: w-bonelli Date: Tue, 20 Jan 2026 13:55:18 -0500 Subject: [PATCH 26/36] compatibility plan --- docs/md/dev/dfns.md | 165 ++++++++++++++++++++++++++------------------ 1 file changed, 97 insertions(+), 68 deletions(-) diff --git a/docs/md/dev/dfns.md b/docs/md/dev/dfns.md index bb15d7be..3fd39b2d 100644 --- a/docs/md/dev/dfns.md +++ b/docs/md/dev/dfns.md @@ -1062,79 +1062,109 @@ def _detect_schema_version(self) -> Version: ### API compatibility -**Backwards compatible API design**: +**Breaking changes in current implementation**: -```python -# Existing dfn branch API (continue to work) -from modflow_devtools.dfn import load, fetch_dfns +The `dfn` branch introduces fundamental breaking changes that make it incompatible with a 1.x release: -# Works exactly as before -dfn = load("/path/to/dfn/file.dfn") -fetch_dfns("MODFLOW-ORG", "modflow6", "6.6.0", "/tmp/dfns") +1. **Core types changed from TypedDict to dataclass**: + ```python + # Old (develop) - dict-like access + dfn["name"] + field.get("type") + + # New (dfn branch) - attribute access + dfn.name + field.type + ``` + +2. **`Dfn` structure changed**: + - Removed: `sln` + - Added: `schema_version`, `parent`, `blocks` + - Renamed: `fkeys` → `children` + +3. 
**Removed exports**: + - `get_dfns()` - now `fetch_dfns()` in submodule, not re-exported from main module + - `FormatVersion`, `Sln`, `FieldType`, `Reader` type aliases + +4. **`Field` structure changed** - different attributes and semantics between v1/v2 + +**Why aliasing is not feasible**: + +The TypedDict → dataclass change is fundamental and cannot be cleanly aliased: +- Code using `dfn["name"]` syntax would break immediately +- Making a dataclass behave like a dict requires implementing `__getitem__`, `get()`, `keys()`, `values()`, `items()`, etc. +- Even with these methods, isinstance checks and type hints would behave differently +- The complexity and maintenance burden outweigh the benefits -# New DFNs API (additive, doesn't break existing) +**Recommendation**: Release as **devtools 2.0**, not 1.x. + +**New API (devtools 2.x)**: + +```python +# DFNs API from modflow_devtools.dfn import DfnSpec, get_dfn, get_registry, sync_dfns -# New functionality +# Sync and access DFNs sync_dfns(ref="6.6.0") dfn = get_dfn("gwf-chd", ref="6.6.0") registry = get_registry(ref="6.6.0") -spec = registry.spec # Registry wraps a DfnSpec -``` +spec = registry.spec -**No breaking changes to existing classes**: -- `Dfn`, `Block`, `Field` dataclasses remain compatible -- `FieldV1`, `FieldV2` continue to work -- `MapV1To2` schema mapping continues to work -- Add `MapV1To11` and `MapV11To2` as needed -- `load()` function continues to work (loads individual DFN files) -- New `DfnSpec` class is additive (doesn't break existing code) +# Attribute access (dataclass style) +print(dfn.name) # "gwf-chd" +print(dfn.blocks["options"]) -**Deprecation strategy**: -- Mark old APIs as deprecated with clear migration path -- Deprecation warnings point to new equivalent functionality -- Keep deprecated APIs working for at least one major version -- Document migration in release notes and migration guide +# fetch_dfns() still available for manual downloads +from modflow_devtools.dfn.fetch import 
fetch_dfns +fetch_dfns("MODFLOW-ORG", "modflow6", "6.6.0", "/tmp/dfns") +``` ### Migration timeline -**devtools 1.x** (current): -- ✅ Merge dfn branch with v1.1 schema (stable, no breaking changes) -- ✅ Implement DFNs API with v1/v1.1 support -- ✅ FloPy 3 continues using v1.1 schema from mainline -- ✅ All existing APIs remain unchanged and supported -- ⚠️ Deprecate `fetch_dfns()` in favor of DFNs API (but keep working) - -**devtools 2.0** (future): -- ✅ Add v2 schema support (v1, v1.1, and v2 all work) -- ✅ Merge dfn-v2 branch to mainline -- ✅ FloPy 4 begins using v2 schema -- ✅ FloPy 3 continues using v1.1 schema (no changes needed) -- ⚠️ Deprecate v1 schema support (but keep working for one more major version) +**devtools 1.x** (current stable): +- Existing `modflow_devtools/dfn.py` with TypedDict-based API +- `get_dfns()` function for manual downloads +- No registry infrastructure +- **No changes** - maintain stability for existing users + +**devtools 2.0** (this work): +- ❌ Breaking: `Dfn`, `Field` change from TypedDict to dataclass +- ❌ Breaking: `get_dfns()` renamed to `fetch_dfns()` (in submodule) +- ❌ Breaking: Several type aliases removed or moved +- ✅ New: Full DFNs API with registry infrastructure +- ✅ New: `DfnSpec` class with hierarchical and flat access +- ✅ New: `RemoteDfnRegistry`, `LocalDfnRegistry` classes +- ✅ New: CLI commands (sync, info, list, clean) +- ✅ New: Schema versioning and mapping (v1 ↔ v2) +- ✅ New: Pydantic-based configuration and validation + +**devtools 2.x** (future minor releases): +- Add v2 DFN schema support when MODFLOW 6 adopts it +- Schema mapping between all versions (v1, v1.1, v2) +- Additional CLI commands and features +- Performance improvements **devtools 3.0** (distant future): -- ✅ v1.1 and v2 schema both fully supported -- ❌ Remove v1 schema support (deprecated in 2.0) -- ⚠️ Final deprecation warnings for any legacy APIs +- Consider removing v1 schema support (with deprecation warnings in 2.x) +- Potential further 
API refinements **Key principles**: -1. **Additive changes only** on mainline during 1.x -2. **Multi-version support** - DFNs API works with v1, v1.1, and v2 simultaneously -3. **No forced upgrades** - FloPy 3 never has to migrate off v1.1 -4. **Explicit migration** - Users opt-in to v2 via schema mapping -5. **Long deprecation** - At least one major version warning before removal +1. **Clean break at 2.0** - no half-measures with aliasing +2. **Multi-version schema support** - DFNs API works with v1, v1.1, and v2 simultaneously +3. **Clear migration path** - document all breaking changes in release notes +4. **Semantic versioning** - breaking changes require major version bump **Testing strategy**: - Test suite covers all schema versions (v1, v1.1, v2) - Test schema mapping in all directions (v1↔v1.1↔v2) -- Test FloPy 3 integration continuously (don't break existing consumers) - Test mixed-version scenarios (different refs with different schemas) +- Integration tests with real MODFLOW 6 repository **Documentation**: -- Clear migration guides for each transition +- Clear migration guide from 1.x to 2.x +- Document all breaking changes with before/after examples - Document which MODFLOW 6 versions use which schema versions - Examples showing multi-version usage -- Deprecation timeline clearly communicated ## Implementation Dependencies @@ -1220,34 +1250,33 @@ Merge sequence: - Adds substantial new functionality (schema classes, parsers, etc.) 3. 
**Finally**: Implement DFNs API features on `develop` (registries, sync, CLI, `DfnSpec`) -API compatibility during merge: +API changes during merge: ```python # Old dfn.py API (on develop now) - uses TypedDicts from modflow_devtools.dfn import get_dfns, Field, Dfn +dfn["name"] # dict-like access -# New dfn/ package API (after dfn branch merge) - upgrades to dataclasses -from modflow_devtools.dfn import get_dfns # Aliased to fetch_dfns, still works -from modflow_devtools.dfn import fetch_dfns # New preferred name -from modflow_devtools.dfn import load, Dfn, Block, Field # Upgraded to dataclasses +# New dfn/ package API (after dfn branch merge) - dataclasses +from modflow_devtools.dfn import Dfn, Block, Field # Now dataclasses +from modflow_devtools.dfn.fetch import fetch_dfns # Renamed, moved to submodule from modflow_devtools.dfn import DfnSpec, get_registry, sync_dfns # New additions - -# The import path stays the same, functionality expands -# get_dfns() kept as alias for backwards compatibility +dfn.name # attribute access ``` -Breaking changes (justified): -- `Field`, `Dfn`, etc. change from `TypedDict` to `dataclass` - more powerful, better typing -- This is acceptable since only internal dogfooding currently (FloPy uses schema, not these classes directly) - -**Needed for DFNs API**: -- ❌ Bootstrap file and registry schema -- ❌ Registry discovery and synchronization -- ❌ Pooch integration for file caching -- ❌ Registry classes (`DfnRegistry`, `RemoteDfnRegistry`, `LocalDfnRegistry`) -- ❌ CLI commands (sync, info, list, clean) -- ❌ Module-level convenience API -- ❌ Registry generation tool (`make_registry.py`) -- ❌ Integration with MODFLOW 6 CI +**Breaking changes** (see [API compatibility](#api-compatibility) section for full details): +- `Field`, `Dfn`, etc. 
change from `TypedDict` to `dataclass` - **requires 2.0 release** +- `get_dfns()` renamed to `fetch_dfns()` and moved to submodule +- Several type aliases removed or moved to schema submodules + +**Implementation status** (DFNs API): +- ✅ Bootstrap file and registry schema +- ✅ Registry discovery and synchronization +- ✅ Pooch integration for file caching +- ✅ Registry classes (`DfnRegistry`, `RemoteDfnRegistry`, `LocalDfnRegistry`) +- ✅ CLI commands (sync, info, list, clean) +- ✅ Module-level convenience API +- ✅ Registry generation tool (`make_registry.py`) +- ⚠️ Integration with MODFLOW 6 CI (requires registry branch merge in MF6 repo) ### Core components From 6901a0e21bfdca9c47359ff99decbe6e9a7696b2 Mon Sep 17 00:00:00 2001 From: w-bonelli Date: Wed, 21 Jan 2026 17:20:30 -0500 Subject: [PATCH 27/36] add ftype attribute to Dfn --- modflow_devtools/dfns/__init__.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/modflow_devtools/dfns/__init__.py b/modflow_devtools/dfns/__init__.py index 75214dd2..0245da85 100644 --- a/modflow_devtools/dfns/__init__.py +++ b/modflow_devtools/dfns/__init__.py @@ -97,6 +97,7 @@ class Dfn: parent: str | None = None advanced: bool = False multi: bool = False + ftype: str | None = None ref: Ref | None = None blocks: Blocks | None = None children: Dfns | None = None @@ -533,6 +534,7 @@ def map(self, dfn: Dfn) -> Dfn: name=dfn.name, advanced=dfn.advanced, multi=dfn.multi, + ftype=dfn.ftype or (dfn.name.split("-", 1)[1].upper() if "-" in dfn.name else None), ref=dfn.ref, blocks=MapV1To2.map_blocks(dfn), schema_version=v2, @@ -569,18 +571,22 @@ def load(f, format: str = "dfn", **kwargs) -> Dfn: parent=try_parse_parent(meta), advanced=is_advanced_package(meta), multi=is_multi_package(meta), + ftype=name.split("-", 1)[1].upper() if "-" in name else None, blocks=blocks, ) elif format == "toml": data = tomli.load(f) + dfn_name = data.pop("name", kwargs.pop("name", None)) dfn_fields = { - "name": data.pop("name", 
kwargs.pop("name", None)), + "name": dfn_name, "schema_version": Version(str(data.pop("schema_version", "2"))), "parent": data.pop("parent", None), "advanced": data.pop("advanced", False), "multi": data.pop("multi", False), + "ftype": data.pop("ftype", None) + or (dfn_name.split("-", 1)[1].upper() if dfn_name and "-" in dfn_name else None), "ref": data.pop("ref", None), } From a225de597c830e5f0a228fb6d9e3ba7016b00d77 Mon Sep 17 00:00:00 2001 From: Bonelli Date: Sun, 22 Feb 2026 13:50:28 -0500 Subject: [PATCH 28/36] fixes --- .github/workflows/ci.yml | 1 - autotest/models.toml | 7 --- autotest/{test_dfn.py => test_dfns.py} | 0 ..._dfn_registry.py => test_dfns_registry.py} | 44 +++++++++---------- 4 files changed, 22 insertions(+), 30 deletions(-) delete mode 100644 autotest/models.toml rename autotest/{test_dfn.py => test_dfns.py} (100%) rename autotest/{test_dfn_registry.py => test_dfns_registry.py} (94%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 29683872..b01fcb76 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,7 +9,6 @@ on: branches: - main - develop - - dfn paths-ignore: - '**.md' - '.github/workflows/release.yml' diff --git a/autotest/models.toml b/autotest/models.toml deleted file mode 100644 index 80a81a54..00000000 --- a/autotest/models.toml +++ /dev/null @@ -1,7 +0,0 @@ -schema_version = "1.0" - -[files] - -[models] - -[examples] diff --git a/autotest/test_dfn.py b/autotest/test_dfns.py similarity index 100% rename from autotest/test_dfn.py rename to autotest/test_dfns.py diff --git a/autotest/test_dfn_registry.py b/autotest/test_dfns_registry.py similarity index 94% rename from autotest/test_dfn_registry.py rename to autotest/test_dfns_registry.py index fc2dbdd7..5bbc1281 100644 --- a/autotest/test_dfn_registry.py +++ b/autotest/test_dfns_registry.py @@ -281,7 +281,7 @@ class TestLocalDfnRegistry: def test_init(self, dfn_dir): """Test LocalDfnRegistry initialization.""" - from modflow_devtools.dfn 
import LocalDfnRegistry + from modflow_devtools.dfns import LocalDfnRegistry registry = LocalDfnRegistry(path=dfn_dir, ref="local") @@ -291,7 +291,7 @@ def test_init(self, dfn_dir): def test_spec_property(self, dfn_dir): """Test accessing spec through registry.""" - from modflow_devtools.dfn import LocalDfnRegistry + from modflow_devtools.dfns import LocalDfnRegistry registry = LocalDfnRegistry(path=dfn_dir) @@ -302,7 +302,7 @@ def test_spec_property(self, dfn_dir): def test_get_dfn(self, dfn_dir): """Test getting a DFN by name.""" - from modflow_devtools.dfn import LocalDfnRegistry + from modflow_devtools.dfns import LocalDfnRegistry registry = LocalDfnRegistry(path=dfn_dir) @@ -313,7 +313,7 @@ def test_get_dfn(self, dfn_dir): def test_get_dfn_path(self, dfn_dir): """Test getting file path for a component.""" - from modflow_devtools.dfn import LocalDfnRegistry + from modflow_devtools.dfns import LocalDfnRegistry registry = LocalDfnRegistry(path=dfn_dir) @@ -324,7 +324,7 @@ def test_get_dfn_path(self, dfn_dir): def test_get_dfn_path_not_found(self, dfn_dir): """Test getting path for nonexistent component raises FileNotFoundError.""" - from modflow_devtools.dfn import LocalDfnRegistry + from modflow_devtools.dfns import LocalDfnRegistry registry = LocalDfnRegistry(path=dfn_dir) @@ -333,7 +333,7 @@ def test_get_dfn_path_not_found(self, dfn_dir): def test_schema_version_property(self, dfn_dir): """Test schema_version property.""" - from modflow_devtools.dfn import LocalDfnRegistry + from modflow_devtools.dfns import LocalDfnRegistry registry = LocalDfnRegistry(path=dfn_dir) @@ -341,7 +341,7 @@ def test_schema_version_property(self, dfn_dir): def test_components_property(self, dfn_dir): """Test components property returns flat dict.""" - from modflow_devtools.dfn import LocalDfnRegistry + from modflow_devtools.dfns import LocalDfnRegistry registry = LocalDfnRegistry(path=dfn_dir) @@ -389,7 +389,7 @@ class TestMakeRegistry: def test_compute_file_hash(self, tmp_path): 
"""Test computing file hash.""" - from modflow_devtools.dfn.make_registry import compute_file_hash + from modflow_devtools.dfns.make_registry import compute_file_hash test_file = tmp_path / "test.txt" test_file.write_text("hello world") @@ -402,7 +402,7 @@ def test_compute_file_hash(self, tmp_path): def test_scan_dfn_directory(self, dfn_dir): """Test scanning a DFN directory.""" - from modflow_devtools.dfn.make_registry import scan_dfn_directory + from modflow_devtools.dfns.make_registry import scan_dfn_directory files = scan_dfn_directory(dfn_dir) @@ -415,7 +415,7 @@ def test_generate_registry(self, dfn_dir, tmp_path): """Test generating a registry file.""" import tomli - from modflow_devtools.dfn.make_registry import generate_registry + from modflow_devtools.dfns.make_registry import generate_registry output_path = tmp_path / "dfns.toml" @@ -437,7 +437,7 @@ def test_generate_registry(self, dfn_dir, tmp_path): def test_generate_registry_empty_dir(self, tmp_path): """Test generating registry from empty directory raises ValueError.""" - from modflow_devtools.dfn.make_registry import generate_registry + from modflow_devtools.dfns.make_registry import generate_registry with pytest.raises(ValueError, match="No DFN files found"): generate_registry( @@ -447,7 +447,7 @@ def test_generate_registry_empty_dir(self, tmp_path): def test_cli_help(self): """Test CLI help output.""" - from modflow_devtools.dfn.make_registry import main + from modflow_devtools.dfns.make_registry import main # --help should exit with 0 with pytest.raises(SystemExit) as exc_info: @@ -456,7 +456,7 @@ def test_cli_help(self): def test_cli_generate(self, dfn_dir, tmp_path): """Test CLI generate command.""" - from modflow_devtools.dfn.make_registry import main + from modflow_devtools.dfns.make_registry import main output_path = tmp_path / "dfns.toml" @@ -481,21 +481,21 @@ class TestCLI: def test_main_help(self): """Test CLI help output.""" - from modflow_devtools.dfn.__main__ import main + from 
modflow_devtools.dfns.__main__ import main result = main([]) assert result == 0 def test_info_command(self): """Test info command.""" - from modflow_devtools.dfn.__main__ import main + from modflow_devtools.dfns.__main__ import main result = main(["info"]) assert result == 0 def test_clean_command_no_cache(self, tmp_path): """Test clean command when cache doesn't exist.""" - from modflow_devtools.dfn.__main__ import main + from modflow_devtools.dfns.__main__ import main # Patch get_cache_dir to return nonexistent directory with patch("modflow_devtools.dfn.__main__.get_cache_dir") as mock_cache_dir: @@ -506,7 +506,7 @@ def test_clean_command_no_cache(self, tmp_path): def test_sync_command_no_registry(self): """Test sync command when registry doesn't exist (expected to fail).""" - from modflow_devtools.dfn.__main__ import main + from modflow_devtools.dfns.__main__ import main # This should fail because MODFLOW 6 repo doesn't have the registry yet result = main(["sync", "--ref", "nonexistent-ref"]) @@ -519,7 +519,7 @@ class TestRemoteDfnRegistry: def test_init(self): """Test RemoteDfnRegistry initialization.""" - from modflow_devtools.dfn import RemoteDfnRegistry + from modflow_devtools.dfns import RemoteDfnRegistry registry = RemoteDfnRegistry(source="modflow6", ref="develop") @@ -528,7 +528,7 @@ def test_init(self): def test_unknown_source_raises(self): """Test that unknown source raises ValueError.""" - from modflow_devtools.dfn import RemoteDfnRegistry + from modflow_devtools.dfns import RemoteDfnRegistry with pytest.raises(ValueError, match="Unknown source"): RemoteDfnRegistry(source="nonexistent", ref="develop") @@ -584,7 +584,7 @@ def test_fetch_registry_not_found(self): def test_init_with_repo_override(self): """Test RemoteDfnRegistry with repo override.""" - from modflow_devtools.dfn import RemoteDfnRegistry + from modflow_devtools.dfns import RemoteDfnRegistry registry = RemoteDfnRegistry( source=TEST_DFN_SOURCE, @@ -649,7 +649,7 @@ def 
test_sync_files(self): def test_get_dfn(self): """Test getting a DFN from the test repository.""" - from modflow_devtools.dfn import Dfn + from modflow_devtools.dfns import Dfn from modflow_devtools.dfns.registry import RemoteDfnRegistry registry = RemoteDfnRegistry( @@ -713,7 +713,7 @@ class TestModuleFunctions: def test_list_components_local(self, dfn_dir): """Test list_components with local registry.""" - from modflow_devtools.dfn import LocalDfnRegistry + from modflow_devtools.dfns import LocalDfnRegistry registry = LocalDfnRegistry(path=dfn_dir) components = list(registry.spec.keys()) From c7e0054ca7a172667529b11ed6ee4e9c9912a088 Mon Sep 17 00:00:00 2001 From: Bonelli Date: Mon, 23 Feb 2026 07:46:58 -0500 Subject: [PATCH 29/36] ruff --- autotest/test_dfns.py | 2 +- autotest/test_models.py | 16 ++++++++-------- modflow_devtools/dfns/__init__.py | 22 +++++++++++----------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/autotest/test_dfns.py b/autotest/test_dfns.py index 14ef4af6..52b02170 100644 --- a/autotest/test_dfns.py +++ b/autotest/test_dfns.py @@ -4,11 +4,11 @@ import pytest from packaging.version import Version +from modflow_devtools.dfn2toml import convert, is_valid from modflow_devtools.dfns import Dfn, _load_common, load, load_flat from modflow_devtools.dfns.fetch import fetch_dfns from modflow_devtools.dfns.schema.v1 import FieldV1 from modflow_devtools.dfns.schema.v2 import FieldV2 -from modflow_devtools.dfn2toml import convert, is_valid from modflow_devtools.markers import requires_pkg PROJ_ROOT = Path(__file__).parents[1] diff --git a/autotest/test_models.py b/autotest/test_models.py index a8a7e637..6e427866 100644 --- a/autotest/test_models.py +++ b/autotest/test_models.py @@ -478,28 +478,28 @@ def test_cli_list_with_cache(self, capsys): def test_cli_clear(self, capsys): """Test 'clear' command.""" # Sync a registry first - _DEFAULT_CACHE.clear(source=TEST_SOURCE_NAME, ref=TEST_REF) + 
_DEFAULT_CACHE.clear(source=TEST_MODELS_SOURCE_NAME, ref=TEST_MODELS_REF) source = ModelSourceRepo( - repo=TEST_REPO, - name=TEST_SOURCE_NAME, - refs=[TEST_REF], + repo=TEST_MODELS_REPO, + name=TEST_MODELS_SOURCE_NAME, + refs=[TEST_MODELS_REF], ) - result = source.sync(ref=TEST_REF) + result = source.sync(ref=TEST_MODELS_REF) assert len(result.synced) == 1 # Verify it's cached - assert _DEFAULT_CACHE.has(TEST_SOURCE_NAME, TEST_REF) + assert _DEFAULT_CACHE.has(TEST_MODELS_SOURCE_NAME, TEST_MODELS_REF) # Clear with force flag import argparse from modflow_devtools.models.__main__ import cmd_clear - args = argparse.Namespace(source=TEST_SOURCE_NAME, ref=TEST_REF, force=True) + args = argparse.Namespace(source=TEST_MODELS_SOURCE_NAME, ref=TEST_MODELS_REF, force=True) cmd_clear(args) # Verify it was cleared - assert not _DEFAULT_CACHE.has(TEST_SOURCE_NAME, TEST_REF) + assert not _DEFAULT_CACHE.has(TEST_MODELS_SOURCE_NAME, TEST_MODELS_REF) captured = capsys.readouterr() assert "Cleared 1 cached registry" in captured.out diff --git a/modflow_devtools/dfns/__init__.py b/modflow_devtools/dfns/__init__.py index 0245da85..6cb65f54 100644 --- a/modflow_devtools/dfns/__init__.py +++ b/modflow_devtools/dfns/__init__.py @@ -19,17 +19,6 @@ from boltons.iterutils import remap from packaging.version import Version -# Experimental API warning -warnings.warn( - "The modflow_devtools.dfns API is experimental and may change or be " - "removed in future versions without following normal deprecation procedures. " - "Use at your own risk. 
To suppress this warning, use:\n" - " warnings.filterwarnings('ignore', " - "message='.*modflow_devtools.dfns.*experimental.*')", - FutureWarning, - stacklevel=2, -) - from modflow_devtools.dfns.parse import ( is_advanced_package, is_multi_package, @@ -45,6 +34,17 @@ from modflow_devtools.dfns.schema.v2 import FieldV2 from modflow_devtools.misc import drop_none_or_empty, try_literal_eval +# Experimental API warning +warnings.warn( + "The modflow_devtools.dfns API is experimental and may change or be " + "removed in future versions without following normal deprecation procedures. " + "Use at your own risk. To suppress this warning, use:\n" + " warnings.filterwarnings('ignore', " + "message='.*modflow_devtools.dfns.*experimental.*')", + FutureWarning, + stacklevel=2, +) + __all__ = [ "Block", "Blocks", From 73d0744a86238de1f69a567a3d9b8d6020f6a3d0 Mon Sep 17 00:00:00 2001 From: Bonelli Date: Mon, 23 Feb 2026 08:44:35 -0500 Subject: [PATCH 30/36] better support for local model registries, update test config --- .github/workflows/ci.yml | 16 +++-- autotest/test_dfns_registry.py | 116 +++++++++++++++++++++++++++++- modflow_devtools/dfns/__init__.py | 23 ++++-- modflow_devtools/dfns/registry.py | 22 +++++- 4 files changed, 165 insertions(+), 12 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b01fcb76..b48850a7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -89,7 +89,13 @@ jobs: uses: actions/checkout@v4 with: path: modflow-devtools - + + - name: Checkout modflow6 for DFN autodiscovery + uses: actions/checkout@v4 + with: + repository: MODFLOW-ORG/modflow6 + path: modflow6 + - name: Setup uv uses: astral-sh/setup-uv@v5 with: @@ -105,8 +111,9 @@ jobs: env: REPOS_PATH: ${{ github.workspace }} MODFLOW_DEVTOOLS_NO_AUTO_SYNC: 1 + TEST_DFN_PATH: ${{ github.workspace }}/modflow6/doc/mf6io/mf6ivar/dfn # use --dist loadfile to so tests requiring pytest-virtualenv run on the same worker - run: uv run pytest -v -n auto 
--dist loadfile --durations 0 --ignore test_download.py --ignore test_models.py --ignore test_dfn_registry.py + run: uv run pytest -v -n auto --dist loadfile --durations 0 --ignore test_download.py --ignore test_models.py - name: Run network-dependent tests # only invoke the GH API on one OS and Python version @@ -117,7 +124,8 @@ jobs: env: REPOS_PATH: ${{ github.workspace }} GITHUB_TOKEN: ${{ github.token }} - # DFNs API + # DFNs API - use test fork with registry file for RemoteDfnRegistry tests + # Note: TEST_DFN_PATH is intentionally NOT set here to use fetch behavior TEST_DFNS_REPO: wpbonelli/modflow6 TEST_DFNS_REF: registry TEST_DFNS_SOURCE: modflow6 @@ -130,7 +138,7 @@ jobs: TEST_PROGRAMS_REPO: MODFLOW-ORG/modflow6 TEST_PROGRAMS_REF: develop TEST_PROGRAMS_SOURCE: modflow6 - run: uv run pytest -v -n auto --dist loadgroup --durations 0 test_download.py test_models.py test_dfn_registry.py + run: uv run pytest -v -n auto --dist loadgroup --durations 0 test_download.py test_models.py test_dfns_registry.py rtd: name: Docs diff --git a/autotest/test_dfns_registry.py b/autotest/test_dfns_registry.py index 5bbc1281..c8aa74ce 100644 --- a/autotest/test_dfns_registry.py +++ b/autotest/test_dfns_registry.py @@ -29,10 +29,32 @@ MF6_REPO = TEST_DFN_REPO.split("/")[1] MF6_REF = TEST_DFN_REF +# Path to cloned MF6 repository for autodiscovery (set by CI or local testing) +# If set, use this instead of fetching individual DFN files +TEST_DFN_PATH = os.getenv("TEST_DFN_PATH") + @pytest.fixture(scope="module") def dfn_dir(): - """Ensure DFN files are downloaded for testing.""" + """ + Provide path to DFN files for testing. + + Priority: + 1. If TEST_DFN_PATH is set, use the DFN directory from a cloned MF6 repo (autodiscovery) + 2. Otherwise, fetch individual DFN files to temp directory (legacy behavior) + + The autodiscovery approach is preferred in CI to avoid needing registry files. 
+ """ + # If TEST_DFN_PATH is set, use it (points to cloned MF6 DFN directory) + if TEST_DFN_PATH: + dfn_path = Path(TEST_DFN_PATH).expanduser().resolve() + if not dfn_path.exists(): + raise ValueError(f"TEST_DFN_PATH={TEST_DFN_PATH} does not exist") + if not any(dfn_path.glob("*.dfn")): + raise ValueError(f"No DFN files found in TEST_DFN_PATH={TEST_DFN_PATH}") + return dfn_path + + # Fall back to fetching individual DFN files (legacy behavior for local development) if not any(DFN_DIR.glob("*.dfn")): fetch_dfns(MF6_OWNER, MF6_REPO, MF6_REF, DFN_DIR, verbose=True) return DFN_DIR @@ -731,3 +753,95 @@ def test_get_sync_status(self): assert isinstance(status, dict) # All refs should be either True or False assert all(isinstance(v, bool) for v in status.values()) + + +@requires_pkg("boltons", "pydantic") +class TestGetRegistryWithPath: + """Tests for get_registry() with path parameter.""" + + def test_get_registry_with_path_returns_local_registry(self, dfn_dir): + """Test that get_registry with path returns LocalDfnRegistry.""" + from modflow_devtools.dfns.registry import LocalDfnRegistry, get_registry + + registry = get_registry(path=dfn_dir) + + assert isinstance(registry, LocalDfnRegistry) + assert registry.path == dfn_dir.resolve() + + def test_get_registry_with_path_and_metadata(self, dfn_dir): + """Test that source/ref metadata is preserved with path.""" + from modflow_devtools.dfns.registry import get_registry + + registry = get_registry(path=dfn_dir, source="test", ref="local") + + assert registry.source == "test" + assert registry.ref == "local" + + def test_get_registry_without_path_returns_remote_registry(self): + """Test that get_registry without path still returns RemoteDfnRegistry.""" + from modflow_devtools.dfns.registry import RemoteDfnRegistry, get_registry + + registry = get_registry(source="modflow6", ref="develop", auto_sync=False) + + assert isinstance(registry, RemoteDfnRegistry) + + +@requires_pkg("boltons", "pydantic") +class 
TestConvenienceFunctionsWithPath: + """Tests for convenience functions with path parameter.""" + + def test_get_dfn_with_path(self, dfn_dir): + """Test get_dfn() with path parameter.""" + from modflow_devtools.dfns import get_dfn + + dfn = get_dfn("gwf-chd", path=dfn_dir) + + assert dfn.name == "gwf-chd" + assert dfn.parent == "gwf-nam" + + def test_get_dfn_path_with_path(self, dfn_dir): + """Test get_dfn_path() with path parameter.""" + from modflow_devtools.dfns import get_dfn_path + + file_path = get_dfn_path("gwf-chd", path=dfn_dir) + + assert file_path.exists() + assert file_path.name == "gwf-chd.dfn" + + def test_list_components_with_path(self, dfn_dir): + """Test list_components() with path parameter.""" + from modflow_devtools.dfns import list_components + + components = list_components(path=dfn_dir) + + assert len(components) > 100 + assert "gwf-chd" in components + + +@requires_pkg("boltons", "pydantic") +def test_autodiscovery_workflow(dfn_dir): + """Test complete autodiscovery workflow.""" + from modflow_devtools.dfns import get_dfn, get_registry, list_components + + # Get registry pointing at local directory + registry = get_registry(path=dfn_dir, ref="local") + + # List components + components = list(registry.spec.keys()) + assert len(components) > 100 + + # Get specific DFN + gwf_chd = registry.get_dfn("gwf-chd") + assert gwf_chd.name == "gwf-chd" + assert gwf_chd.blocks is not None + + # Get file path + chd_path = registry.get_dfn_path("gwf-chd") + assert chd_path.exists() + + # Use convenience functions + components_list = list_components(path=dfn_dir) + assert "gwf-chd" in components_list + + dfn = get_dfn("gwf-wel", path=dfn_dir) + assert dfn.name == "gwf-wel" diff --git a/modflow_devtools/dfns/__init__.py b/modflow_devtools/dfns/__init__.py index 6cb65f54..67289aed 100644 --- a/modflow_devtools/dfns/__init__.py +++ b/modflow_devtools/dfns/__init__.py @@ -794,6 +794,7 @@ def get_dfn( component: str, ref: str = "develop", source: str = "modflow6", 
+ path: str | PathLike | None = None, ) -> "Dfn": """ Get a DFN by component name from the registry. @@ -809,6 +810,9 @@ def get_dfn( Git ref (branch, tag, or commit hash). Default is "develop". source : str, optional Source repository name. Default is "modflow6". + path : str or PathLike, optional + Path to a local directory containing DFN files. If provided, + uses autodiscovery from local filesystem instead of remote. Returns ------- @@ -819,9 +823,10 @@ def get_dfn( -------- >>> dfn = get_dfn("gwf-chd") >>> dfn = get_dfn("gwf-chd", ref="6.6.0") + >>> dfn = get_dfn("gwf-chd", path="/path/to/dfns") """ registry = _get_registry_module() - reg = registry.get_registry(source=source, ref=ref) + reg = registry.get_registry(source=source, ref=ref, path=path) return reg.get_dfn(component) @@ -829,6 +834,7 @@ def get_dfn_path( component: str, ref: str = "develop", source: str = "modflow6", + path: str | PathLike | None = None, ) -> Path: """ Get the local cached file path for a DFN component. @@ -841,24 +847,29 @@ def get_dfn_path( Git ref (branch, tag, or commit hash). Default is "develop". source : str, optional Source repository name. Default is "modflow6". + path : str or PathLike, optional + Path to a local directory containing DFN files. If provided, + returns path from local filesystem instead of cache. Returns ------- Path - Path to the local cached DFN file. + Path to the local DFN file (cached or local directory). Examples -------- >>> path = get_dfn_path("gwf-chd", ref="6.6.0") + >>> path = get_dfn_path("gwf-chd", path="/path/to/dfns") """ registry = _get_registry_module() - reg = registry.get_registry(source=source, ref=ref) + reg = registry.get_registry(source=source, ref=ref, path=path) return reg.get_dfn_path(component) def list_components( ref: str = "develop", source: str = "modflow6", + path: str | PathLike | None = None, ) -> list[str]: """ List available components for a registry. 
@@ -869,6 +880,9 @@ def list_components( Git ref (branch, tag, or commit hash). Default is "develop". source : str, optional Source repository name. Default is "modflow6". + path : str or PathLike, optional + Path to a local directory containing DFN files. If provided, + lists components from local filesystem. Returns ------- @@ -880,7 +894,8 @@ def list_components( >>> components = list_components(ref="6.6.0") >>> "gwf-chd" in components True + >>> components = list_components(path="/path/to/dfns") """ registry = _get_registry_module() - reg = registry.get_registry(source=source, ref=ref) + reg = registry.get_registry(source=source, ref=ref, path=path) return list(reg.spec.keys()) diff --git a/modflow_devtools/dfns/registry.py b/modflow_devtools/dfns/registry.py index bb8bebfc..462d6554 100644 --- a/modflow_devtools/dfns/registry.py +++ b/modflow_devtools/dfns/registry.py @@ -735,7 +735,8 @@ def get_registry( source: str = "modflow6", ref: str = "develop", auto_sync: bool = True, -) -> RemoteDfnRegistry: + path: str | PathLike | None = None, +) -> DfnRegistry: """ Get a registry for the specified source and ref. @@ -748,17 +749,32 @@ def get_registry( auto_sync : bool, optional If True and registry is not cached, automatically sync. Default is True. Can be disabled via MODFLOW_DEVTOOLS_NO_AUTO_SYNC environment variable. + Ignored when path is provided. + path : str or PathLike, optional + Path to a local directory containing DFN files. If provided, returns + a LocalDfnRegistry for autodiscovery instead of RemoteDfnRegistry. + When using a local path, source and ref are used for metadata only. Returns ------- - RemoteDfnRegistry - Registry for the specified source and ref. + DfnRegistry + Registry for the specified source and ref. Returns LocalDfnRegistry + if path is provided, otherwise RemoteDfnRegistry. 
Examples -------- + >>> # Remote registry (existing behavior) >>> registry = get_registry(ref="6.6.0") >>> dfn = registry.get_dfn("gwf-chd") + + >>> # Local registry with autodiscovery (NEW) + >>> registry = get_registry(path="/path/to/mf6/doc/mf6io/mf6ivar/dfn") + >>> dfn = registry.get_dfn("gwf-chd") """ + # If path is provided, return LocalDfnRegistry for autodiscovery + if path is not None: + return LocalDfnRegistry(path=Path(path), source=source, ref=ref) + # Check for auto-sync opt-out if os.environ.get("MODFLOW_DEVTOOLS_NO_AUTO_SYNC", "").lower() in ("1", "true", "yes"): auto_sync = False From a8733f38bf361cb3dff2ea5c613c715d668f0eaa Mon Sep 17 00:00:00 2001 From: Bonelli Date: Mon, 23 Feb 2026 09:04:12 -0500 Subject: [PATCH 31/36] fixes --- .github/workflows/ci.yml | 5 ++--- autotest/test_dfns_registry.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b48850a7..620d30ff 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -113,7 +113,7 @@ jobs: MODFLOW_DEVTOOLS_NO_AUTO_SYNC: 1 TEST_DFN_PATH: ${{ github.workspace }}/modflow6/doc/mf6io/mf6ivar/dfn # use --dist loadfile to so tests requiring pytest-virtualenv run on the same worker - run: uv run pytest -v -n auto --dist loadfile --durations 0 --ignore test_download.py --ignore test_models.py + run: uv run pytest -v -n auto --dist loadfile --durations 0 --ignore test_download.py --ignore test_models.py --ignore test_dfns_registry.py - name: Run network-dependent tests # only invoke the GH API on one OS and Python version @@ -124,8 +124,7 @@ jobs: env: REPOS_PATH: ${{ github.workspace }} GITHUB_TOKEN: ${{ github.token }} - # DFNs API - use test fork with registry file for RemoteDfnRegistry tests - # Note: TEST_DFN_PATH is intentionally NOT set here to use fetch behavior + # TODO: switch to upstream when dfn registry file added TEST_DFNS_REPO: wpbonelli/modflow6 TEST_DFNS_REF: registry TEST_DFNS_SOURCE: modflow6 
diff --git a/autotest/test_dfns_registry.py b/autotest/test_dfns_registry.py index c8aa74ce..3f97d865 100644 --- a/autotest/test_dfns_registry.py +++ b/autotest/test_dfns_registry.py @@ -520,7 +520,7 @@ def test_clean_command_no_cache(self, tmp_path): from modflow_devtools.dfns.__main__ import main # Patch get_cache_dir to return nonexistent directory - with patch("modflow_devtools.dfn.__main__.get_cache_dir") as mock_cache_dir: + with patch("modflow_devtools.dfns.__main__.get_cache_dir") as mock_cache_dir: mock_cache_dir.return_value = tmp_path / "nonexistent" result = main(["clean"]) From f50e43f166d43ebdfd74a2d9e36c697aaf39b370 Mon Sep 17 00:00:00 2001 From: Bonelli Date: Mon, 23 Feb 2026 09:23:22 -0500 Subject: [PATCH 32/36] fix tests --- autotest/test_build.py | 3 ++- modflow_devtools/dfn2toml.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/autotest/test_build.py b/autotest/test_build.py index ab3323be..eead883c 100644 --- a/autotest/test_build.py +++ b/autotest/test_build.py @@ -28,5 +28,6 @@ def test_meson_build(tmp_path): assert (bin_path / f"mf6{_exe_ext}").is_file() assert (bin_path / f"zbud6{_exe_ext}").is_file() - assert (bin_path / f"mf5to6{_exe_ext}").is_file() + # mf5to6 is no longer built by default in modflow6 meson.build + # assert (bin_path / f"mf5to6{_exe_ext}").is_file() assert (bin_path / f"libmf6{_lib_ext}").is_file() diff --git a/modflow_devtools/dfn2toml.py b/modflow_devtools/dfn2toml.py index 0313d799..7ec012d8 100644 --- a/modflow_devtools/dfn2toml.py +++ b/modflow_devtools/dfn2toml.py @@ -10,8 +10,8 @@ import tomli_w as tomli from boltons.iterutils import remap -from modflow_devtools.dfn import Dfn, is_valid, load, load_flat, map, to_flat, to_tree -from modflow_devtools.dfn.schema.block import block_sort_key +from modflow_devtools.dfns import Dfn, is_valid, load, load_flat, map, to_flat, to_tree +from modflow_devtools.dfns.schema.block import block_sort_key from modflow_devtools.misc import drop_none_or_empty # 
mypy: ignore-errors From be5d40519cf883f2e452cf57bd851b99c1a2a3ce Mon Sep 17 00:00:00 2001 From: Bonelli Date: Mon, 23 Feb 2026 09:39:40 -0500 Subject: [PATCH 33/36] fixes --- .github/workflows/ci.yml | 3 +++ autotest/test_dfns_registry.py | 6 ++++++ autotest/test_models.py | 12 ++++++++++++ autotest/test_programs.py | 2 ++ 4 files changed, 23 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 620d30ff..4eaf97de 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -102,6 +102,9 @@ jobs: cache-dependency-glob: "**/pyproject.toml" python-version: ${{ matrix.python }} + - name: Setup Fortran + uses: fortran-lang/setup-fortran@v1 + - name: Install project working-directory: modflow-devtools run: uv sync --all-extras diff --git a/autotest/test_dfns_registry.py b/autotest/test_dfns_registry.py index 3f97d865..f9c3a7e2 100644 --- a/autotest/test_dfns_registry.py +++ b/autotest/test_dfns_registry.py @@ -11,6 +11,7 @@ from unittest.mock import patch import pytest +from flaky import flaky from packaging.version import Version from modflow_devtools.dfns.fetch import fetch_dfns @@ -634,6 +635,7 @@ def test_construct_raw_url_with_repo_override(self): assert TEST_DFN_REPO in url assert TEST_DFN_REF in url + @flaky(max_runs=3, min_passes=1) def test_fetch_registry(self): """Test fetching registry from the test repository.""" from modflow_devtools.dfns.registry import RemoteDfnRegistry @@ -652,6 +654,7 @@ def test_fetch_registry(self): # (e.g., generated from develop branch but accessed on registry branch) assert meta.ref is not None + @flaky(max_runs=3, min_passes=1) def test_sync_files(self): """Test syncing DFN files from the test repository.""" from modflow_devtools.dfns.registry import RemoteDfnRegistry @@ -669,6 +672,7 @@ def test_sync_files(self): path = registry.get_dfn_path("gwf-chd") assert path.exists() + @flaky(max_runs=3, min_passes=1) def test_get_dfn(self): """Test getting a DFN from the test repository.""" from 
modflow_devtools.dfns import Dfn @@ -688,6 +692,7 @@ def test_get_dfn(self): assert isinstance(dfn, Dfn) assert dfn.name == "gwf-chd" + @flaky(max_runs=3, min_passes=1) def test_get_spec(self): """Test getting the full spec from the test repository.""" from modflow_devtools.dfns import DfnSpec @@ -708,6 +713,7 @@ def test_get_spec(self): assert "gwf-chd" in spec assert "sim-nam" in spec + @flaky(max_runs=3, min_passes=1) def test_list_components(self): """Test listing available components from the test repository.""" from modflow_devtools.dfns.registry import RemoteDfnRegistry diff --git a/autotest/test_models.py b/autotest/test_models.py index 6e427866..88a4b0c1 100644 --- a/autotest/test_models.py +++ b/autotest/test_models.py @@ -10,6 +10,7 @@ from pathlib import Path import pytest +from flaky import flaky from modflow_devtools.models import ( _DEFAULT_CACHE, @@ -231,6 +232,7 @@ def test_get_registry_cache_dir(self): class TestDiscovery: """Test registry discovery.""" + @flaky(max_runs=3, min_passes=1) def test_discover_registry(self): """Test discovering registry for test repo.""" # Use test repo/ref from environment @@ -248,6 +250,7 @@ def test_discover_registry(self): assert discovered.mode == "version_controlled" assert isinstance(discovered.registry, ModelRegistry) + @flaky(max_runs=3, min_passes=1) def test_discover_registry_nonexistent_ref(self): """Test that discovery fails gracefully for nonexistent ref.""" source = ModelSourceRepo( @@ -264,6 +267,7 @@ def test_discover_registry_nonexistent_ref(self): class TestSync: """Test registry synchronization.""" + @flaky(max_runs=3, min_passes=1) def test_sync_single_source_single_ref(self): """Test syncing a single source/ref.""" _DEFAULT_CACHE.clear(source=TEST_MODELS_SOURCE_NAME, ref=TEST_MODELS_REF) @@ -280,6 +284,7 @@ def test_sync_single_source_single_ref(self): assert len(result.failed) == 0 assert (TEST_MODELS_SOURCE_NAME, TEST_MODELS_REF) in result.synced + @flaky(max_runs=3, min_passes=1) def 
test_sync_creates_cache(self): """Test that sync creates cached registry.""" _DEFAULT_CACHE.clear(source=TEST_MODELS_SOURCE_NAME, ref=TEST_MODELS_REF) @@ -293,6 +298,7 @@ def test_sync_creates_cache(self): source.sync(ref=TEST_MODELS_REF) assert _DEFAULT_CACHE.has(TEST_MODELS_SOURCE_NAME, TEST_MODELS_REF) + @flaky(max_runs=3, min_passes=1) def test_sync_skip_cached(self): """Test that sync skips already-cached registries.""" _DEFAULT_CACHE.clear(source=TEST_MODELS_SOURCE_NAME, ref=TEST_MODELS_REF) @@ -312,6 +318,7 @@ def test_sync_skip_cached(self): assert len(result2.synced) == 0 assert len(result2.skipped) == 1 + @flaky(max_runs=3, min_passes=1) def test_sync_force(self): """Test that force flag re-syncs cached registries.""" _DEFAULT_CACHE.clear(source=TEST_MODELS_SOURCE_NAME, ref=TEST_MODELS_REF) @@ -331,6 +338,7 @@ def test_sync_force(self): assert len(result.synced) == 1 assert len(result.skipped) == 0 + @flaky(max_runs=3, min_passes=1) def test_sync_via_source_method(self): """Test syncing via ModelSourceRepo.sync() method.""" _DEFAULT_CACHE.clear(source=TEST_MODELS_SOURCE_NAME, ref=TEST_MODELS_REF) @@ -348,6 +356,7 @@ def test_sync_via_source_method(self): assert len(result.synced) == 1 assert (TEST_MODELS_SOURCE_NAME, TEST_MODELS_REF) in result.synced + @flaky(max_runs=3, min_passes=1) def test_source_is_synced_method(self): """Test ModelSourceRepo.is_synced() method.""" _DEFAULT_CACHE.clear(source=TEST_MODELS_SOURCE_NAME, ref=TEST_MODELS_REF) @@ -362,6 +371,7 @@ def test_source_is_synced_method(self): source.sync(ref=TEST_MODELS_REF) assert source.is_synced(TEST_MODELS_REF) + @flaky(max_runs=3, min_passes=1) def test_source_list_synced_refs_method(self): """Test ModelSourceRepo.list_synced_refs() method.""" _DEFAULT_CACHE.clear(source=TEST_MODELS_SOURCE_NAME, ref=TEST_MODELS_REF) @@ -509,6 +519,7 @@ def test_cli_clear(self, capsys): class TestIntegration: """Integration tests for full workflows.""" + @flaky(max_runs=3, min_passes=1) def 
test_full_workflow(self): """Test complete workflow: discover -> cache -> load.""" _DEFAULT_CACHE.clear(source=TEST_MODELS_SOURCE_NAME, ref=TEST_MODELS_REF) @@ -531,6 +542,7 @@ def test_full_workflow(self): assert loaded is not None assert len(loaded.models) == len(discovered.registry.models) + @flaky(max_runs=3, min_passes=1) def test_sync_and_list_models(self): """Test syncing and listing available models.""" _DEFAULT_CACHE.clear(source=TEST_MODELS_SOURCE_NAME, ref=TEST_MODELS_REF) diff --git a/autotest/test_programs.py b/autotest/test_programs.py index 39a2d6f9..bc3e7dfc 100644 --- a/autotest/test_programs.py +++ b/autotest/test_programs.py @@ -2,6 +2,7 @@ from pathlib import Path import pytest +from flaky import flaky from modflow_devtools.programs import ( _DEFAULT_CACHE, @@ -493,6 +494,7 @@ def test_nonexistent_platform_uses_fallback(self): class TestForceSemantics: """Test force flag semantics for sync and install.""" + @flaky(max_runs=3, min_passes=1) def test_sync_force_flag(self): """Test that sync --force re-downloads even if cached.""" # Clear cache first From 2a849d987ea50f03359cf527fa40e7507c43cd82 Mon Sep 17 00:00:00 2001 From: Bonelli Date: Mon, 23 Feb 2026 09:51:03 -0500 Subject: [PATCH 34/36] fix? 
--- modflow_devtools/models/__init__.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/modflow_devtools/models/__init__.py b/modflow_devtools/models/__init__.py index 75fd7f7b..c9a00d78 100644 --- a/modflow_devtools/models/__init__.py +++ b/modflow_devtools/models/__init__.py @@ -286,7 +286,12 @@ def load(self, source: str, ref: str) -> ModelRegistry | None: return None with registry_file.open("rb") as f: - return ModelRegistry(**tomli.load(f)) + data = tomli.load(f) + # Defensive: filter out any empty file entries that might have been saved + # (should not happen with current code, but handles edge cases) + if "files" in data: + data["files"] = {k: v for k, v in data["files"].items() if v} + return ModelRegistry(**data) def has(self, source: str, ref: str) -> bool: """ From 82a4b8d6c2c6bd05c7e4e573e490a46d00aa3630 Mon Sep 17 00:00:00 2001 From: Bonelli Date: Mon, 23 Feb 2026 09:53:11 -0500 Subject: [PATCH 35/36] compiler --- .github/workflows/ci.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4eaf97de..e245286b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -103,7 +103,18 @@ jobs: python-version: ${{ matrix.python }} - name: Setup Fortran + if: runner.os != 'Windows' uses: fortran-lang/setup-fortran@v1 + with: + compiler: gcc + version: ${{ env.GCC_V }} + + - name: Setup Fortran (Windows) + if: runner.os == 'Windows' + uses: fortran-lang/setup-fortran@v1 + with: + compiler: intel + version: latest - name: Install project working-directory: modflow-devtools From 1a3530848fdda2cd768491c4c5961244014bbc4e Mon Sep 17 00:00:00 2001 From: Bonelli Date: Mon, 23 Feb 2026 09:57:12 -0500 Subject: [PATCH 36/36] intel version --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e245286b..0423cc03 100644 --- a/.github/workflows/ci.yml +++ 
b/.github/workflows/ci.yml @@ -114,7 +114,7 @@ jobs: uses: fortran-lang/setup-fortran@v1 with: compiler: intel - version: latest + version: 2025.2 - name: Install project working-directory: modflow-devtools