Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
23af5c8
feat(waterdata): drop hash-valued ID columns by default
thodson-usgs May 17, 2026
9c625f3
feat(waterdata): extend hash-ID drop to get_stats_por / get_stats_dat…
thodson-usgs May 17, 2026
2d01ae3
test(waterdata): add stats hash-drop unit tests
thodson-usgs May 17, 2026
fd940d9
feat(waterdata): extend hash-ID drop to get_samples
thodson-usgs May 17, 2026
5bb96d3
refactor(waterdata): unify hash-drop helper and tighten internals
thodson-usgs May 17, 2026
b6ca211
chore(waterdata): drop benchmark scaffolding from PR
thodson-usgs May 18, 2026
8591740
docs(waterdata): correct include_hash_ids stability claims
thodson-usgs May 24, 2026
c5973fc
refactor(waterdata): rename include_hash_ids to include_hash; tighten…
thodson-usgs May 24, 2026
2605204
feat(waterdata): add waterdata.xarray module returning CF datasets
thodson-usgs May 25, 2026
6bc2b26
perf(waterdata.xarray): build datasets from the lean hash-free frame
thodson-usgs May 25, 2026
6df10c4
refactor(waterdata.xarray): move CF vocabulary maps to types
thodson-usgs May 25, 2026
140a3c4
feat(waterdata.xarray): always drop hash IDs; ignore include_hash
thodson-usgs May 25, 2026
ad42b52
fix(waterdata.xarray): accurate stats docstring; pin wrapper->_fetch …
thodson-usgs May 25, 2026
700120e
fix(waterdata.xarray): handle NaT times and mixed units in the converter
thodson-usgs May 25, 2026
0c921f2
feat(waterdata.xarray): add lon/lat, site descriptors, and date_modified
thodson-usgs May 27, 2026
01b014b
test(waterdata): spec stats hash-drop mocks against httpx.Client
thodson-usgs May 27, 2026
0f24f7f
feat(waterdata.xarray): ragged-default CF datasets, get_samples, sche…
thodson-usgs May 27, 2026
c58e96e
docs(waterdata.xarray): add demo notebook for the xarray wrappers
thodson-usgs May 27, 2026
3274d91
fix(waterdata): repair queryables fallback, scalar properties, sedime…
thodson-usgs May 27, 2026
ba74979
docs(waterdata.xarray): emit valid numpydoc from the wrapper docstrings
thodson-usgs May 27, 2026
f1755e7
refactor(waterdata): drop the pandas-path hash-dropping; keep it in x…
thodson-usgs May 27, 2026
15d3789
fix(waterdata.xarray): drop time_series_id from the flat stats dataset
thodson-usgs May 27, 2026
055fda6
fix(waterdata.xarray): drop the invalid water_temperature CF standard…
thodson-usgs May 27, 2026
20f95e7
feat(waterdata.xarray): record vertical_datum to distinguish stage pa…
thodson-usgs May 27, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions dataretrieval/waterdata/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,62 @@
"count",
],
}


# --- CF / xarray vocabulary mappings ---------------------------------------
# Lookup tables used by :mod:`dataretrieval.waterdata.xarray` to translate
# USGS terms into CF-conventions metadata. Each is intentionally partial:
# anything not listed falls back to a sensible default (raw unit string kept
# verbatim; no standard_name emitted) rather than guessing a wrong CF term.
# They are plain data, so they live here rather than in the (xarray-optional)
# converter module and can be extended without importing xarray.

# Unit strings as reported by USGS, keyed to the spelling emitted as the
# UDUNITS / CF-canonical unit. Several USGS spellings may share one target.
CF_UNIT_MAP = {
    # Volume per time.
    "ft^3/s": "ft3 s-1",
    "ft3/s": "ft3 s-1",
    # Lengths already spelled the way they are emitted.
    "ft": "ft",
    "in": "in",
    # Temperature, with and without the embedded space.
    "degC": "degC",
    "deg C": "degC",
    "uS/cm": "uS/cm",
    # Mass per volume; both letter cases of the litre symbol are accepted.
    "mg/l": "mg L-1",
    "mg/L": "mg L-1",
    # UDUNITS 'ton' is the US short ton; 'short_ton' is not a valid UDUNITS
    # name, so the plain spelling is used here.
    "tons/day": "ton day-1",
    "%": "percent",
}

# Operator word placed in a CF ``cell_methods`` string, looked up by the
# USGS statistic_id.
CF_CELL_METHODS = {
    "00001": "maximum",
    "00002": "minimum",
    "00003": "mean",
    "00006": "sum",
    "00008": "median",
    # Instantaneous values are expressed as the ``point`` method.
    "00011": "point",
}

# CF standard_name looked up by USGS 5-digit parameter code. The table is
# deliberately conservative: a code with no confident CF match is simply
# absent, so no standard_name is attached to it.
CF_STANDARD_NAMES = {
    "00060": "water_volume_transport_in_river_channel",
    # No entry for 00010 (water temperature), on purpose:
    #   * ``water_temperature`` is NOT a CF standard name;
    #   * ``sea_water_temperature``, the only valid CF water-temperature
    #     name, is the wrong domain for USGS freshwater/groundwater.
    # Without a mapping the variable keeps its ``long_name`` and never gets
    # an invalid or misleading ``standard_name``.
    "00065": "water_surface_height_above_reference_datum",
    "63160": "water_surface_height_above_reference_datum",
    "00045": "lwe_thickness_of_precipitation_amount",
}

# Vertical reference datum looked up by USGS parameter code and attached as a
# ``vertical_datum`` attribute. Both water-surface-height parameters carry the
# same CF standard_name (water_surface_height_above_reference_datum), so the
# datum is what tells them apart:
#   * 00065, gage height, is measured from a local site (gage) datum;
#   * 63160, stream water level, is referenced to NAVD88.
CF_VERTICAL_DATUM = {
    "00065": "local site datum",
    "63160": "NAVD88",
}
Loading
Loading