diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index e69de29b..e64db84b 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -0,0 +1,6 @@
+- bump: minor
+  changes:
+    added:
+      - Add place-level region support for US Census places with format place/{STATE_ABBREV}-{PLACE_FIPS}
+    removed:
+      - Remove city region type (city/nyc) in favor of place regions
diff --git a/policyengine_api/constants.py b/policyengine_api/constants.py
index a9f13df5..a8437314 100644
--- a/policyengine_api/constants.py
+++ b/policyengine_api/constants.py
@@ -30,7 +30,7 @@
 US_REGION_TYPES = (
     "national",  # National level (e.g., "us")
     "state",  # US states (e.g., "state/ca", "state/ny")
-    "city",  # US cities (e.g., "city/nyc")
+    "place",  # US Census places (e.g., "place/NJ-57000")
     "congressional_district",  # US congressional districts (e.g., "congressional_district/CA-37")
 )
 
@@ -46,7 +46,7 @@
 REGION_PREFIXES = {
     "us": [
         "state/",  # US states (e.g., "state/ca", "state/ny")
-        "city/",  # US cities (e.g., "city/nyc")
+        "place/",  # US Census places (e.g., "place/NJ-57000")
         "congressional_district/",  # US congressional districts (e.g., "congressional_district/CA-37")
     ],
     "uk": [
diff --git a/policyengine_api/country.py b/policyengine_api/country.py
index 29f64fbb..68028016 100644
--- a/policyengine_api/country.py
+++ b/policyengine_api/country.py
@@ -157,7 +157,6 @@ def build_microsimulation_options(self) -> dict:
                 dict(name="state/nj", label="New Jersey", type="state"),
                 dict(name="state/nm", label="New Mexico", type="state"),
                 dict(name="state/ny", label="New York", type="state"),
-                dict(name="city/nyc", label="New York City", type="city"),
                 dict(name="state/nc", label="North Carolina", type="state"),
                 dict(name="state/nd", label="North Dakota", type="state"),
                 dict(name="state/oh", label="Ohio", type="state"),
diff --git a/policyengine_api/data/congressional_districts.py b/policyengine_api/data/congressional_districts.py
index 8c52c4e6..2e728991 100644
--- a/policyengine_api/data/congressional_districts.py
+++ b/policyengine_api/data/congressional_districts.py
@@ -730,12 +730,13 @@ def normalize_us_region(region: str) -> str:
 
     Args:
         region: A region string that may be in legacy or standard format.
-            Examples: "ca", "state/ca", "nyc", "city/nyc",
+            Examples: "ca", "state/ca", "place/NJ-57000",
             "congressional_district/CA-01", "us"
 
     Returns:
         The normalized region string with appropriate prefix.
-        Examples: "state/ca", "city/nyc", "congressional_district/CA-01", "us"
+        Examples: "state/ca", "place/NJ-57000",
+        "congressional_district/CA-01", "us"
 
     Note:
         This function does NOT validate that the region is valid - it only
@@ -744,7 +745,7 @@ def normalize_us_region(region: str) -> str:
     # Already has a valid prefix - return as-is
     if (
         region.startswith("state/")
-        or region.startswith("city/")
+        or region.startswith("place/")
         or region.startswith("congressional_district/")
     ):
         return region
@@ -753,10 +754,6 @@ def normalize_us_region(region: str) -> str:
     if region == "us":
         return region
 
-    # Legacy NYC format
-    if region == "nyc":
-        return "city/nyc"
-
     # Legacy bare state code (e.g., "ca", "tx", "NY")
     # Check if it's a valid state code before adding prefix
     if region.lower() in get_valid_state_codes():
diff --git a/policyengine_api/data/places.py b/policyengine_api/data/places.py
new file mode 100644
index 00000000..e588489f
--- /dev/null
+++ b/policyengine_api/data/places.py
@@ -0,0 +1,54 @@
+"""
+US Census place code parsing and validation utilities.
+
+Place codes follow the format: STATE_ABBREV-PLACE_FIPS
+Example: NJ-57000 for Newark, NJ
+"""
+
+from policyengine_api.data.congressional_districts import get_valid_state_codes
+
+
+def parse_place_code(place_code: str) -> tuple[str, str]:
+    """
+    Parse a place code into its state abbreviation and FIPS components.
+
+    Args:
+        place_code: Place code in format STATE_ABBREV-PLACE_FIPS (e.g., "NJ-57000")
+
+    Returns:
+        Tuple of (state_abbrev, place_fips)
+
+    Raises:
+        ValueError: If the place code format is invalid
+    """
+    if "-" not in place_code:
+        raise ValueError(
+            f"Invalid place format: '{place_code}'. "
+            "Expected format: STATE_ABBREV-PLACE_FIPS (e.g., NJ-57000)"
+        )
+    # Split on the first hyphen only and return a real tuple, matching the
+    # annotated return type (str.split would hand back a list instead).
+    state_abbrev, _, place_fips = place_code.partition("-")
+    return state_abbrev, place_fips
+
+
+def validate_place_code(place_code: str) -> None:
+    """
+    Validate a place code has valid state abbreviation and FIPS format.
+
+    Args:
+        place_code: Place code in format STATE_ABBREV-PLACE_FIPS (e.g., "NJ-57000")
+
+    Raises:
+        ValueError: If the state abbreviation or FIPS code is invalid
+    """
+    state_abbrev, place_fips = parse_place_code(place_code)
+
+    if state_abbrev.lower() not in get_valid_state_codes():
+        raise ValueError(f"Invalid state in place code: '{state_abbrev}'")
+
+    if not place_fips.isdigit() or len(place_fips) != 5:
+        raise ValueError(
+            f"Invalid FIPS code in place: '{place_fips}'. "
+            "Expected 5-digit FIPS code"
+        )
diff --git a/policyengine_api/services/economy_service.py b/policyengine_api/services/economy_service.py
index d22a36ef..6f7ab5ab 100644
--- a/policyengine_api/services/economy_service.py
+++ b/policyengine_api/services/economy_service.py
@@ -17,6 +17,7 @@
     get_valid_congressional_districts,
     normalize_us_region,
 )
+from policyengine_api.data.places import validate_place_code
 from policyengine.simulation import SimulationOptions
 from policyengine.utils.data.datasets import get_default_dataset
 import json
@@ -520,11 +521,9 @@ def _validate_us_region(self, region: str) -> None:
             state_code = region[len("state/") :]
             if state_code.lower() not in get_valid_state_codes():
                 raise ValueError(f"Invalid US state: '{state_code}'")
-        elif region.startswith("city/"):
-            # Currently only NYC is supported
-            city_code = region[len("city/") :]
-            if city_code != "nyc":
-                raise ValueError(f"Invalid US city: '{city_code}'")
+        elif region.startswith("place/"):
+            place_code = region[len("place/") :]
+            validate_place_code(place_code)
         elif region.startswith("congressional_district/"):
             district_id = region[len("congressional_district/") :]
             if district_id.lower() not in get_valid_congressional_districts():
diff --git a/tests/fixtures/services/economy_service.py b/tests/fixtures/services/economy_service.py
index 96ee736f..9ea1ca24 100644
--- a/tests/fixtures/services/economy_service.py
+++ b/tests/fixtures/services/economy_service.py
@@ -233,9 +233,7 @@ def mock_simulation_api_modal():
 MOCK_US_NATIONWIDE_DATASET = "gs://policyengine-us-data/cps_2023.h5"
 MOCK_US_STATE_CA_DATASET = "gs://policyengine-us-data/states/CA.h5"
 MOCK_US_STATE_UT_DATASET = "gs://policyengine-us-data/states/UT.h5"
-MOCK_US_CITY_NYC_DATASET = (
-    "gs://policyengine-us-data/pooled_3_year_cps_2023.h5"
-)
+MOCK_US_PLACE_NJ_57000_DATASET = "gs://policyengine-us-data/states/NJ.h5"
 MOCK_US_DISTRICT_CA37_DATASET = "gs://policyengine-us-data/districts/CA-37.h5"
 MOCK_UK_DATASET = "gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5"
 
@@ -251,8 +249,11 @@ def mock_get_default_dataset_fn(country: str, region: str | None) -> str:
             return MOCK_US_STATE_CA_DATASET
         elif region == "state/ut":
             return MOCK_US_STATE_UT_DATASET
-        elif region == "city/nyc":
-            return MOCK_US_CITY_NYC_DATASET
+        elif region.startswith("place/"):
+            # Place uses parent state's dataset
+            place_code = region.split("/")[1]
+            state_abbrev = place_code.split("-")[0].upper()
+            return f"gs://policyengine-us-data/states/{state_abbrev}.h5"
         elif region == "congressional_district/CA-37":
             return MOCK_US_DISTRICT_CA37_DATASET
         elif region.startswith("state/"):
diff --git a/tests/unit/data/test_congressional_districts.py b/tests/unit/data/test_congressional_districts.py
index 05819916..91b41570 100644
--- a/tests/unit/data/test_congressional_districts.py
+++ b/tests/unit/data/test_congressional_districts.py
@@ -359,8 +359,9 @@ def test__prefixed_state_unchanged(self):
         assert normalize_us_region("state/ca") == "state/ca"
         assert normalize_us_region("state/TX") == "state/TX"
 
-    def test__prefixed_city_unchanged(self):
-        assert normalize_us_region("city/nyc") == "city/nyc"
+    def test__prefixed_place_unchanged(self):
+        assert normalize_us_region("place/NJ-57000") == "place/NJ-57000"
+        assert normalize_us_region("place/ca-44000") == "place/ca-44000"
 
     def test__prefixed_congressional_district_unchanged(self):
         assert (
@@ -372,9 +373,6 @@ def test__prefixed_congressional_district_unchanged(self):
             == "congressional_district/tx-14"
         )
 
-    def test__legacy_nyc_converted(self):
-        assert normalize_us_region("nyc") == "city/nyc"
-
     def test__legacy_state_code_lowercase_converted(self):
         assert normalize_us_region("ca") == "state/ca"
         assert normalize_us_region("tx") == "state/tx"
diff --git a/tests/unit/data/test_places.py b/tests/unit/data/test_places.py
new file mode 100644
index 00000000..fc73e7e5
--- /dev/null
+++ b/tests/unit/data/test_places.py
@@ -0,0 +1,65 @@
+import pytest
+
+from policyengine_api.data.places import (
+    parse_place_code,
+    validate_place_code,
+)
+
+
+class TestParsePlaceCode:
+    """Tests for the parse_place_code function."""
+
+    def test__given_valid_place_code__returns_tuple(self):
+        result = parse_place_code("NJ-57000")
+        assert isinstance(result, tuple)
+        assert result == ("NJ", "57000")
+
+    def test__given_lowercase_place_code__returns_tuple(self):
+        state, fips = parse_place_code("ca-44000")
+        assert state == "ca"
+        assert fips == "44000"
+
+    def test__given_no_hyphen__raises_value_error(self):
+        with pytest.raises(ValueError) as exc_info:
+            parse_place_code("NJ57000")
+        assert "Invalid place format" in str(exc_info.value)
+
+    def test__given_empty_string__raises_value_error(self):
+        with pytest.raises(ValueError) as exc_info:
+            parse_place_code("")
+        assert "Invalid place format" in str(exc_info.value)
+
+
+class TestValidatePlaceCode:
+    """Tests for the validate_place_code function."""
+
+    def test__given_valid_place_code__no_error(self):
+        # Should not raise
+        validate_place_code("NJ-57000")
+        validate_place_code("ca-44000")
+        validate_place_code("TX-35000")
+
+    def test__given_invalid_state__raises_value_error(self):
+        with pytest.raises(ValueError) as exc_info:
+            validate_place_code("XX-57000")
+        assert "Invalid state in place code" in str(exc_info.value)
+
+    def test__given_non_digit_fips__raises_value_error(self):
+        with pytest.raises(ValueError) as exc_info:
+            validate_place_code("NJ-abcde")
+        assert "Invalid FIPS code" in str(exc_info.value)
+
+    def test__given_short_fips__raises_value_error(self):
+        with pytest.raises(ValueError) as exc_info:
+            validate_place_code("NJ-5700")
+        assert "Invalid FIPS code" in str(exc_info.value)
+
+    def test__given_long_fips__raises_value_error(self):
+        with pytest.raises(ValueError) as exc_info:
+            validate_place_code("NJ-570001")
+        assert "Invalid FIPS code" in str(exc_info.value)
+
+    def test__given_no_hyphen__raises_value_error(self):
+        with pytest.raises(ValueError) as exc_info:
+            validate_place_code("NJ57000")
+        assert "Invalid place format" in str(exc_info.value)
diff --git a/tests/unit/services/test_economy_service.py b/tests/unit/services/test_economy_service.py
index 4cb010ce..49c0fe39 100644
--- a/tests/unit/services/test_economy_service.py
+++ b/tests/unit/services/test_economy_service.py
@@ -942,11 +942,32 @@ def test__given_invalid_bare_value__raises_value_error(self):
                 service._setup_region("us", "invalid_value")
             assert "Invalid US region: 'invalid_value'" in str(exc_info.value)
 
-        def test__given_city_nyc__returns_unchanged(self):
-            # Test normalized "city/nyc" format passes through
+        def test__given_place_region__returns_unchanged(self):
+            # Test normalized "place/STATE-FIPS" format passes through
             service = EconomyService()
-            result = service._setup_region("us", "city/nyc")
-            assert result == "city/nyc"
+            result = service._setup_region("us", "place/NJ-57000")
+            assert result == "place/NJ-57000"
+
+        def test__given_invalid_place_format__raises_value_error(self):
+            # Test place without hyphen raises error
+            service = EconomyService()
+            with pytest.raises(ValueError) as exc_info:
+                service._setup_region("us", "place/invalid")
+            assert "Invalid place format" in str(exc_info.value)
+
+        def test__given_invalid_place_state__raises_value_error(self):
+            # Test place with invalid state code raises error
+            service = EconomyService()
+            with pytest.raises(ValueError) as exc_info:
+                service._setup_region("us", "place/XX-57000")
+            assert "Invalid state in place code" in str(exc_info.value)
+
+        def test__given_invalid_place_fips__raises_value_error(self):
+            # Test place with invalid FIPS code raises error
+            service = EconomyService()
+            with pytest.raises(ValueError) as exc_info:
+                service._setup_region("us", "place/NJ-abc")
+            assert "Invalid FIPS code" in str(exc_info.value)
 
     class TestSetupData:
         """Tests for _setup_data method.
@@ -955,13 +976,11 @@ class TestSetupData:
         to return GCS paths for all region types (not None).
         """
 
-        def test__given_us_city_nyc__returns_pooled_cps(self):
-            # Test with normalized city/nyc format
+        def test__given_us_place__returns_state_dataset(self):
+            # Test with place region - uses parent state's dataset
             service = EconomyService()
-            result = service._setup_data("us", "city/nyc")
-            assert (
-                result == "gs://policyengine-us-data/pooled_3_year_cps_2023.h5"
-            )
+            result = service._setup_data("us", "place/NJ-57000")
+            assert result == "gs://policyengine-us-data/states/NJ.h5"
 
         def test__given_us_state_ca__returns_state_dataset(self):
             # Test with US state - returns state-specific dataset
diff --git a/tests/unit/services/test_metadata_service.py b/tests/unit/services/test_metadata_service.py
index 70ea9262..40c6805d 100644
--- a/tests/unit/services/test_metadata_service.py
+++ b/tests/unit/services/test_metadata_service.py
@@ -55,7 +55,6 @@ def test_get_metadata_empty_country_id(self):
                     "state/ny",
                     "state/tx",
                     "state/fl",
-                    "city/nyc",
                 ],
             ),
             ("ca", 3, ["ca"]),
@@ -124,7 +123,7 @@ def test_verify_metadata_for_given_country(
         "country_id, expected_types",
         [
             ("uk", ["national", "country", "constituency", "local_authority"]),
-            ("us", ["national", "state", "city", "congressional_district"]),
+            ("us", ["national", "state", "place", "congressional_district"]),
         ],
     )
     def test_verify_region_types_for_given_country(
diff --git a/tests/unit/test_constants.py b/tests/unit/test_constants.py
index 439d5a23..f83d964c 100644
--- a/tests/unit/test_constants.py
+++ b/tests/unit/test_constants.py
@@ -35,8 +35,8 @@ def test__contains_national(self):
     def test__contains_state(self):
         assert "state" in US_REGION_TYPES
 
-    def test__contains_city(self):
-        assert "city" in US_REGION_TYPES
+    def test__contains_place(self):
+        assert "place" in US_REGION_TYPES
 
     def test__contains_congressional_district(self):
         assert "congressional_district" in US_REGION_TYPES
@@ -75,8 +75,8 @@ def test__us_key_exists(self):
     def test__contains_state_prefix(self):
         assert "state/" in REGION_PREFIXES["us"]
 
-    def test__contains_city_prefix(self):
-        assert "city/" in REGION_PREFIXES["us"]
+    def test__contains_place_prefix(self):
+        assert "place/" in REGION_PREFIXES["us"]
 
     def test__contains_congressional_district_prefix(self):
         assert "congressional_district/" in REGION_PREFIXES["us"]