Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
- bump: minor
changes:
added:
- Add place-level region support for US Census places with format place/{STATE_ABBREV}-{PLACE_FIPS}
removed:
- Remove city region type (city/nyc) in favor of place regions
4 changes: 2 additions & 2 deletions policyengine_api/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
US_REGION_TYPES = (
"national", # National level (e.g., "us")
"state", # US states (e.g., "state/ca", "state/ny")
"city", # US cities (e.g., "city/nyc")
"place", # US Census places (e.g., "place/NJ-57000")
"congressional_district", # US congressional districts (e.g., "congressional_district/CA-37")
)

Expand All @@ -46,7 +46,7 @@
REGION_PREFIXES = {
"us": [
"state/", # US states (e.g., "state/ca", "state/ny")
"city/", # US cities (e.g., "city/nyc")
"place/", # US Census places (e.g., "place/NJ-57000")
"congressional_district/", # US congressional districts (e.g., "congressional_district/CA-37")
],
"uk": [
Expand Down
1 change: 0 additions & 1 deletion policyengine_api/country.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,6 @@ def build_microsimulation_options(self) -> dict:
dict(name="state/nj", label="New Jersey", type="state"),
dict(name="state/nm", label="New Mexico", type="state"),
dict(name="state/ny", label="New York", type="state"),
dict(name="city/nyc", label="New York City", type="city"),
dict(name="state/nc", label="North Carolina", type="state"),
dict(name="state/nd", label="North Dakota", type="state"),
dict(name="state/oh", label="Ohio", type="state"),
Expand Down
11 changes: 4 additions & 7 deletions policyengine_api/data/congressional_districts.py
Original file line number Diff line number Diff line change
Expand Up @@ -730,12 +730,13 @@ def normalize_us_region(region: str) -> str:

Args:
region: A region string that may be in legacy or standard format.
Examples: "ca", "state/ca", "nyc", "city/nyc",
Examples: "ca", "state/ca", "place/NJ-57000",
"congressional_district/CA-01", "us"

Returns:
The normalized region string with appropriate prefix.
Examples: "state/ca", "city/nyc", "congressional_district/CA-01", "us"
Examples: "state/ca", "place/NJ-57000",
"congressional_district/CA-01", "us"

Note:
This function does NOT validate that the region is valid - it only
Expand All @@ -744,7 +745,7 @@ def normalize_us_region(region: str) -> str:
# Already has a valid prefix - return as-is
if (
region.startswith("state/")
or region.startswith("city/")
or region.startswith("place/")
or region.startswith("congressional_district/")
):
return region
Expand All @@ -753,10 +754,6 @@ def normalize_us_region(region: str) -> str:
if region == "us":
return region

# Legacy NYC format
if region == "nyc":
return "city/nyc"

# Legacy bare state code (e.g., "ca", "tx", "NY")
# Check if it's a valid state code before adding prefix
if region.lower() in get_valid_state_codes():
Expand Down
51 changes: 51 additions & 0 deletions policyengine_api/data/places.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""
US Census place code parsing and validation utilities.

Place codes follow the format: STATE_ABBREV-PLACE_FIPS
Example: NJ-57000 for Newark, NJ
"""

from policyengine_api.data.congressional_districts import get_valid_state_codes


def parse_place_code(place_code: str) -> tuple[str, str]:
    """
    Parse a place code into its state abbreviation and FIPS components.

    The code is split on the first hyphen only, so any additional hyphens
    remain part of the FIPS component. Neither component is validated here.

    Args:
        place_code: Place code in format STATE_ABBREV-PLACE_FIPS (e.g., "NJ-57000")

    Returns:
        Tuple of (state_abbrev, place_fips)

    Raises:
        ValueError: If the place code format is invalid (it contains no hyphen)
    """
    state_abbrev, separator, place_fips = place_code.partition("-")
    if not separator:
        raise ValueError(
            f"Invalid place format: '{place_code}'. "
            "Expected format: STATE_ABBREV-PLACE_FIPS (e.g., NJ-57000)"
        )
    # Return an actual tuple so the result matches the annotated return type
    # (str.split would hand back a list).
    return state_abbrev, place_fips


def validate_place_code(place_code: str) -> None:
    """
    Validate a place code has valid state abbreviation and FIPS format.

    The state abbreviation is compared case-insensitively against
    get_valid_state_codes(). The FIPS component must be exactly five ASCII
    digits (0-9).

    Args:
        place_code: Place code in format STATE_ABBREV-PLACE_FIPS (e.g., "NJ-57000")

    Raises:
        ValueError: If the place code has no hyphen, or if the state
            abbreviation or FIPS code is invalid
    """
    state_abbrev, place_fips = parse_place_code(place_code)

    if state_abbrev.lower() not in get_valid_state_codes():
        raise ValueError(f"Invalid state in place code: '{state_abbrev}'")

    # str.isdigit() on its own also accepts non-ASCII digit characters
    # (e.g. superscripts or Arabic-Indic digits); require ASCII as well so
    # only "0"-"9" are accepted.
    is_ascii_digits = place_fips.isascii() and place_fips.isdigit()
    if not is_ascii_digits or len(place_fips) != 5:
        raise ValueError(
            f"Invalid FIPS code in place: '{place_fips}'. "
            "Expected 5-digit FIPS code"
        )
9 changes: 4 additions & 5 deletions policyengine_api/services/economy_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
get_valid_congressional_districts,
normalize_us_region,
)
from policyengine_api.data.places import validate_place_code
from policyengine.simulation import SimulationOptions
from policyengine.utils.data.datasets import get_default_dataset
import json
Expand Down Expand Up @@ -520,11 +521,9 @@ def _validate_us_region(self, region: str) -> None:
state_code = region[len("state/") :]
if state_code.lower() not in get_valid_state_codes():
raise ValueError(f"Invalid US state: '{state_code}'")
elif region.startswith("city/"):
# Currently only NYC is supported
city_code = region[len("city/") :]
if city_code != "nyc":
raise ValueError(f"Invalid US city: '{city_code}'")
elif region.startswith("place/"):
place_code = region[len("place/") :]
validate_place_code(place_code)
elif region.startswith("congressional_district/"):
district_id = region[len("congressional_district/") :]
if district_id.lower() not in get_valid_congressional_districts():
Expand Down
11 changes: 6 additions & 5 deletions tests/fixtures/services/economy_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,9 +233,7 @@ def mock_simulation_api_modal():
MOCK_US_NATIONWIDE_DATASET = "gs://policyengine-us-data/cps_2023.h5"
MOCK_US_STATE_CA_DATASET = "gs://policyengine-us-data/states/CA.h5"
MOCK_US_STATE_UT_DATASET = "gs://policyengine-us-data/states/UT.h5"
MOCK_US_CITY_NYC_DATASET = (
"gs://policyengine-us-data/pooled_3_year_cps_2023.h5"
)
MOCK_US_PLACE_NJ_57000_DATASET = "gs://policyengine-us-data/states/NJ.h5"
MOCK_US_DISTRICT_CA37_DATASET = "gs://policyengine-us-data/districts/CA-37.h5"
MOCK_UK_DATASET = "gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5"

Expand All @@ -251,8 +249,11 @@ def mock_get_default_dataset_fn(country: str, region: str | None) -> str:
return MOCK_US_STATE_CA_DATASET
elif region == "state/ut":
return MOCK_US_STATE_UT_DATASET
elif region == "city/nyc":
return MOCK_US_CITY_NYC_DATASET
elif region.startswith("place/"):
# Place uses parent state's dataset
place_code = region.split("/")[1]
state_abbrev = place_code.split("-")[0].upper()
return f"gs://policyengine-us-data/states/{state_abbrev}.h5"
elif region == "congressional_district/CA-37":
return MOCK_US_DISTRICT_CA37_DATASET
elif region.startswith("state/"):
Expand Down
8 changes: 3 additions & 5 deletions tests/unit/data/test_congressional_districts.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,8 +359,9 @@ def test__prefixed_state_unchanged(self):
assert normalize_us_region("state/ca") == "state/ca"
assert normalize_us_region("state/TX") == "state/TX"

def test__prefixed_city_unchanged(self):
assert normalize_us_region("city/nyc") == "city/nyc"
def test__prefixed_place_unchanged(self):
assert normalize_us_region("place/NJ-57000") == "place/NJ-57000"
assert normalize_us_region("place/ca-44000") == "place/ca-44000"

def test__prefixed_congressional_district_unchanged(self):
assert (
Expand All @@ -372,9 +373,6 @@ def test__prefixed_congressional_district_unchanged(self):
== "congressional_district/tx-14"
)

def test__legacy_nyc_converted(self):
assert normalize_us_region("nyc") == "city/nyc"

def test__legacy_state_code_lowercase_converted(self):
assert normalize_us_region("ca") == "state/ca"
assert normalize_us_region("tx") == "state/tx"
Expand Down
65 changes: 65 additions & 0 deletions tests/unit/data/test_places.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import pytest

from policyengine_api.data.places import (
parse_place_code,
validate_place_code,
)


class TestParsePlaceCode:
    """Unit tests covering parse_place_code."""

    def test__given_valid_place_code__returns_tuple(self):
        # Upper-case state abbreviation is passed through untouched.
        abbrev, code = parse_place_code("NJ-57000")
        assert abbrev == "NJ"
        assert code == "57000"

    def test__given_lowercase_place_code__returns_tuple(self):
        # Parsing does not normalize case.
        abbrev, code = parse_place_code("ca-44000")
        assert abbrev == "ca"
        assert code == "44000"

    def test__given_no_hyphen__raises_value_error(self):
        # A missing separator is a format error.
        with pytest.raises(ValueError, match="Invalid place format"):
            parse_place_code("NJ57000")

    def test__given_empty_string__raises_value_error(self):
        # The empty string has no separator either.
        with pytest.raises(ValueError, match="Invalid place format"):
            parse_place_code("")


class TestValidatePlaceCode:
    """Unit tests covering validate_place_code."""

    def test__given_valid_place_code__no_error(self):
        # Each of these must validate without raising.
        for valid_code in ("NJ-57000", "ca-44000", "TX-35000"):
            validate_place_code(valid_code)

    def test__given_invalid_state__raises_value_error(self):
        # "XX" is not a recognized state abbreviation.
        with pytest.raises(ValueError, match="Invalid state in place code"):
            validate_place_code("XX-57000")

    def test__given_non_digit_fips__raises_value_error(self):
        # Letters are not allowed in the FIPS component.
        with pytest.raises(ValueError, match="Invalid FIPS code"):
            validate_place_code("NJ-abcde")

    def test__given_short_fips__raises_value_error(self):
        # Four digits is one too few.
        with pytest.raises(ValueError, match="Invalid FIPS code"):
            validate_place_code("NJ-5700")

    def test__given_long_fips__raises_value_error(self):
        # Six digits is one too many.
        with pytest.raises(ValueError, match="Invalid FIPS code"):
            validate_place_code("NJ-570001")

    def test__given_no_hyphen__raises_value_error(self):
        # The format error from parsing propagates through validation.
        with pytest.raises(ValueError, match="Invalid place format"):
            validate_place_code("NJ57000")
39 changes: 29 additions & 10 deletions tests/unit/services/test_economy_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -942,11 +942,32 @@ def test__given_invalid_bare_value__raises_value_error(self):
service._setup_region("us", "invalid_value")
assert "Invalid US region: 'invalid_value'" in str(exc_info.value)

def test__given_city_nyc__returns_unchanged(self):
# Test normalized "city/nyc" format passes through
def test__given_place_region__returns_unchanged(self):
# Test normalized "place/STATE-FIPS" format passes through
service = EconomyService()
result = service._setup_region("us", "city/nyc")
assert result == "city/nyc"
result = service._setup_region("us", "place/NJ-57000")
assert result == "place/NJ-57000"

def test__given_invalid_place_format__raises_value_error(self):
# Test place without hyphen raises error
service = EconomyService()
with pytest.raises(ValueError) as exc_info:
service._setup_region("us", "place/invalid")
assert "Invalid place format" in str(exc_info.value)

def test__given_invalid_place_state__raises_value_error(self):
# Test place with invalid state code raises error
service = EconomyService()
with pytest.raises(ValueError) as exc_info:
service._setup_region("us", "place/XX-57000")
assert "Invalid state in place code" in str(exc_info.value)

def test__given_invalid_place_fips__raises_value_error(self):
# Test place with invalid FIPS code raises error
service = EconomyService()
with pytest.raises(ValueError) as exc_info:
service._setup_region("us", "place/NJ-abc")
assert "Invalid FIPS code" in str(exc_info.value)

class TestSetupData:
"""Tests for _setup_data method.
Expand All @@ -955,13 +976,11 @@ class TestSetupData:
to return GCS paths for all region types (not None).
"""

def test__given_us_city_nyc__returns_pooled_cps(self):
# Test with normalized city/nyc format
def test__given_us_place__returns_state_dataset(self):
# Test with place region - uses parent state's dataset
service = EconomyService()
result = service._setup_data("us", "city/nyc")
assert (
result == "gs://policyengine-us-data/pooled_3_year_cps_2023.h5"
)
result = service._setup_data("us", "place/NJ-57000")
assert result == "gs://policyengine-us-data/states/NJ.h5"

def test__given_us_state_ca__returns_state_dataset(self):
# Test with US state - returns state-specific dataset
Expand Down
3 changes: 1 addition & 2 deletions tests/unit/services/test_metadata_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ def test_get_metadata_empty_country_id(self):
"state/ny",
"state/tx",
"state/fl",
"city/nyc",
],
),
("ca", 3, ["ca"]),
Expand Down Expand Up @@ -124,7 +123,7 @@ def test_verify_metadata_for_given_country(
"country_id, expected_types",
[
("uk", ["national", "country", "constituency", "local_authority"]),
("us", ["national", "state", "city", "congressional_district"]),
("us", ["national", "state", "place", "congressional_district"]),
],
)
def test_verify_region_types_for_given_country(
Expand Down
8 changes: 4 additions & 4 deletions tests/unit/test_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ def test__contains_national(self):
def test__contains_state(self):
assert "state" in US_REGION_TYPES

def test__contains_city(self):
assert "city" in US_REGION_TYPES
def test__contains_place(self):
assert "place" in US_REGION_TYPES

def test__contains_congressional_district(self):
assert "congressional_district" in US_REGION_TYPES
Expand Down Expand Up @@ -75,8 +75,8 @@ def test__us_key_exists(self):
def test__contains_state_prefix(self):
assert "state/" in REGION_PREFIXES["us"]

def test__contains_city_prefix(self):
assert "city/" in REGION_PREFIXES["us"]
def test__contains_place_prefix(self):
assert "place/" in REGION_PREFIXES["us"]

def test__contains_congressional_district_prefix(self):
assert "congressional_district/" in REGION_PREFIXES["us"]
Expand Down
Loading