diff --git a/.github/workflows/test_app.yml b/.github/workflows/test_app.yml
index c869304a..ae0bb121 100644
--- a/.github/workflows/test_app.yml
+++ b/.github/workflows/test_app.yml
@@ -19,22 +19,12 @@ jobs:
           --health-timeout 5s
           --health-retries 5
 
-    env: # <-- Consolidated env block here
+    env:
       POSTGRES_PASSWORD: postgres
       POSTGRES_USER: postgres
-      POSTGRES_DB: source_collector_test_db
+      POSTGRES_DB: postgres
       POSTGRES_HOST: postgres
       POSTGRES_PORT: 5432
-      DATA_SOURCES_HOST: postgres
-      DATA_SOURCES_PORT: 5432
-      DATA_SOURCES_USER: postgres
-      DATA_SOURCES_PASSWORD: postgres
-      DATA_SOURCES_DB: test_data_sources_db
-      FDW_DATA_SOURCES_HOST: postgres
-      FDW_DATA_SOURCES_PORT: 5432
-      FDW_DATA_SOURCES_USER: postgres
-      FDW_DATA_SOURCES_PASSWORD: postgres
-      FDW_DATA_SOURCES_DB: test_data_sources_db
       GOOGLE_API_KEY: TEST
       GOOGLE_CSE_ID: TEST
 
@@ -42,16 +32,10 @@ jobs:
       - name: Checkout repository
         uses: actions/checkout@v4
 
-      - name: Install PostgreSQL client tools
-        run: |
-          apt-get update
-          apt-get install -y postgresql-client
-
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
           pip install -r requirements.txt
-          python -m local_database.create_database --use-shell
 
       - name: Run tests
         run: |
diff --git a/alembic/versions/2025_04_21_1817-13f1272f94b9_set_up_foreign_data_wrapper.py b/alembic/versions/2025_04_21_1817-13f1272f94b9_set_up_foreign_data_wrapper.py
deleted file mode 100644
index 737b49a0..00000000
--- a/alembic/versions/2025_04_21_1817-13f1272f94b9_set_up_foreign_data_wrapper.py
+++ /dev/null
@@ -1,250 +0,0 @@
-"""Set up foreign data wrapper
-
-Revision ID: 13f1272f94b9
-Revises: e285e6e7cf71
-Create Date: 2025-04-21 18:17:34.593973
-
-"""
-import os
-from typing import Sequence, Union
-
-from alembic import op
-from dotenv import load_dotenv
-
-# revision identifiers, used by Alembic.
-revision: str = '13f1272f94b9'
-down_revision: Union[str, None] = 'e285e6e7cf71'
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
-
-
-def upgrade() -> None:
-
-    load_dotenv()
-    remote_host = os.getenv("FDW_DATA_SOURCES_HOST")
-    user = os.getenv("FDW_DATA_SOURCES_USER")
-    password = os.getenv("FDW_DATA_SOURCES_PASSWORD")
-    db_name = os.getenv("FDW_DATA_SOURCES_DB")
-    port = os.getenv("FDW_DATA_SOURCES_PORT")
-
-    op.execute(f"CREATE EXTENSION IF NOT EXISTS postgres_fdw;")
-
-    op.execute(f"""
-        CREATE SERVER data_sources_server
-        FOREIGN DATA WRAPPER postgres_fdw
-        OPTIONS (host '{remote_host}', dbname '{db_name}', port '{port}');
-    """)
-
-    op.execute(f"""
-        CREATE USER MAPPING FOR PUBLIC
-        SERVER data_sources_server
-        OPTIONS (user '{user}', password '{password}');
-    """)
-
-    op.execute('CREATE SCHEMA if not exists "remote";')
-
-    # Users table
-    op.execute("""
-        CREATE FOREIGN TABLE IF NOT EXISTS "remote".users
-        (
-            id bigint,
-            created_at timestamp with time zone,
-            updated_at timestamp with time zone,
-            email text,
-            password_digest text,
-            api_key character varying,
-            role text
-        )
-        SERVER data_sources_server
-        OPTIONS (
-            schema_name 'public',
-            table_name 'users'
-        );
-    """)
-
-    # Agencies
-    # -Enums
-    # --Jurisdiction Type
-    op.execute("""
-        CREATE TYPE jurisdiction_type AS ENUM
-        ('school', 'county', 'local', 'port', 'tribal', 'transit', 'state', 'federal');
-    """)
-    # --Agency Type
-    op.execute("""
-        CREATE TYPE agency_type AS ENUM
-        ('incarceration', 'law enforcement', 'aggregated', 'court', 'unknown');
-    """)
-
-    # -Table
-    op.execute("""
-        CREATE FOREIGN TABLE IF NOT EXISTS "remote".agencies
-        (
-            name character,
-            homepage_url character,
-            jurisdiction_type jurisdiction_type,
-            lat double precision,
-            lng double precision,
-            defunct_year character,
-            airtable_uid character,
-            agency_type agency_type,
-            multi_agency boolean,
-            no_web_presence boolean,
-            airtable_agency_last_modified timestamp with time zone,
-            rejection_reason character,
-            last_approval_editor character,
-            submitter_contact character,
-            agency_created timestamp with time zone,
-            id integer,
-            approval_status text,
-            creator_user_id integer
-        )
-        SERVER data_sources_server
-        OPTIONS (
-            schema_name 'public',
-            table_name 'agencies'
-        );
-    """)
-
-    # Locations Table
-    # -Enums
-    # --Location Type
-    op.execute("""
-        CREATE TYPE location_type AS ENUM
-        ('State', 'County', 'Locality');
-    """)
-
-    # -Table
-    op.execute("""
-        CREATE FOREIGN TABLE IF NOT EXISTS "remote".locations
-        (
-            id bigint,
-            type location_type,
-            state_id bigint,
-            county_id bigint,
-            locality_id bigint
-        )
-        SERVER data_sources_server
-        OPTIONS (
-            schema_name 'public',
-            table_name 'locations'
-        );
-    """)
-
-    # Data Sources Table
-
-    # -Enums
-    # -- access_type
-    op.execute("""
-        CREATE TYPE access_type AS ENUM
-        ('Download', 'Webpage', 'API');
-    """)
-
-    # -- agency_aggregation
-    op.execute("""
-        CREATE TYPE agency_aggregation AS ENUM
-        ('county', 'local', 'state', 'federal');
-    """)
-    # -- update_method
-    op.execute("""
-        CREATE TYPE update_method AS ENUM
-        ('Insert', 'No updates', 'Overwrite');
-    """)
-
-    # -- detail_level
-    op.execute("""
-        CREATE TYPE detail_level AS ENUM
-        ('Individual record', 'Aggregated records', 'Summarized totals');
-    """)
-
-    # -- retention_schedule
-    op.execute("""
-        CREATE TYPE retention_schedule AS ENUM
-        ('< 1 day', '1 day', '< 1 week', '1 week', '1 month', '< 1 year', '1-10 years', '> 10 years', 'Future only');
-    """)
-
-    # -Table
-    op.execute("""
-        CREATE FOREIGN TABLE IF NOT EXISTS "remote".data_sources
-        (
-            name character varying,
-            description character,
-            source_url character,
-            agency_supplied boolean,
-            supplying_entity character,
-            agency_originated boolean,
-            agency_aggregation agency_aggregation,
-            coverage_start date,
-            coverage_end date,
-            updated_at timestamp with time zone,
-            detail_level detail_level,
-            record_download_option_provided boolean,
-            data_portal_type character,
-            update_method update_method,
-            readme_url character,
-            originating_entity character,
-            retention_schedule retention_schedule,
-            airtable_uid character,
-            scraper_url character,
-            created_at timestamp with time zone,
-            submission_notes character,
-            rejection_note character,
-            submitter_contact_info character,
-            agency_described_not_in_database character,
-            data_portal_type_other character,
-            data_source_request character,
-            broken_source_url_as_of timestamp with time zone,
-            access_notes text,
-            url_status text,
-            approval_status text,
-            record_type_id integer,
-            access_types access_type[],
-            tags text[],
-            record_formats text[],
-            id integer,
-            approval_status_updated_at timestamp with time zone,
-            last_approval_editor bigint
-        )
-        SERVER data_sources_server
-        OPTIONS (
-            schema_name 'public',
-            table_name 'data_sources'
-        );
-    """)
-
-
-def downgrade() -> None:
-    # Drop foreign schema
-    op.execute('DROP SCHEMA IF EXISTS "remote" CASCADE;')
-
-    # Drop enums
-    enums = [
-        "jurisdiction_type",
-        "agency_type",
-        "location_type",
-        "access_type",
-        "agency_aggregation",
-        "update_method",
-        "detail_level",
-        "retention_schedule",
-    ]
-    for enum in enums:
-        op.execute(f"""
-            DROP TYPE IF EXISTS {enum};
-        """)
-
-    # Drop user mapping
-    user = os.getenv("DATA_SOURCES_USER")
-    op.execute(f"""
-        DROP USER MAPPING FOR PUBLIC SERVER data_sources_server;
-    """)
-
-    # Drop server
-    op.execute("""
-        DROP SERVER IF EXISTS data_sources_server CASCADE;
-    """)
-
-    # Drop FDW
-    op.execute("""
-        DROP EXTENSION IF EXISTS postgres_fdw CASCADE;
-    """)
diff --git a/local_database/DataDumper/dump/data_sources_db_dump.sql b/local_database/DataDumper/dump/data_sources_db_dump.sql
deleted file mode 100644
index aa27b60a..00000000
Binary files a/local_database/DataDumper/dump/data_sources_db_dump.sql and /dev/null differ
diff --git a/local_database/DockerInfos.py b/local_database/DockerInfos.py
index 3b1c071b..17180bab 100644
--- a/local_database/DockerInfos.py
+++ b/local_database/DockerInfos.py
@@ -1,5 +1,4 @@
 from local_database.DTOs import DockerInfo, DockerfileInfo, HealthCheckInfo, VolumeInfo
-from local_database.constants import LOCAL_DATA_SOURCES_DB_NAME
 
 from util.helper_functions import get_from_env, project_path
 
@@ -26,43 +25,6 @@ def get_database_docker_info() -> DockerInfo:
         )
     )
 
-
-def get_data_sources_data_dumper_info() -> DockerInfo:
-    return DockerInfo(
-        dockerfile_info=DockerfileInfo(
-            image_tag="datadumper",
-            dockerfile_directory=str(project_path(
-                "local_database",
-                "DataDumper"
-            ))
-        ),
-        volume_info=VolumeInfo(
-            host_path=str(project_path(
-                "local_database",
-                "DataDumper",
-                "dump"
-            )),
-            container_path="/dump"
-        ),
-        name="datadumper",
-        environment={
-            "DUMP_HOST": get_from_env("PROD_DATA_SOURCES_HOST"),
-            "DUMP_USER": get_from_env("PROD_DATA_SOURCES_USER"),
-            "DUMP_PASSWORD": get_from_env("PROD_DATA_SOURCES_PASSWORD"),
-            "DUMP_NAME": get_from_env("PROD_DATA_SOURCES_DB"),
-            "DUMP_PORT": get_from_env("PROD_DATA_SOURCES_PORT"),
-            "RESTORE_HOST": get_from_env("POSTGRES_HOST"),
-            "RESTORE_USER": get_from_env("POSTGRES_USER"),
-            "RESTORE_PORT": get_from_env("POSTGRES_PORT"),
-            "RESTORE_DB_NAME": LOCAL_DATA_SOURCES_DB_NAME,
-            "RESTORE_PASSWORD": get_from_env("POSTGRES_PASSWORD"),
-            "DUMP_FILE": "/dump/data_sources_db_dump.sql",
-            "DUMP_SCHEMA_ONLY": "true"
-        },
-        command="bash"
-    )
-
-
 def get_source_collector_data_dumper_info() -> DockerInfo:
     return DockerInfo(
         dockerfile_info=DockerfileInfo(
diff --git a/local_database/constants.py b/local_database/constants.py
index d5c96e72..51147717 100644
--- a/local_database/constants.py
+++ b/local_database/constants.py
@@ -1,4 +1,3 @@
-LOCAL_DATA_SOURCES_DB_NAME = "test_data_sources_db"
 LOCAL_SOURCE_COLLECTOR_DB_NAME = "source_collector_test_db"
 
 DUMP_SH_DOCKER_PATH = "/usr/local/bin/dump.sh"
diff --git a/local_database/create_database.py b/local_database/create_database.py
index 58b15508..67eae70b 100644
--- a/local_database/create_database.py
+++ b/local_database/create_database.py
@@ -5,9 +5,7 @@
 import psycopg2
 from psycopg2 import sql
 
-from local_database.DockerInfos import get_data_sources_data_dumper_info
-from local_database.classes.DockerManager import DockerManager
-from local_database.constants import LOCAL_DATA_SOURCES_DB_NAME, LOCAL_SOURCE_COLLECTOR_DB_NAME, RESTORE_SH_DOCKER_PATH
+from local_database.constants import LOCAL_SOURCE_COLLECTOR_DB_NAME, RESTORE_SH_DOCKER_PATH
 
 # Defaults (can be overridden via environment variables)
 POSTGRES_HOST = os.getenv("POSTGRES_HOST", "host.docker.internal")
@@ -52,47 +50,7 @@ def create_database(db_name):
 
 def main():
     print("Creating databases...")
-    create_database(LOCAL_DATA_SOURCES_DB_NAME)
     create_database(LOCAL_SOURCE_COLLECTOR_DB_NAME)
 
 if __name__ == "__main__":
     main()
-    parser = argparse.ArgumentParser()
-
-    parser.add_argument(
-        "--use-shell",
-        action="store_true",
-        help="Use shell to run restore script"
-    )
-
-    args = parser.parse_args()
-
-    if args.use_shell:
-        subprocess.run(
-            [
-                "bash",
-                "-c",
-                RESTORE_SH_DOCKER_PATH
-            ],
-            env={
-                "RESTORE_HOST": POSTGRES_HOST,
-                "RESTORE_USER": POSTGRES_USER,
-                "RESTORE_PORT": str(POSTGRES_PORT),
-                "RESTORE_DB_NAME": LOCAL_DATA_SOURCES_DB_NAME,
-                "RESTORE_PASSWORD": POSTGRES_PASSWORD
-            }
-        )
-        os.system(RESTORE_SH_DOCKER_PATH)
-        exit(0)
-
-    docker_manager = DockerManager()
-    data_sources_docker_info = get_data_sources_data_dumper_info()
-    container = docker_manager.run_container(
-        data_sources_docker_info,
-        force_rebuild=True
-    )
-    try:
-        container.run_command(RESTORE_SH_DOCKER_PATH)
-    finally:
-        container.stop()
-
diff --git a/local_database/dump_data_sources_schema.py b/local_database/dump_data_sources_schema.py
deleted file mode 100644
index 65079f53..00000000
--- a/local_database/dump_data_sources_schema.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from local_database.DockerInfos import get_data_sources_data_dumper_info
-from local_database.classes.DockerManager import DockerManager
-from local_database.constants import DUMP_SH_DOCKER_PATH
-
-
-def main():
-    docker_manager = DockerManager()
-    data_sources_docker_info = get_data_sources_data_dumper_info()
-    container = docker_manager.run_container(
-        data_sources_docker_info,
-        force_rebuild=True
-    )
-    try:
-        container.run_command(DUMP_SH_DOCKER_PATH)
-    finally:
-        container.stop()
-
-
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file