diff --git a/Dockerfile b/Dockerfile index 5ba90408..85931528 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,25 +15,14 @@ RUN uv sync --locked --no-dev RUN playwright install-deps chromium RUN playwright install chromium - # Copy project files -COPY api ./api -COPY db ./collector_db -COPY collector_manager ./collector_manager -COPY core ./core -COPY html_tag_collector ./html_tag_collector -COPY source_collectors ./source_collectors -COPY util ./util +COPY src ./src COPY alembic.ini ./alembic.ini COPY alembic ./alembic COPY apply_migrations.py ./apply_migrations.py -COPY security_manager ./security_manager -COPY pdap_api_client ./pdap_api_client COPY execute.sh ./execute.sh COPY .project-root ./.project-root -COPY llm_api_logic ./llm_api_logic - # Expose the application port EXPOSE 80 diff --git a/alembic/env.py b/alembic/env.py index 3ba2f117..a70a4d5d 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -5,8 +5,8 @@ from sqlalchemy import engine_from_config from sqlalchemy import pool -from db.helper_functions import get_postgres_connection_string -from db.models import Base +from src.db.helper_functions import get_postgres_connection_string +from src.db.models import Base # this is the Alembic Config object, which provides # access to the values within the .ini file in use. diff --git a/alembic/versions/072b32a45b1c_add_task_tables_and_linking_logic.py b/alembic/versions/072b32a45b1c_add_task_tables_and_linking_logic.py index 8661f524..a67d128f 100644 --- a/alembic/versions/072b32a45b1c_add_task_tables_and_linking_logic.py +++ b/alembic/versions/072b32a45b1c_add_task_tables_and_linking_logic.py @@ -10,7 +10,7 @@ from alembic import op import sqlalchemy as sa -from db.enums import PGEnum +from src.db.enums import PGEnum # revision identifiers, used by Alembic. revision: str = '072b32a45b1c' diff --git a/alembic/versions/19bf57df581a_add_url_agency_suggestions.py b/alembic/versions/19bf57df581a_add_url_agency_suggestions.py index f735e271..c113b8fc 100644 --- a/alembic/versions/19bf57df581a_add_url_agency_suggestions.py +++ b/alembic/versions/19bf57df581a_add_url_agency_suggestions.py @@ -9,7 +9,7 @@ from alembic import op import sqlalchemy as sa -from db.enums import PGEnum +from src.db.enums import PGEnum # revision identifiers, used by Alembic. revision: str = '19bf57df581a' down_revision: Union[str, None] = '072b32a45b1c' diff --git a/alembic/versions/2025_02_23_1023-33421c0590bb_overhaul_annotation_organization.py b/alembic/versions/2025_02_23_1023-33421c0590bb_overhaul_annotation_organization.py index 55442f50..97889bd9 100644 --- a/alembic/versions/2025_02_23_1023-33421c0590bb_overhaul_annotation_organization.py +++ b/alembic/versions/2025_02_23_1023-33421c0590bb_overhaul_annotation_organization.py @@ -32,7 +32,7 @@ import sqlalchemy as sa from sqlalchemy import UniqueConstraint -from util.alembic_helpers import switch_enum_type +from src.util.alembic_helpers import switch_enum_type # revision identifiers, used by Alembic. revision: str = '33421c0590bb' diff --git a/alembic/versions/2025_03_15_1745-6eb8084e2f48_add_name_description_and_url_optional_.py b/alembic/versions/2025_03_15_1745-6eb8084e2f48_add_name_description_and_url_optional_.py index e8b542f9..36bfbf4e 100644 --- a/alembic/versions/2025_03_15_1745-6eb8084e2f48_add_name_description_and_url_optional_.py +++ b/alembic/versions/2025_03_15_1745-6eb8084e2f48_add_name_description_and_url_optional_.py @@ -10,7 +10,7 @@ from alembic import op import sqlalchemy as sa -from util.alembic_helpers import switch_enum_type +from src.util.alembic_helpers import switch_enum_type # revision identifiers, used by Alembic. revision: str = '6eb8084e2f48' diff --git a/alembic/versions/2025_04_02_2040-4c70177eba78_add_rejected_batch_status.py b/alembic/versions/2025_04_02_2040-4c70177eba78_add_rejected_batch_status.py index fcb9821b..c61f310d 100644 --- a/alembic/versions/2025_04_02_2040-4c70177eba78_add_rejected_batch_status.py +++ b/alembic/versions/2025_04_02_2040-4c70177eba78_add_rejected_batch_status.py @@ -8,7 +8,7 @@ from typing import Sequence, Union -from util.alembic_helpers import switch_enum_type +from src.util.alembic_helpers import switch_enum_type # revision identifiers, used by Alembic. revision: str = '4c70177eba78' diff --git a/alembic/versions/2025_04_15_1338-b363794fa4e9_add_submit_url_task_type_enum.py b/alembic/versions/2025_04_15_1338-b363794fa4e9_add_submit_url_task_type_enum.py index e1d5b725..f19dfd90 100644 --- a/alembic/versions/2025_04_15_1338-b363794fa4e9_add_submit_url_task_type_enum.py +++ b/alembic/versions/2025_04_15_1338-b363794fa4e9_add_submit_url_task_type_enum.py @@ -8,7 +8,7 @@ from typing import Sequence, Union -from util.alembic_helpers import switch_enum_type +from src.util.alembic_helpers import switch_enum_type # revision identifiers, used by Alembic. revision: str = 'b363794fa4e9' diff --git a/alembic/versions/2025_04_17_0909-e285e6e7cf71_change_batch_completed_to_ready_to_label.py b/alembic/versions/2025_04_17_0909-e285e6e7cf71_change_batch_completed_to_ready_to_label.py index 882c2c5f..bc60015b 100644 --- a/alembic/versions/2025_04_17_0909-e285e6e7cf71_change_batch_completed_to_ready_to_label.py +++ b/alembic/versions/2025_04_17_0909-e285e6e7cf71_change_batch_completed_to_ready_to_label.py @@ -7,10 +7,7 @@ """ from typing import Sequence, Union -from alembic import op -import sqlalchemy as sa - -from util.alembic_helpers import switch_enum_type, alter_enum_value +from src.util.alembic_helpers import alter_enum_value # revision identifiers, used by Alembic. revision: str = 'e285e6e7cf71' diff --git a/alembic/versions/2025_05_03_0956-028565b77b9e_add_manual_strategy_to_batch_strategy_.py b/alembic/versions/2025_05_03_0956-028565b77b9e_add_manual_strategy_to_batch_strategy_.py index 9ec86fee..cb7fd988 100644 --- a/alembic/versions/2025_05_03_0956-028565b77b9e_add_manual_strategy_to_batch_strategy_.py +++ b/alembic/versions/2025_05_03_0956-028565b77b9e_add_manual_strategy_to_batch_strategy_.py @@ -8,9 +8,8 @@ from typing import Sequence, Union from alembic import op -import sqlalchemy as sa -from util.alembic_helpers import switch_enum_type +from src.util.alembic_helpers import switch_enum_type # revision identifiers, used by Alembic. revision: str = '028565b77b9e' diff --git a/alembic/versions/2025_05_13_0704-864107b703ae_create_url_checked_for_duplicate_table.py b/alembic/versions/2025_05_13_0704-864107b703ae_create_url_checked_for_duplicate_table.py index e2e5947f..39ab8125 100644 --- a/alembic/versions/2025_05_13_0704-864107b703ae_create_url_checked_for_duplicate_table.py +++ b/alembic/versions/2025_05_13_0704-864107b703ae_create_url_checked_for_duplicate_table.py @@ -10,7 +10,7 @@ from alembic import op import sqlalchemy as sa -from util.alembic_helpers import switch_enum_type +from src.util.alembic_helpers import switch_enum_type # revision identifiers, used by Alembic. revision: str = '864107b703ae' diff --git a/alembic/versions/2025_05_13_1234-b5f079b6b8cb_create_url_probed_for_404_table_and_.py b/alembic/versions/2025_05_13_1234-b5f079b6b8cb_create_url_probed_for_404_table_and_.py index f8868b02..1fc0c8e6 100644 --- a/alembic/versions/2025_05_13_1234-b5f079b6b8cb_create_url_probed_for_404_table_and_.py +++ b/alembic/versions/2025_05_13_1234-b5f079b6b8cb_create_url_probed_for_404_table_and_.py @@ -10,7 +10,7 @@ from alembic import op import sqlalchemy as sa -from util.alembic_helpers import switch_enum_type +from src.util.alembic_helpers import switch_enum_type # revision identifiers, used by Alembic. revision: str = 'b5f079b6b8cb' diff --git a/alembic/versions/2025_05_16_1031-00cc949e0347_update_relevancy_logic.py b/alembic/versions/2025_05_16_1031-00cc949e0347_update_relevancy_logic.py index 5ba1240f..78307640 100644 --- a/alembic/versions/2025_05_16_1031-00cc949e0347_update_relevancy_logic.py +++ b/alembic/versions/2025_05_16_1031-00cc949e0347_update_relevancy_logic.py @@ -16,7 +16,7 @@ from alembic import op import sqlalchemy as sa -from util.alembic_helpers import switch_enum_type +from src.util.alembic_helpers import switch_enum_type # revision identifiers, used by Alembic. revision: str = '00cc949e0347' diff --git a/alembic/versions/9afd8a5633c9_create_htmlcontent_and_rooturl_tables.py b/alembic/versions/9afd8a5633c9_create_htmlcontent_and_rooturl_tables.py index bc26bf30..ed4bea29 100644 --- a/alembic/versions/9afd8a5633c9_create_htmlcontent_and_rooturl_tables.py +++ b/alembic/versions/9afd8a5633c9_create_htmlcontent_and_rooturl_tables.py @@ -10,7 +10,7 @@ import sqlalchemy as sa from alembic import op -from db.enums import PGEnum +from src.db.enums import PGEnum # revision identifiers, used by Alembic. revision: str = '9afd8a5633c9' diff --git a/alembic/versions/d7eb670edaf0_revise_agency_identification_logic.py b/alembic/versions/d7eb670edaf0_revise_agency_identification_logic.py index 16611eee..cd68a4b5 100644 --- a/alembic/versions/d7eb670edaf0_revise_agency_identification_logic.py +++ b/alembic/versions/d7eb670edaf0_revise_agency_identification_logic.py @@ -10,7 +10,7 @@ from alembic import op import sqlalchemy as sa -from db.enums import PGEnum +from src.db.enums import PGEnum # revision identifiers, used by Alembic. revision: str = 'd7eb670edaf0' diff --git a/api/dependencies.py b/api/dependencies.py deleted file mode 100644 index 0d14a00c..00000000 --- a/api/dependencies.py +++ /dev/null @@ -1,12 +0,0 @@ -from core.AsyncCore import AsyncCore -from core.SourceCollectorCore import SourceCollectorCore - - -def get_core() -> SourceCollectorCore: - from api.main import app - return app.state.core - - -def get_async_core() -> AsyncCore: - from api.main import app - return app.state.async_core \ No newline at end of file diff --git a/apply_migrations.py b/apply_migrations.py index 2dc207ce..ed3b2f44 100644 --- a/apply_migrations.py +++ b/apply_migrations.py @@ -1,7 +1,7 @@ from alembic import command from alembic.config import Config -from db.helper_functions import get_postgres_connection_string +from src.db.helper_functions import get_postgres_connection_string def apply_migrations(): print("Applying migrations...") diff --git a/local_database/DockerInfos.py b/local_database/DockerInfos.py index 17180bab..ad7228fb 100644 --- a/local_database/DockerInfos.py +++ b/local_database/DockerInfos.py @@ -1,5 +1,5 @@ from local_database.DTOs import DockerInfo, DockerfileInfo, HealthCheckInfo, VolumeInfo -from util.helper_functions import get_from_env, project_path +from src.util import get_from_env, project_path def get_database_docker_info() -> DockerInfo: diff --git a/source_collectors/muckrock/classes/fetch_requests/FOIALoopFetchRequest.py b/source_collectors/muckrock/classes/fetch_requests/FOIALoopFetchRequest.py deleted file mode 100644 index d498fdc2..00000000 --- a/source_collectors/muckrock/classes/fetch_requests/FOIALoopFetchRequest.py +++ /dev/null @@ -1,5 +0,0 @@ -from source_collectors.muckrock.classes.fetch_requests.FetchRequestBase import FetchRequest - - -class FOIALoopFetchRequest(FetchRequest): - jurisdiction: int diff --git a/api/__init__.py b/src/__init__.py similarity index 100% rename from api/__init__.py rename to src/__init__.py diff --git a/api/README.md b/src/api/README.md similarity index 100% rename from api/README.md rename to src/api/README.md diff --git a/api/routes/__init__.py b/src/api/__init__.py similarity index 100% rename from api/routes/__init__.py rename to src/api/__init__.py diff --git a/src/api/dependencies.py b/src/api/dependencies.py new file mode 100644 index 00000000..3411340a --- /dev/null +++ b/src/api/dependencies.py @@ -0,0 +1,12 @@ +from src.core.AsyncCore import AsyncCore +from src.core.SourceCollectorCore import SourceCollectorCore + + +def get_core() -> SourceCollectorCore: + from src.api.main import app + return app.state.core + + +def get_async_core() -> AsyncCore: + from src.api.main import app + return app.state.async_core \ No newline at end of file diff --git a/api/main.py b/src/api/main.py similarity index 72% rename from api/main.py rename to src/api/main.py index fd82df85..227de24c 100644 --- a/api/main.py +++ b/src/api/main.py @@ -5,29 +5,29 @@ from fastapi import FastAPI from starlette.responses import RedirectResponse -from api.routes.annotate import annotate_router -from api.routes.batch import batch_router -from api.routes.collector import collector_router -from api.routes.metrics import metrics_router -from api.routes.review import review_router -from api.routes.root import root_router -from api.routes.search import search_router -from api.routes.task import task_router -from api.routes.url import url_router -from db.AsyncDatabaseClient import AsyncDatabaseClient -from db.DatabaseClient import DatabaseClient -from collector_manager.AsyncCollectorManager import AsyncCollectorManager -from core.AsyncCore import AsyncCore -from core.AsyncCoreLogger import AsyncCoreLogger -from core.EnvVarManager import EnvVarManager -from core.ScheduledTaskManager import AsyncScheduledTaskManager -from core.SourceCollectorCore import SourceCollectorCore -from core.TaskManager import TaskManager -from html_tag_collector.ResponseParser import HTMLResponseParser -from html_tag_collector.RootURLCache import RootURLCache -from html_tag_collector.URLRequestInterface import URLRequestInterface +from src.api.routes.annotate import annotate_router +from src.api.routes.batch import batch_router +from src.api.routes.collector import collector_router +from src.api.routes.metrics import metrics_router +from src.api.routes.review import review_router +from src.api.routes.root import root_router +from src.api.routes.search import search_router +from src.api.routes.task import task_router +from src.api.routes.url import url_router +from src.db.AsyncDatabaseClient import AsyncDatabaseClient +from src.db.DatabaseClient import DatabaseClient +from src.collector_manager.AsyncCollectorManager import AsyncCollectorManager +from src.core.AsyncCore import AsyncCore +from src.core.AsyncCoreLogger import AsyncCoreLogger +from src.core.EnvVarManager import EnvVarManager +from src.core.ScheduledTaskManager import AsyncScheduledTaskManager +from src.core.SourceCollectorCore import SourceCollectorCore +from src.core.TaskManager import TaskManager +from src.html_tag_collector.ResponseParser import HTMLResponseParser +from src.html_tag_collector.RootURLCache import RootURLCache +from src.html_tag_collector.URLRequestInterface import URLRequestInterface from pdap_access_manager import AccessManager -from pdap_api_client.PDAPClient import PDAPClient +from src.pdap_api_client.PDAPClient import PDAPClient from discord_poster import DiscordPoster diff --git a/collector_manager/DTOs/__init__.py b/src/api/routes/__init__.py similarity index 100% rename from collector_manager/DTOs/__init__.py rename to src/api/routes/__init__.py diff --git a/api/routes/annotate.py b/src/api/routes/annotate.py similarity index 86% rename from api/routes/annotate.py rename to src/api/routes/annotate.py index 7cb5fa65..ceb170bb 100644 --- a/api/routes/annotate.py +++ b/src/api/routes/annotate.py @@ -2,17 +2,17 @@ from fastapi import APIRouter, Depends, Path, Query -from api.dependencies import get_async_core -from core.AsyncCore import AsyncCore -from core.DTOs.AllAnnotationPostInfo import AllAnnotationPostInfo -from core.DTOs.GetNextRecordTypeAnnotationResponseInfo import GetNextRecordTypeAnnotationResponseOuterInfo -from core.DTOs.GetNextRelevanceAnnotationResponseInfo import GetNextRelevanceAnnotationResponseOuterInfo -from core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAnnotationResponse, \ +from src.api.dependencies import get_async_core +from src.core.AsyncCore import AsyncCore +from src.core.DTOs.AllAnnotationPostInfo import AllAnnotationPostInfo +from src.core.DTOs.GetNextRecordTypeAnnotationResponseInfo import GetNextRecordTypeAnnotationResponseOuterInfo +from src.core.DTOs.GetNextRelevanceAnnotationResponseInfo import GetNextRelevanceAnnotationResponseOuterInfo +from src.core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAnnotationResponse, \ URLAgencyAnnotationPostInfo -from core.DTOs.GetNextURLForAllAnnotationResponse import GetNextURLForAllAnnotationResponse -from core.DTOs.RecordTypeAnnotationPostInfo import RecordTypeAnnotationPostInfo -from core.DTOs.RelevanceAnnotationPostInfo import RelevanceAnnotationPostInfo -from security_manager.SecurityManager import get_access_info, AccessInfo +from src.core.DTOs.GetNextURLForAllAnnotationResponse import GetNextURLForAllAnnotationResponse +from src.core.DTOs.RecordTypeAnnotationPostInfo import RecordTypeAnnotationPostInfo +from src.core.DTOs.RelevanceAnnotationPostInfo import RelevanceAnnotationPostInfo +from src.security_manager.SecurityManager import get_access_info, AccessInfo annotate_router = APIRouter( prefix="/annotate", diff --git a/api/routes/batch.py b/src/api/routes/batch.py similarity index 82% rename from api/routes/batch.py rename to src/api/routes/batch.py index 4ca38c55..ee895c82 100644 --- a/api/routes/batch.py +++ b/src/api/routes/batch.py @@ -3,18 +3,17 @@ from fastapi import Path, APIRouter from fastapi.params import Query, Depends -from api.dependencies import get_core, get_async_core -from db.DTOs.BatchInfo import BatchInfo -from collector_manager.enums import CollectorType -from core.AsyncCore import AsyncCore -from core.DTOs.GetBatchLogsResponse import GetBatchLogsResponse -from core.DTOs.GetBatchStatusResponse import GetBatchStatusResponse -from core.DTOs.GetDuplicatesByBatchResponse import GetDuplicatesByBatchResponse -from core.DTOs.GetURLsByBatchResponse import GetURLsByBatchResponse -from core.DTOs.MessageResponse import MessageResponse -from core.SourceCollectorCore import SourceCollectorCore -from core.enums import BatchStatus -from security_manager.SecurityManager import AccessInfo, get_access_info +from src.api.dependencies import get_async_core +from src.db.DTOs.BatchInfo import BatchInfo +from src.collector_manager.enums import CollectorType +from src.core.AsyncCore import AsyncCore +from src.core.DTOs.GetBatchLogsResponse import GetBatchLogsResponse +from src.core.DTOs.GetBatchStatusResponse import GetBatchStatusResponse +from src.core.DTOs.GetDuplicatesByBatchResponse import GetDuplicatesByBatchResponse +from src.core.DTOs.GetURLsByBatchResponse import GetURLsByBatchResponse +from src.core.DTOs.MessageResponse import MessageResponse +from src.core.enums import BatchStatus +from src.security_manager.SecurityManager import AccessInfo, get_access_info batch_router = APIRouter( prefix="/batch", diff --git a/api/routes/collector.py b/src/api/routes/collector.py similarity index 83% rename from api/routes/collector.py rename to src/api/routes/collector.py index 16f5a900..2d60ec51 100644 --- a/api/routes/collector.py +++ b/src/api/routes/collector.py @@ -1,18 +1,18 @@ from fastapi import APIRouter from fastapi.params import Depends -from api.dependencies import get_async_core -from collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO -from collector_manager.enums import CollectorType -from core.AsyncCore import AsyncCore -from core.DTOs.CollectorStartInfo import CollectorStartInfo -from core.DTOs.ManualBatchInputDTO import ManualBatchInputDTO -from core.DTOs.ManualBatchResponseDTO import ManualBatchResponseDTO -from security_manager.SecurityManager import AccessInfo, get_access_info -from source_collectors.auto_googler.DTOs import AutoGooglerInputDTO -from source_collectors.ckan.DTOs import CKANInputDTO -from source_collectors.common_crawler.DTOs import CommonCrawlerInputDTO -from source_collectors.muckrock.DTOs import MuckrockCountySearchCollectorInputDTO, \ +from src.api.dependencies import get_async_core +from src.collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO +from src.collector_manager.enums import CollectorType +from src.core.AsyncCore import AsyncCore +from src.core.DTOs.CollectorStartInfo import CollectorStartInfo +from src.core.DTOs.ManualBatchInputDTO import ManualBatchInputDTO +from src.core.DTOs.ManualBatchResponseDTO import ManualBatchResponseDTO +from src.security_manager.SecurityManager import AccessInfo, get_access_info +from src.source_collectors.auto_googler.DTOs import AutoGooglerInputDTO +from src.source_collectors.ckan.DTOs import CKANInputDTO +from src.source_collectors.common_crawler.DTOs import CommonCrawlerInputDTO +from src.source_collectors.muckrock.DTOs import MuckrockCountySearchCollectorInputDTO, \ MuckrockAllFOIARequestsCollectorInputDTO, MuckrockSimpleSearchCollectorInputDTO collector_router = APIRouter( diff --git a/api/routes/metrics.py b/src/api/routes/metrics.py similarity index 72% rename from api/routes/metrics.py rename to src/api/routes/metrics.py index d81aa2e6..b90334e8 100644 --- a/api/routes/metrics.py +++ b/src/api/routes/metrics.py @@ -1,15 +1,15 @@ from fastapi import APIRouter from fastapi.params import Query, Depends -from api.dependencies import get_async_core -from core.AsyncCore import AsyncCore -from core.DTOs.GetMetricsBacklogResponse import GetMetricsBacklogResponseDTO -from core.DTOs.GetMetricsBatchesAggregatedResponseDTO import GetMetricsBatchesAggregatedResponseDTO -from core.DTOs.GetMetricsBatchesBreakdownResponseDTO import GetMetricsBatchesBreakdownResponseDTO -from core.DTOs.GetMetricsURLsAggregatedResponseDTO import GetMetricsURLsAggregatedResponseDTO -from core.DTOs.GetMetricsURLsBreakdownPendingResponseDTO import GetMetricsURLsBreakdownPendingResponseDTO -from core.DTOs.GetMetricsURLsBreakdownSubmittedResponseDTO import GetMetricsURLsBreakdownSubmittedResponseDTO -from security_manager.SecurityManager import AccessInfo, get_access_info +from src.api.dependencies import get_async_core +from src.core.AsyncCore import AsyncCore +from src.core.DTOs.GetMetricsBacklogResponse import GetMetricsBacklogResponseDTO +from src.core.DTOs.GetMetricsBatchesAggregatedResponseDTO import GetMetricsBatchesAggregatedResponseDTO +from src.core.DTOs.GetMetricsBatchesBreakdownResponseDTO import GetMetricsBatchesBreakdownResponseDTO +from src.core.DTOs.GetMetricsURLsAggregatedResponseDTO import GetMetricsURLsAggregatedResponseDTO +from src.core.DTOs.GetMetricsURLsBreakdownPendingResponseDTO import GetMetricsURLsBreakdownPendingResponseDTO +from src.core.DTOs.GetMetricsURLsBreakdownSubmittedResponseDTO import GetMetricsURLsBreakdownSubmittedResponseDTO +from src.security_manager.SecurityManager import AccessInfo, get_access_info metrics_router = APIRouter( prefix="/metrics", diff --git a/api/routes/review.py b/src/api/routes/review.py similarity index 84% rename from api/routes/review.py rename to src/api/routes/review.py index ac937701..51946461 100644 --- a/api/routes/review.py +++ b/src/api/routes/review.py @@ -2,12 +2,11 @@ from fastapi import APIRouter, Depends, Query -from api.dependencies import get_async_core -from core.AsyncCore import AsyncCore -from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo, FinalReviewBaseInfo, FinalReviewRejectionInfo -from core.DTOs.GetNextURLForFinalReviewResponse import GetNextURLForFinalReviewResponse, \ - GetNextURLForFinalReviewOuterResponse -from security_manager.SecurityManager import AccessInfo, get_access_info, require_permission, Permissions +from src.api.dependencies import get_async_core +from src.core.AsyncCore import AsyncCore +from src.core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo, FinalReviewRejectionInfo +from src.core.DTOs.GetNextURLForFinalReviewResponse import GetNextURLForFinalReviewOuterResponse +from src.security_manager.SecurityManager import AccessInfo, require_permission, Permissions review_router = APIRouter( prefix="/review", diff --git a/api/routes/root.py b/src/api/routes/root.py similarity index 82% rename from api/routes/root.py rename to src/api/routes/root.py index 065e95fd..4298716e 100644 --- a/api/routes/root.py +++ b/src/api/routes/root.py @@ -1,6 +1,6 @@ from fastapi import APIRouter, Query, Depends -from security_manager.SecurityManager import AccessInfo, get_access_info +from src.security_manager.SecurityManager import AccessInfo, get_access_info root_router = APIRouter(prefix="", tags=["root"]) diff --git a/api/routes/search.py b/src/api/routes/search.py similarity index 66% rename from api/routes/search.py rename to src/api/routes/search.py index 4513bb2f..7955c0db 100644 --- a/api/routes/search.py +++ b/src/api/routes/search.py @@ -1,9 +1,9 @@ from fastapi import APIRouter, Query, Depends -from api.dependencies import get_async_core -from core.AsyncCore import AsyncCore -from core.DTOs.SearchURLResponse import SearchURLResponse -from security_manager.SecurityManager import get_access_info, AccessInfo +from src.api.dependencies import get_async_core +from src.core.AsyncCore import AsyncCore +from src.core.DTOs.SearchURLResponse import SearchURLResponse +from src.security_manager.SecurityManager import get_access_info, AccessInfo search_router = APIRouter(prefix="/search", tags=["search"]) diff --git a/api/routes/task.py b/src/api/routes/task.py similarity index 79% rename from api/routes/task.py rename to src/api/routes/task.py index a99598f7..2b0ac6d4 100644 --- a/api/routes/task.py +++ b/src/api/routes/task.py @@ -2,13 +2,13 @@ from fastapi import APIRouter, Depends, Query, Path -from api.dependencies import get_async_core -from db.DTOs.GetTaskStatusResponseInfo import GetTaskStatusResponseInfo -from db.DTOs.TaskInfo import TaskInfo -from db.enums import TaskType -from core.AsyncCore import AsyncCore -from core.enums import BatchStatus -from security_manager.SecurityManager import AccessInfo, get_access_info +from src.api.dependencies import get_async_core +from src.db.DTOs.GetTaskStatusResponseInfo import GetTaskStatusResponseInfo +from src.db.DTOs.TaskInfo import TaskInfo +from src.db.enums import TaskType +from src.core.AsyncCore import AsyncCore +from src.core.enums import BatchStatus +from src.security_manager.SecurityManager import AccessInfo, get_access_info task_router = APIRouter( prefix="/task", diff --git a/api/routes/url.py b/src/api/routes/url.py similarity index 73% rename from api/routes/url.py rename to src/api/routes/url.py index 9c3a1261..46b7950e 100644 --- a/api/routes/url.py +++ b/src/api/routes/url.py @@ -1,9 +1,9 @@ from fastapi import APIRouter, Query, Depends -from api.dependencies import get_async_core -from core.AsyncCore import AsyncCore -from core.DTOs.GetURLsResponseInfo import GetURLsResponseInfo -from security_manager.SecurityManager import AccessInfo, get_access_info +from src.api.dependencies import get_async_core +from src.core.AsyncCore import AsyncCore +from src.core.DTOs.GetURLsResponseInfo import GetURLsResponseInfo +from src.security_manager.SecurityManager import AccessInfo, get_access_info url_router = APIRouter( prefix="/url", diff --git a/collector_manager/AsyncCollectorBase.py b/src/collector_manager/AsyncCollectorBase.py similarity index 90% rename from collector_manager/AsyncCollectorBase.py rename to src/collector_manager/AsyncCollectorBase.py index 94361ed4..3f890c28 100644 --- a/collector_manager/AsyncCollectorBase.py +++ b/src/collector_manager/AsyncCollectorBase.py @@ -6,14 +6,14 @@ from pydantic import BaseModel -from db.AsyncDatabaseClient import AsyncDatabaseClient -from db.DTOs.InsertURLsInfo import InsertURLsInfo -from db.DTOs.LogInfo import LogInfo -from collector_manager.enums import CollectorType -from core.AsyncCoreLogger import AsyncCoreLogger -from core.FunctionTrigger import FunctionTrigger -from core.enums import BatchStatus -from core.preprocessors.PreprocessorBase import PreprocessorBase +from src.db.AsyncDatabaseClient import AsyncDatabaseClient +from src.db.DTOs.InsertURLsInfo import InsertURLsInfo +from src.db.DTOs.LogInfo import LogInfo +from src.collector_manager.enums import CollectorType +from src.core.AsyncCoreLogger import AsyncCoreLogger +from src.core.FunctionTrigger import FunctionTrigger +from src.core.enums import BatchStatus +from src.core.preprocessors.PreprocessorBase import PreprocessorBase class AsyncCollectorBase(ABC): diff --git a/collector_manager/AsyncCollectorManager.py b/src/collector_manager/AsyncCollectorManager.py similarity index 86% rename from collector_manager/AsyncCollectorManager.py rename to src/collector_manager/AsyncCollectorManager.py index bfb7beef..66819902 100644 --- a/collector_manager/AsyncCollectorManager.py +++ b/src/collector_manager/AsyncCollectorManager.py @@ -5,13 +5,13 @@ from fastapi import HTTPException from pydantic import BaseModel -from db.AsyncDatabaseClient import AsyncDatabaseClient -from collector_manager.AsyncCollectorBase import AsyncCollectorBase -from collector_manager.CollectorManager import InvalidCollectorError -from collector_manager.collector_mapping import COLLECTOR_MAPPING -from collector_manager.enums import CollectorType -from core.AsyncCoreLogger import AsyncCoreLogger -from core.FunctionTrigger import FunctionTrigger +from src.db.AsyncDatabaseClient import AsyncDatabaseClient +from src.collector_manager.AsyncCollectorBase import AsyncCollectorBase +from src.collector_manager.CollectorManager import InvalidCollectorError +from src.collector_manager.collector_mapping import COLLECTOR_MAPPING +from src.collector_manager.enums import CollectorType +from src.core.AsyncCoreLogger import AsyncCoreLogger +from src.core.FunctionTrigger import FunctionTrigger class AsyncCollectorManager: diff --git a/collector_manager/CollectorManager.py b/src/collector_manager/CollectorManager.py similarity index 100% rename from collector_manager/CollectorManager.py rename to src/collector_manager/CollectorManager.py diff --git a/collector_manager/DTOs/ExampleInputDTO.py b/src/collector_manager/DTOs/ExampleInputDTO.py similarity index 100% rename from collector_manager/DTOs/ExampleInputDTO.py rename to src/collector_manager/DTOs/ExampleInputDTO.py diff --git a/collector_manager/DTOs/ExampleOutputDTO.py b/src/collector_manager/DTOs/ExampleOutputDTO.py similarity index 100% rename from collector_manager/DTOs/ExampleOutputDTO.py rename to src/collector_manager/DTOs/ExampleOutputDTO.py diff --git a/collector_manager/__init__.py b/src/collector_manager/DTOs/__init__.py similarity index 100% rename from collector_manager/__init__.py rename to src/collector_manager/DTOs/__init__.py diff --git a/collector_manager/ExampleCollector.py b/src/collector_manager/ExampleCollector.py similarity index 70% rename from collector_manager/ExampleCollector.py rename to src/collector_manager/ExampleCollector.py index 7bc8a583..819bb7a3 100644 --- a/collector_manager/ExampleCollector.py +++ b/src/collector_manager/ExampleCollector.py @@ -5,11 +5,11 @@ """ import asyncio -from collector_manager.AsyncCollectorBase import AsyncCollectorBase -from collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO -from collector_manager.DTOs.ExampleOutputDTO import ExampleOutputDTO -from collector_manager.enums import CollectorType -from core.preprocessors.ExamplePreprocessor import ExamplePreprocessor +from src.collector_manager.AsyncCollectorBase import AsyncCollectorBase +from src.collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO +from src.collector_manager.DTOs.ExampleOutputDTO import ExampleOutputDTO +from src.collector_manager.enums import CollectorType +from src.core.preprocessors.ExamplePreprocessor import ExamplePreprocessor class ExampleCollector(AsyncCollectorBase): diff --git a/collector_manager/README.md b/src/collector_manager/README.md similarity index 100% rename from collector_manager/README.md rename to src/collector_manager/README.md diff --git a/core/DTOs/__init__.py b/src/collector_manager/__init__.py similarity index 100% rename from core/DTOs/__init__.py rename to src/collector_manager/__init__.py diff --git a/collector_manager/collector_mapping.py b/src/collector_manager/collector_mapping.py similarity index 53% rename from collector_manager/collector_mapping.py rename to src/collector_manager/collector_mapping.py index 9ec49f4e..0aee33b2 100644 --- a/collector_manager/collector_mapping.py +++ b/src/collector_manager/collector_mapping.py @@ -1,9 +1,9 @@ -from collector_manager.ExampleCollector import ExampleCollector -from collector_manager.enums import CollectorType -from source_collectors.auto_googler.AutoGooglerCollector import AutoGooglerCollector -from source_collectors.ckan.CKANCollector import CKANCollector -from source_collectors.common_crawler.CommonCrawlerCollector import CommonCrawlerCollector -from source_collectors.muckrock.classes.MuckrockCollector import MuckrockSimpleSearchCollector, \ +from src.collector_manager.ExampleCollector import ExampleCollector +from src.collector_manager.enums import CollectorType +from src.source_collectors.auto_googler.AutoGooglerCollector import AutoGooglerCollector +from src.source_collectors.ckan import CKANCollector +from src.source_collectors.common_crawler import CommonCrawlerCollector +from src.source_collectors.muckrock.classes.MuckrockCollector import MuckrockSimpleSearchCollector, \ MuckrockCountyLevelSearchCollector, MuckrockAllFOIARequestsCollector COLLECTOR_MAPPING = { diff --git a/collector_manager/configs/sample_autogoogler_config.json b/src/collector_manager/configs/sample_autogoogler_config.json similarity index 100% rename from collector_manager/configs/sample_autogoogler_config.json rename to src/collector_manager/configs/sample_autogoogler_config.json diff --git a/collector_manager/enums.py b/src/collector_manager/enums.py similarity index 100% rename from collector_manager/enums.py rename to src/collector_manager/enums.py diff --git a/core/AsyncCore.py b/src/core/AsyncCore.py similarity index 79% rename from core/AsyncCore.py rename to src/core/AsyncCore.py index 1d61557e..180c652d 100644 --- a/core/AsyncCore.py +++ b/src/core/AsyncCore.py @@ -3,41 +3,41 @@ from pydantic import BaseModel from sqlalchemy.exc import IntegrityError -from db.AsyncDatabaseClient import AsyncDatabaseClient -from db.DTOs.BatchInfo import BatchInfo -from db.DTOs.GetTaskStatusResponseInfo import GetTaskStatusResponseInfo -from db.enums import TaskType -from collector_manager.AsyncCollectorManager import AsyncCollectorManager -from collector_manager.enums import CollectorType -from core.DTOs.AllAnnotationPostInfo import AllAnnotationPostInfo -from core.DTOs.CollectorStartInfo import CollectorStartInfo -from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo, RejectionReason -from core.DTOs.GetBatchLogsResponse import GetBatchLogsResponse -from core.DTOs.GetBatchStatusResponse import GetBatchStatusResponse -from core.DTOs.GetDuplicatesByBatchResponse import GetDuplicatesByBatchResponse -from core.DTOs.GetMetricsBacklogResponse import GetMetricsBacklogResponseDTO -from core.DTOs.GetMetricsBatchesAggregatedResponseDTO import GetMetricsBatchesAggregatedResponseDTO -from core.DTOs.GetMetricsBatchesBreakdownResponseDTO import GetMetricsBatchesBreakdownResponseDTO -from core.DTOs.GetMetricsURLsAggregatedResponseDTO import GetMetricsURLsAggregatedResponseDTO -from core.DTOs.GetMetricsURLsBreakdownPendingResponseDTO import GetMetricsURLsBreakdownPendingResponseDTO -from core.DTOs.GetMetricsURLsBreakdownSubmittedResponseDTO import GetMetricsURLsBreakdownSubmittedResponseDTO -from core.DTOs.GetNextRecordTypeAnnotationResponseInfo import GetNextRecordTypeAnnotationResponseOuterInfo -from core.DTOs.GetNextRelevanceAnnotationResponseInfo import GetNextRelevanceAnnotationResponseOuterInfo -from core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAnnotationResponse, \ +from src.db.AsyncDatabaseClient import AsyncDatabaseClient +from src.db.DTOs.BatchInfo import BatchInfo +from src.db.DTOs.GetTaskStatusResponseInfo import GetTaskStatusResponseInfo +from src.db.enums import TaskType +from src.collector_manager.AsyncCollectorManager import AsyncCollectorManager +from src.collector_manager.enums import CollectorType +from src.core.DTOs.AllAnnotationPostInfo import AllAnnotationPostInfo +from src.core.DTOs.CollectorStartInfo import CollectorStartInfo +from src.core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo, RejectionReason +from src.core.DTOs.GetBatchLogsResponse import GetBatchLogsResponse +from src.core.DTOs.GetBatchStatusResponse import GetBatchStatusResponse +from src.core.DTOs.GetDuplicatesByBatchResponse import GetDuplicatesByBatchResponse +from src.core.DTOs.GetMetricsBacklogResponse import GetMetricsBacklogResponseDTO +from src.core.DTOs.GetMetricsBatchesAggregatedResponseDTO import GetMetricsBatchesAggregatedResponseDTO +from src.core.DTOs.GetMetricsBatchesBreakdownResponseDTO import GetMetricsBatchesBreakdownResponseDTO +from src.core.DTOs.GetMetricsURLsAggregatedResponseDTO import GetMetricsURLsAggregatedResponseDTO +from src.core.DTOs.GetMetricsURLsBreakdownPendingResponseDTO import GetMetricsURLsBreakdownPendingResponseDTO +from src.core.DTOs.GetMetricsURLsBreakdownSubmittedResponseDTO import GetMetricsURLsBreakdownSubmittedResponseDTO +from src.core.DTOs.GetNextRecordTypeAnnotationResponseInfo import GetNextRecordTypeAnnotationResponseOuterInfo +from src.core.DTOs.GetNextRelevanceAnnotationResponseInfo import GetNextRelevanceAnnotationResponseOuterInfo +from src.core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAnnotationResponse, \ URLAgencyAnnotationPostInfo -from core.DTOs.GetNextURLForAllAnnotationResponse import GetNextURLForAllAnnotationResponse -from core.DTOs.GetTasksResponse import GetTasksResponse -from core.DTOs.GetURLsByBatchResponse import GetURLsByBatchResponse -from core.DTOs.GetURLsResponseInfo import GetURLsResponseInfo -from core.DTOs.ManualBatchInputDTO import ManualBatchInputDTO -from core.DTOs.ManualBatchResponseDTO import ManualBatchResponseDTO -from core.DTOs.MessageResponse import MessageResponse -from core.DTOs.SearchURLResponse import SearchURLResponse -from core.TaskManager import TaskManager -from core.classes.ErrorManager import ErrorManager -from core.enums import BatchStatus, RecordType, AnnotationType, SuggestedStatus - -from security_manager.SecurityManager import AccessInfo +from src.core.DTOs.GetNextURLForAllAnnotationResponse import GetNextURLForAllAnnotationResponse +from src.core.DTOs.GetTasksResponse import GetTasksResponse +from src.core.DTOs.GetURLsByBatchResponse import GetURLsByBatchResponse +from src.core.DTOs.GetURLsResponseInfo import GetURLsResponseInfo +from src.core.DTOs.ManualBatchInputDTO import ManualBatchInputDTO +from src.core.DTOs.ManualBatchResponseDTO import ManualBatchResponseDTO +from src.core.DTOs.MessageResponse import MessageResponse +from src.core.DTOs.SearchURLResponse import SearchURLResponse +from src.core.TaskManager import TaskManager +from src.core.classes.ErrorManager import ErrorManager +from src.core.enums import BatchStatus, RecordType, AnnotationType, SuggestedStatus + +from src.security_manager.SecurityManager import AccessInfo class AsyncCore: diff --git a/core/AsyncCoreLogger.py b/src/core/AsyncCoreLogger.py similarity index 95% rename from core/AsyncCoreLogger.py rename to src/core/AsyncCoreLogger.py index 67bb5dc9..e3cdc4b2 100644 --- a/core/AsyncCoreLogger.py +++ b/src/core/AsyncCoreLogger.py @@ -1,7 +1,7 @@ import asyncio -from db.AsyncDatabaseClient import AsyncDatabaseClient -from db.DTOs.LogInfo import LogInfo +from src.db.AsyncDatabaseClient import AsyncDatabaseClient +from src.db.DTOs.LogInfo import LogInfo class AsyncCoreLogger: diff --git a/core/DTOs/AllAnnotationPostInfo.py b/src/core/DTOs/AllAnnotationPostInfo.py similarity index 82% rename from core/DTOs/AllAnnotationPostInfo.py rename to src/core/DTOs/AllAnnotationPostInfo.py index 2a81be78..6287f074 100644 --- a/core/DTOs/AllAnnotationPostInfo.py +++ b/src/core/DTOs/AllAnnotationPostInfo.py @@ -1,12 +1,10 @@ -from http import HTTPStatus from typing import Optional -from fastapi import HTTPException from pydantic import BaseModel, model_validator -from core.DTOs.GetNextURLForAgencyAnnotationResponse import URLAgencyAnnotationPostInfo -from core.enums import RecordType, SuggestedStatus -from core.exceptions import FailedValidationException +from src.core.DTOs.GetNextURLForAgencyAnnotationResponse import URLAgencyAnnotationPostInfo +from src.core.enums import RecordType, SuggestedStatus +from src.core.exceptions import FailedValidationException class AllAnnotationPostInfo(BaseModel): diff --git a/core/DTOs/AnnotationRequestInfo.py b/src/core/DTOs/AnnotationRequestInfo.py similarity index 72% rename from core/DTOs/AnnotationRequestInfo.py rename to src/core/DTOs/AnnotationRequestInfo.py index 1e886ae8..0b63ed71 100644 --- a/core/DTOs/AnnotationRequestInfo.py +++ b/src/core/DTOs/AnnotationRequestInfo.py @@ -1,6 +1,6 @@ from pydantic import BaseModel -from html_tag_collector.DataClassTags import ResponseHTMLInfo +from src.html_tag_collector import ResponseHTMLInfo class AnnotationRequestInfo(BaseModel): diff --git a/core/DTOs/BatchStatusInfo.py b/src/core/DTOs/BatchStatusInfo.py similarity index 67% rename from core/DTOs/BatchStatusInfo.py rename to src/core/DTOs/BatchStatusInfo.py index f0362a71..ad54686e 100644 --- a/core/DTOs/BatchStatusInfo.py +++ b/src/core/DTOs/BatchStatusInfo.py @@ -2,8 +2,8 @@ from pydantic import BaseModel -from collector_manager.enums import CollectorType -from core.enums import BatchStatus +from src.collector_manager.enums import CollectorType +from src.core.enums import BatchStatus class BatchStatusInfo(BaseModel): diff --git a/core/DTOs/CollectionLifecycleInfo.py b/src/core/DTOs/CollectionLifecycleInfo.py similarity index 65% rename from core/DTOs/CollectionLifecycleInfo.py rename to src/core/DTOs/CollectionLifecycleInfo.py index 925dee93..b1d2673f 100644 --- a/core/DTOs/CollectionLifecycleInfo.py +++ b/src/core/DTOs/CollectionLifecycleInfo.py @@ -1,7 +1,7 @@ from pydantic import BaseModel -from db.DTOs.DuplicateInfo import DuplicateInfo -from db.DTOs.URLMapping import URLMapping +from src.db.DTOs.DuplicateInfo import DuplicateInfo +from src.db.DTOs.URLMapping import URLMapping class CollectionLifecycleInfo(BaseModel): diff --git a/core/DTOs/CollectorStartInfo.py b/src/core/DTOs/CollectorStartInfo.py similarity index 100% rename from core/DTOs/CollectorStartInfo.py rename to src/core/DTOs/CollectorStartInfo.py diff --git a/core/DTOs/CollectorStartParams.py b/src/core/DTOs/CollectorStartParams.py similarity index 74% rename from core/DTOs/CollectorStartParams.py rename to src/core/DTOs/CollectorStartParams.py index 5038afc6..6c7d4a61 100644 --- a/core/DTOs/CollectorStartParams.py +++ b/src/core/DTOs/CollectorStartParams.py @@ -1,6 +1,6 @@ from pydantic import BaseModel -from collector_manager.enums import CollectorType +from src.collector_manager import CollectorType class CollectorStartParams(BaseModel): diff --git a/core/DTOs/FinalReviewApprovalInfo.py b/src/core/DTOs/FinalReviewApprovalInfo.py similarity index 98% rename from core/DTOs/FinalReviewApprovalInfo.py rename to src/core/DTOs/FinalReviewApprovalInfo.py index 5e4a19d6..f65c7e91 100644 --- a/core/DTOs/FinalReviewApprovalInfo.py +++ b/src/core/DTOs/FinalReviewApprovalInfo.py @@ -3,7 +3,7 @@ from pydantic import BaseModel, Field -from core.enums import RecordType +from src.core.enums import RecordType class FinalReviewBaseInfo(BaseModel): url_id: int = Field( diff --git a/core/DTOs/GetBatchLogsResponse.py b/src/core/DTOs/GetBatchLogsResponse.py similarity index 68% rename from core/DTOs/GetBatchLogsResponse.py rename to src/core/DTOs/GetBatchLogsResponse.py index adcc3be9..05db2370 100644 --- a/core/DTOs/GetBatchLogsResponse.py +++ b/src/core/DTOs/GetBatchLogsResponse.py @@ -1,6 +1,6 @@ from pydantic import BaseModel -from db.DTOs.LogInfo import LogOutputInfo +from src.db.DTOs.LogInfo import LogOutputInfo class GetBatchLogsResponse(BaseModel): diff --git a/core/DTOs/GetBatchStatusResponse.py b/src/core/DTOs/GetBatchStatusResponse.py similarity index 70% rename from core/DTOs/GetBatchStatusResponse.py rename to src/core/DTOs/GetBatchStatusResponse.py index d1a02dc7..8ee0da43 100644 --- a/core/DTOs/GetBatchStatusResponse.py +++ b/src/core/DTOs/GetBatchStatusResponse.py @@ -1,6 +1,6 @@ from pydantic import BaseModel -from db.DTOs.BatchInfo import BatchInfo +from src.db.DTOs.BatchInfo import BatchInfo class GetBatchStatusResponse(BaseModel): diff --git a/core/DTOs/GetDuplicatesByBatchResponse.py b/src/core/DTOs/GetDuplicatesByBatchResponse.py similarity index 73% rename from core/DTOs/GetDuplicatesByBatchResponse.py rename to src/core/DTOs/GetDuplicatesByBatchResponse.py index 68a6dd4b..e9c3a864 100644 --- a/core/DTOs/GetDuplicatesByBatchResponse.py +++ b/src/core/DTOs/GetDuplicatesByBatchResponse.py @@ -2,7 +2,7 @@ from pydantic import BaseModel -from db.DTOs.DuplicateInfo import DuplicateInfo +from src.db.DTOs.DuplicateInfo import DuplicateInfo class GetDuplicatesByBatchResponse(BaseModel): diff --git a/core/DTOs/GetMetricsBacklogResponse.py b/src/core/DTOs/GetMetricsBacklogResponse.py similarity index 100% rename from core/DTOs/GetMetricsBacklogResponse.py rename to src/core/DTOs/GetMetricsBacklogResponse.py diff --git a/core/DTOs/GetMetricsBatchesAggregatedResponseDTO.py b/src/core/DTOs/GetMetricsBatchesAggregatedResponseDTO.py similarity index 90% rename from core/DTOs/GetMetricsBatchesAggregatedResponseDTO.py rename to src/core/DTOs/GetMetricsBatchesAggregatedResponseDTO.py index 37535f2d..fad69be5 100644 --- a/core/DTOs/GetMetricsBatchesAggregatedResponseDTO.py +++ b/src/core/DTOs/GetMetricsBatchesAggregatedResponseDTO.py @@ -2,7 +2,7 @@ from pydantic import BaseModel -from collector_manager.enums import CollectorType +from src.collector_manager.enums import CollectorType class GetMetricsBatchesAggregatedInnerResponseDTO(BaseModel): diff --git a/core/DTOs/GetMetricsBatchesBreakdownResponseDTO.py b/src/core/DTOs/GetMetricsBatchesBreakdownResponseDTO.py similarity index 84% rename from core/DTOs/GetMetricsBatchesBreakdownResponseDTO.py rename to src/core/DTOs/GetMetricsBatchesBreakdownResponseDTO.py index 6572f49f..d5bdd0f6 100644 --- a/core/DTOs/GetMetricsBatchesBreakdownResponseDTO.py +++ b/src/core/DTOs/GetMetricsBatchesBreakdownResponseDTO.py @@ -2,8 +2,8 @@ from pydantic import BaseModel -from collector_manager.enums import CollectorType -from core.enums import BatchStatus +from src.collector_manager.enums import CollectorType +from src.core.enums import BatchStatus class GetMetricsBatchesBreakdownInnerResponseDTO(BaseModel): diff --git a/core/DTOs/GetMetricsURLsAggregatedResponseDTO.py b/src/core/DTOs/GetMetricsURLsAggregatedResponseDTO.py similarity index 100% rename from core/DTOs/GetMetricsURLsAggregatedResponseDTO.py rename to src/core/DTOs/GetMetricsURLsAggregatedResponseDTO.py diff --git a/core/DTOs/GetMetricsURLsBreakdownPendingResponseDTO.py b/src/core/DTOs/GetMetricsURLsBreakdownPendingResponseDTO.py similarity index 100% rename from core/DTOs/GetMetricsURLsBreakdownPendingResponseDTO.py rename to src/core/DTOs/GetMetricsURLsBreakdownPendingResponseDTO.py diff --git a/core/DTOs/GetMetricsURLsBreakdownSubmittedResponseDTO.py b/src/core/DTOs/GetMetricsURLsBreakdownSubmittedResponseDTO.py similarity index 100% rename from core/DTOs/GetMetricsURLsBreakdownSubmittedResponseDTO.py rename to src/core/DTOs/GetMetricsURLsBreakdownSubmittedResponseDTO.py diff --git a/core/DTOs/GetNextRecordTypeAnnotationResponseInfo.py b/src/core/DTOs/GetNextRecordTypeAnnotationResponseInfo.py similarity index 79% rename from core/DTOs/GetNextRecordTypeAnnotationResponseInfo.py rename to src/core/DTOs/GetNextRecordTypeAnnotationResponseInfo.py index e1784409..af8fbae7 100644 --- a/core/DTOs/GetNextRecordTypeAnnotationResponseInfo.py +++ b/src/core/DTOs/GetNextRecordTypeAnnotationResponseInfo.py @@ -2,9 +2,9 @@ from pydantic import Field, BaseModel -from db.DTOs.URLMapping import URLMapping -from core.enums import RecordType -from html_tag_collector.DataClassTags import ResponseHTMLInfo +from src.db.DTOs.URLMapping import URLMapping +from src.core.enums import RecordType +from src.html_tag_collector.DataClassTags import ResponseHTMLInfo class GetNextRecordTypeAnnotationResponseInfo(BaseModel): diff --git a/core/DTOs/GetNextRelevanceAnnotationResponseInfo.py b/src/core/DTOs/GetNextRelevanceAnnotationResponseInfo.py similarity index 78% rename from core/DTOs/GetNextRelevanceAnnotationResponseInfo.py rename to src/core/DTOs/GetNextRelevanceAnnotationResponseInfo.py index af586395..5a76c692 100644 --- a/core/DTOs/GetNextRelevanceAnnotationResponseInfo.py +++ b/src/core/DTOs/GetNextRelevanceAnnotationResponseInfo.py @@ -2,9 +2,8 @@ from pydantic import BaseModel, Field -from db.DTOs.URLMapping import URLMapping -from core.DTOs.ResponseURLInfo import ResponseURLInfo -from html_tag_collector.DataClassTags import ResponseHTMLInfo +from src.db.DTOs.URLMapping import URLMapping +from src.html_tag_collector.DataClassTags import ResponseHTMLInfo class GetNextRelevanceAnnotationResponseInfo(BaseModel): diff --git a/core/DTOs/GetNextURLForAgencyAnnotationResponse.py b/src/core/DTOs/GetNextURLForAgencyAnnotationResponse.py similarity index 84% rename from core/DTOs/GetNextURLForAgencyAnnotationResponse.py rename to src/core/DTOs/GetNextURLForAgencyAnnotationResponse.py index 8b3d06f4..40bac8c4 100644 --- a/core/DTOs/GetNextURLForAgencyAnnotationResponse.py +++ b/src/core/DTOs/GetNextURLForAgencyAnnotationResponse.py @@ -1,9 +1,9 @@ -from typing import Optional, Literal +from typing import Optional from pydantic import BaseModel -from core.enums import SuggestionType -from html_tag_collector.DataClassTags import ResponseHTMLInfo +from src.core.enums import SuggestionType +from src.html_tag_collector.DataClassTags import ResponseHTMLInfo class GetNextURLForAgencyAgencyInfo(BaseModel): suggestion_type: SuggestionType diff --git a/core/DTOs/GetNextURLForAllAnnotationResponse.py b/src/core/DTOs/GetNextURLForAllAnnotationResponse.py similarity index 76% rename from core/DTOs/GetNextURLForAllAnnotationResponse.py rename to src/core/DTOs/GetNextURLForAllAnnotationResponse.py index f4fa4bb8..495342ec 100644 --- a/core/DTOs/GetNextURLForAllAnnotationResponse.py +++ b/src/core/DTOs/GetNextURLForAllAnnotationResponse.py @@ -2,9 +2,9 @@ from pydantic import Field, BaseModel -from core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAgencyInfo -from core.enums import RecordType -from html_tag_collector.DataClassTags import ResponseHTMLInfo +from src.core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAgencyInfo +from src.core.enums import RecordType +from src.html_tag_collector.DataClassTags import ResponseHTMLInfo class GetNextURLForAllAnnotationInnerResponse(BaseModel): diff --git a/core/DTOs/GetNextURLForFinalReviewResponse.py b/src/core/DTOs/GetNextURLForFinalReviewResponse.py similarity index 93% rename from core/DTOs/GetNextURLForFinalReviewResponse.py rename to src/core/DTOs/GetNextURLForFinalReviewResponse.py index f7e84d1f..81addf54 100644 --- a/core/DTOs/GetNextURLForFinalReviewResponse.py +++ b/src/core/DTOs/GetNextURLForFinalReviewResponse.py @@ -2,9 +2,9 @@ from pydantic import BaseModel, Field -from core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAgencyInfo -from core.enums import RecordType, SuggestedStatus -from html_tag_collector.DataClassTags import ResponseHTMLInfo +from src.core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAgencyInfo +from src.core.enums import RecordType, SuggestedStatus +from src.html_tag_collector.DataClassTags import ResponseHTMLInfo class FinalReviewAnnotationRelevantInfo(BaseModel): auto: Optional[bool] = Field(title="Whether the auto-labeler has marked the URL as relevant") diff --git a/core/DTOs/GetTasksResponse.py b/src/core/DTOs/GetTasksResponse.py similarity index 80% rename from core/DTOs/GetTasksResponse.py rename to src/core/DTOs/GetTasksResponse.py index 670ce8d3..da3c0334 100644 --- a/core/DTOs/GetTasksResponse.py +++ b/src/core/DTOs/GetTasksResponse.py @@ -2,8 +2,8 @@ from pydantic import BaseModel -from db.enums import TaskType -from core.enums import BatchStatus +from src.db.enums import TaskType +from src.core.enums import BatchStatus class GetTasksResponseTaskInfo(BaseModel): diff --git a/core/DTOs/GetURLsByBatchResponse.py b/src/core/DTOs/GetURLsByBatchResponse.py similarity index 71% rename from core/DTOs/GetURLsByBatchResponse.py rename to src/core/DTOs/GetURLsByBatchResponse.py index c737d720..ddffa1e9 100644 --- a/core/DTOs/GetURLsByBatchResponse.py +++ b/src/core/DTOs/GetURLsByBatchResponse.py @@ -1,6 +1,6 @@ from pydantic import BaseModel -from db.DTOs.URLInfo import URLInfo +from src.db.DTOs.URLInfo import URLInfo class GetURLsByBatchResponse(BaseModel): diff --git a/core/DTOs/GetURLsResponseInfo.py b/src/core/DTOs/GetURLsResponseInfo.py similarity index 85% rename from core/DTOs/GetURLsResponseInfo.py rename to src/core/DTOs/GetURLsResponseInfo.py index a924d5aa..a4f91f4f 100644 --- a/core/DTOs/GetURLsResponseInfo.py +++ b/src/core/DTOs/GetURLsResponseInfo.py @@ -3,8 +3,8 @@ from pydantic import BaseModel -from db.enums import URLMetadataAttributeType, ValidationStatus, ValidationSource -from collector_manager.enums import URLStatus +from src.collector_manager.enums import URLStatus +from src.db.enums import URLMetadataAttributeType, ValidationStatus, ValidationSource class GetURLsResponseErrorInfo(BaseModel): id: int diff --git a/core/DTOs/ManualBatchInputDTO.py b/src/core/DTOs/ManualBatchInputDTO.py similarity index 93% rename from core/DTOs/ManualBatchInputDTO.py rename to src/core/DTOs/ManualBatchInputDTO.py index 9bb98755..f7de1ecf 100644 --- a/core/DTOs/ManualBatchInputDTO.py +++ b/src/core/DTOs/ManualBatchInputDTO.py @@ -2,7 +2,7 @@ from pydantic import BaseModel, Field -from core.enums import RecordType +from src.core.enums import RecordType class ManualBatchInnerInputDTO(BaseModel): diff --git a/core/DTOs/ManualBatchResponseDTO.py b/src/core/DTOs/ManualBatchResponseDTO.py similarity index 100% rename from core/DTOs/ManualBatchResponseDTO.py rename to src/core/DTOs/ManualBatchResponseDTO.py diff --git a/core/DTOs/MessageCountResponse.py b/src/core/DTOs/MessageCountResponse.py similarity index 69% rename from core/DTOs/MessageCountResponse.py rename to src/core/DTOs/MessageCountResponse.py index acf5faf0..54da2cdf 100644 --- a/core/DTOs/MessageCountResponse.py +++ b/src/core/DTOs/MessageCountResponse.py @@ -1,6 +1,6 @@ from pydantic import Field -from core.DTOs.MessageResponse import MessageResponse +from src.core.DTOs.MessageResponse import MessageResponse class MessageCountResponse(MessageResponse): diff --git a/core/DTOs/MessageResponse.py b/src/core/DTOs/MessageResponse.py similarity index 100% rename from core/DTOs/MessageResponse.py rename to src/core/DTOs/MessageResponse.py diff --git a/core/DTOs/README.md b/src/core/DTOs/README.md similarity index 100% rename from core/DTOs/README.md rename to src/core/DTOs/README.md diff --git a/core/DTOs/RecordTypeAnnotationPostInfo.py b/src/core/DTOs/RecordTypeAnnotationPostInfo.py similarity index 73% rename from core/DTOs/RecordTypeAnnotationPostInfo.py rename to src/core/DTOs/RecordTypeAnnotationPostInfo.py index 87e8b674..a3c7a653 100644 --- a/core/DTOs/RecordTypeAnnotationPostInfo.py +++ b/src/core/DTOs/RecordTypeAnnotationPostInfo.py @@ -1,6 +1,6 @@ from pydantic import BaseModel -from core.enums import RecordType +from src.core.enums import RecordType class RecordTypeAnnotationPostInfo(BaseModel): diff --git a/core/DTOs/RelevanceAnnotationPostInfo.py b/src/core/DTOs/RelevanceAnnotationPostInfo.py similarity index 73% rename from core/DTOs/RelevanceAnnotationPostInfo.py rename to src/core/DTOs/RelevanceAnnotationPostInfo.py index 29d0e764..a29a5327 100644 --- a/core/DTOs/RelevanceAnnotationPostInfo.py +++ b/src/core/DTOs/RelevanceAnnotationPostInfo.py @@ -1,6 +1,6 @@ from pydantic import BaseModel -from core.enums import SuggestedStatus +from src.core.enums import SuggestedStatus class RelevanceAnnotationPostInfo(BaseModel): diff --git a/core/DTOs/ResponseURLInfo.py b/src/core/DTOs/ResponseURLInfo.py similarity index 100% rename from core/DTOs/ResponseURLInfo.py rename to src/core/DTOs/ResponseURLInfo.py diff --git a/core/DTOs/SearchURLResponse.py b/src/core/DTOs/SearchURLResponse.py similarity index 100% rename from core/DTOs/SearchURLResponse.py rename to src/core/DTOs/SearchURLResponse.py diff --git a/core/DTOs/TaskOperatorRunInfo.py b/src/core/DTOs/TaskOperatorRunInfo.py similarity index 100% rename from core/DTOs/TaskOperatorRunInfo.py rename to src/core/DTOs/TaskOperatorRunInfo.py diff --git a/core/DTOs/URLAgencySuggestionInfo.py b/src/core/DTOs/URLAgencySuggestionInfo.py similarity index 89% rename from core/DTOs/URLAgencySuggestionInfo.py rename to src/core/DTOs/URLAgencySuggestionInfo.py index 2eae0496..c0ea08f4 100644 --- a/core/DTOs/URLAgencySuggestionInfo.py +++ b/src/core/DTOs/URLAgencySuggestionInfo.py @@ -2,7 +2,7 @@ from pydantic import BaseModel -from core.enums import SuggestionType +from src.core.enums import SuggestionType class URLAgencySuggestionInfo(BaseModel): diff --git a/core/DTOs/task_data_objects/__init__.py b/src/core/DTOs/__init__.py similarity index 100% rename from core/DTOs/task_data_objects/__init__.py rename to src/core/DTOs/__init__.py diff --git a/core/DTOs/task_data_objects/AgencyIdentificationTDO.py b/src/core/DTOs/task_data_objects/AgencyIdentificationTDO.py similarity index 78% rename from core/DTOs/task_data_objects/AgencyIdentificationTDO.py rename to src/core/DTOs/task_data_objects/AgencyIdentificationTDO.py index 10c3ce99..cc62430f 100644 --- a/core/DTOs/task_data_objects/AgencyIdentificationTDO.py +++ b/src/core/DTOs/task_data_objects/AgencyIdentificationTDO.py @@ -2,7 +2,7 @@ from pydantic import BaseModel -from collector_manager.enums import CollectorType +from src.collector_manager.enums import CollectorType class AgencyIdentificationTDO(BaseModel): diff --git a/core/DTOs/task_data_objects/README.md b/src/core/DTOs/task_data_objects/README.md similarity index 100% rename from core/DTOs/task_data_objects/README.md rename to src/core/DTOs/task_data_objects/README.md diff --git a/core/DTOs/task_data_objects/SubmitApprovedURLTDO.py b/src/core/DTOs/task_data_objects/SubmitApprovedURLTDO.py similarity index 94% rename from core/DTOs/task_data_objects/SubmitApprovedURLTDO.py rename to src/core/DTOs/task_data_objects/SubmitApprovedURLTDO.py index be26d3a8..d5193640 100644 --- a/core/DTOs/task_data_objects/SubmitApprovedURLTDO.py +++ b/src/core/DTOs/task_data_objects/SubmitApprovedURLTDO.py @@ -2,7 +2,7 @@ from pydantic import BaseModel -from core.enums import RecordType +from src.core.enums import RecordType from datetime import datetime class SubmitApprovedURLTDO(BaseModel): diff --git a/core/DTOs/task_data_objects/URL404ProbeTDO.py b/src/core/DTOs/task_data_objects/URL404ProbeTDO.py similarity index 100% rename from core/DTOs/task_data_objects/URL404ProbeTDO.py rename to src/core/DTOs/task_data_objects/URL404ProbeTDO.py diff --git a/core/DTOs/task_data_objects/URLDuplicateTDO.py b/src/core/DTOs/task_data_objects/URLDuplicateTDO.py similarity index 100% rename from core/DTOs/task_data_objects/URLDuplicateTDO.py rename to src/core/DTOs/task_data_objects/URLDuplicateTDO.py diff --git a/core/DTOs/task_data_objects/URLMiscellaneousMetadataTDO.py b/src/core/DTOs/task_data_objects/URLMiscellaneousMetadataTDO.py similarity index 91% rename from core/DTOs/task_data_objects/URLMiscellaneousMetadataTDO.py rename to src/core/DTOs/task_data_objects/URLMiscellaneousMetadataTDO.py index ff173a8e..1daa40b1 100644 --- a/core/DTOs/task_data_objects/URLMiscellaneousMetadataTDO.py +++ b/src/core/DTOs/task_data_objects/URLMiscellaneousMetadataTDO.py @@ -2,7 +2,8 @@ from pydantic import BaseModel -from collector_manager.enums import CollectorType +from src.collector_manager.enums import CollectorType + class URLHTMLMetadataInfo(BaseModel): title: Optional[str] = None diff --git a/core/DTOs/task_data_objects/URLRecordTypeTDO.py b/src/core/DTOs/task_data_objects/URLRecordTypeTDO.py similarity index 75% rename from core/DTOs/task_data_objects/URLRecordTypeTDO.py rename to src/core/DTOs/task_data_objects/URLRecordTypeTDO.py index 03215f71..ae0bdfb8 100644 --- a/core/DTOs/task_data_objects/URLRecordTypeTDO.py +++ b/src/core/DTOs/task_data_objects/URLRecordTypeTDO.py @@ -2,8 +2,8 @@ from pydantic import BaseModel -from db.DTOs.URLWithHTML import URLWithHTML -from core.enums import RecordType +from src.db.DTOs.URLWithHTML import URLWithHTML +from src.core.enums import RecordType class URLRecordTypeTDO(BaseModel): diff --git a/core/DTOs/task_data_objects/UrlHtmlTDO.py b/src/core/DTOs/task_data_objects/UrlHtmlTDO.py similarity index 55% rename from core/DTOs/task_data_objects/UrlHtmlTDO.py rename to src/core/DTOs/task_data_objects/UrlHtmlTDO.py index 96c44778..7c222b2a 100644 --- a/core/DTOs/task_data_objects/UrlHtmlTDO.py +++ b/src/core/DTOs/task_data_objects/UrlHtmlTDO.py @@ -2,9 +2,9 @@ from pydantic import BaseModel -from db.DTOs.URLInfo import URLInfo -from html_tag_collector.DataClassTags import ResponseHTMLInfo -from html_tag_collector.URLRequestInterface import URLResponseInfo +from src.db.DTOs.URLInfo import URLInfo +from src.html_tag_collector.DataClassTags import ResponseHTMLInfo +from src.html_tag_collector.URLRequestInterface import URLResponseInfo class UrlHtmlTDO(BaseModel): diff --git a/core/__init__.py b/src/core/DTOs/task_data_objects/__init__.py similarity index 100% rename from core/__init__.py rename to src/core/DTOs/task_data_objects/__init__.py diff --git a/core/EnvVarManager.py b/src/core/EnvVarManager.py similarity index 100% rename from core/EnvVarManager.py rename to src/core/EnvVarManager.py diff --git a/core/FunctionTrigger.py b/src/core/FunctionTrigger.py similarity index 100% rename from core/FunctionTrigger.py rename to src/core/FunctionTrigger.py diff --git a/core/README.md b/src/core/README.md similarity index 100% rename from core/README.md rename to src/core/README.md diff --git a/core/ScheduledTaskManager.py b/src/core/ScheduledTaskManager.py similarity index 97% rename from core/ScheduledTaskManager.py rename to src/core/ScheduledTaskManager.py index e0b87247..22502e2d 100644 --- a/core/ScheduledTaskManager.py +++ b/src/core/ScheduledTaskManager.py @@ -2,7 +2,7 @@ from apscheduler.schedulers.asyncio import AsyncIOScheduler from apscheduler.triggers.interval import IntervalTrigger -from core.AsyncCore import AsyncCore +from src.core.AsyncCore import AsyncCore class AsyncScheduledTaskManager: diff --git a/core/SourceCollectorCore.py b/src/core/SourceCollectorCore.py similarity index 74% rename from core/SourceCollectorCore.py rename to src/core/SourceCollectorCore.py index 6b9822ae..b31d8037 100644 --- a/core/SourceCollectorCore.py +++ b/src/core/SourceCollectorCore.py @@ -1,8 +1,7 @@ -from typing import Optional, Any +from typing import Optional - -from db.DatabaseClient import DatabaseClient -from core.enums import BatchStatus +from src.db.DatabaseClient import DatabaseClient +from src.core.enums import BatchStatus class SourceCollectorCore: diff --git a/core/TaskManager.py b/src/core/TaskManager.py similarity index 80% rename from core/TaskManager.py rename to src/core/TaskManager.py index 4424ec1c..17008d44 100644 --- a/core/TaskManager.py +++ b/src/core/TaskManager.py @@ -1,25 +1,25 @@ import logging -from core.classes.task_operators.URL404ProbeTaskOperator import URL404ProbeTaskOperator -from core.classes.task_operators.URLDuplicateTaskOperator import URLDuplicateTaskOperator -from source_collectors.muckrock.MuckrockAPIInterface import MuckrockAPIInterface -from db.AsyncDatabaseClient import AsyncDatabaseClient -from db.DTOs.TaskInfo import TaskInfo -from db.enums import TaskType -from core.DTOs.GetTasksResponse import GetTasksResponse -from core.DTOs.TaskOperatorRunInfo import TaskOperatorRunInfo, TaskOperatorOutcome -from core.FunctionTrigger import FunctionTrigger -from core.classes.task_operators.AgencyIdentificationTaskOperator import AgencyIdentificationTaskOperator -from core.classes.task_operators.SubmitApprovedURLTaskOperator import SubmitApprovedURLTaskOperator -from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase -from core.classes.task_operators.URLHTMLTaskOperator import URLHTMLTaskOperator -from core.classes.task_operators.URLMiscellaneousMetadataTaskOperator import URLMiscellaneousMetadataTaskOperator -from core.classes.task_operators.URLRecordTypeTaskOperator import URLRecordTypeTaskOperator -from core.enums import BatchStatus -from html_tag_collector.ResponseParser import HTMLResponseParser -from html_tag_collector.URLRequestInterface import URLRequestInterface -from llm_api_logic.OpenAIRecordClassifier import OpenAIRecordClassifier -from pdap_api_client.PDAPClient import PDAPClient +from src.core.classes.task_operators.URL404ProbeTaskOperator import URL404ProbeTaskOperator +from src.core.classes.task_operators.URLDuplicateTaskOperator import URLDuplicateTaskOperator +from src.source_collectors.muckrock.MuckrockAPIInterface import MuckrockAPIInterface +from src.db.AsyncDatabaseClient import AsyncDatabaseClient +from src.db.DTOs.TaskInfo import TaskInfo +from src.db.enums import TaskType +from src.core.DTOs.GetTasksResponse import GetTasksResponse +from src.core.DTOs.TaskOperatorRunInfo import TaskOperatorRunInfo, TaskOperatorOutcome +from src.core.FunctionTrigger import FunctionTrigger +from src.core.classes.task_operators.AgencyIdentificationTaskOperator import AgencyIdentificationTaskOperator +from src.core.classes.task_operators.SubmitApprovedURLTaskOperator import SubmitApprovedURLTaskOperator +from src.core.classes.task_operators.TaskOperatorBase import TaskOperatorBase +from src.core.classes.task_operators.URLHTMLTaskOperator import URLHTMLTaskOperator +from src.core.classes.task_operators.URLMiscellaneousMetadataTaskOperator import URLMiscellaneousMetadataTaskOperator +from src.core.classes.task_operators.URLRecordTypeTaskOperator import URLRecordTypeTaskOperator +from src.core.enums import BatchStatus +from src.html_tag_collector.ResponseParser import HTMLResponseParser +from src.html_tag_collector.URLRequestInterface import URLRequestInterface +from src.llm_api_logic.OpenAIRecordClassifier import OpenAIRecordClassifier +from src.pdap_api_client.PDAPClient import PDAPClient from discord_poster import DiscordPoster TASK_REPEAT_THRESHOLD = 20 diff --git a/core/classes/__init__.py b/src/core/__init__.py similarity index 100% rename from core/classes/__init__.py rename to src/core/__init__.py diff --git a/core/classes/ErrorManager.py b/src/core/classes/ErrorManager.py similarity index 96% rename from core/classes/ErrorManager.py rename to src/core/classes/ErrorManager.py index ba763054..5a779a80 100644 --- a/core/classes/ErrorManager.py +++ b/src/core/classes/ErrorManager.py @@ -4,7 +4,7 @@ from fastapi import HTTPException from pydantic import BaseModel -from core.enums import AnnotationType +from src.core.enums import AnnotationType class ErrorTypes(Enum): diff --git a/core/classes/HTMLContentInfoGetter.py b/src/core/classes/HTMLContentInfoGetter.py similarity index 85% rename from core/classes/HTMLContentInfoGetter.py rename to src/core/classes/HTMLContentInfoGetter.py index b9e0b7e1..8e16fad1 100644 --- a/core/classes/HTMLContentInfoGetter.py +++ b/src/core/classes/HTMLContentInfoGetter.py @@ -1,5 +1,5 @@ -from db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo, HTMLContentType -from html_tag_collector.DataClassTags import ResponseHTMLInfo +from src.db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo, HTMLContentType +from src.html_tag_collector.DataClassTags import ResponseHTMLInfo class HTMLContentInfoGetter: diff --git a/core/classes/subtasks/MiscellaneousMetadata/__init__.py b/src/core/classes/__init__.py similarity index 100% rename from core/classes/subtasks/MiscellaneousMetadata/__init__.py rename to src/core/classes/__init__.py diff --git a/core/classes/subtasks/AgencyIdentificationSubtaskBase.py b/src/core/classes/subtasks/AgencyIdentificationSubtaskBase.py similarity index 81% rename from core/classes/subtasks/AgencyIdentificationSubtaskBase.py rename to src/core/classes/subtasks/AgencyIdentificationSubtaskBase.py index 755cade5..9e7dd865 100644 --- a/core/classes/subtasks/AgencyIdentificationSubtaskBase.py +++ b/src/core/classes/subtasks/AgencyIdentificationSubtaskBase.py @@ -2,7 +2,7 @@ from abc import ABC from typing import Optional -from core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo +from src.core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo class AgencyIdentificationSubtaskBase(ABC): diff --git a/core/classes/subtasks/AutoGooglerAgencyIdentificationSubtask.py b/src/core/classes/subtasks/AutoGooglerAgencyIdentificationSubtask.py similarity index 73% rename from core/classes/subtasks/AutoGooglerAgencyIdentificationSubtask.py rename to src/core/classes/subtasks/AutoGooglerAgencyIdentificationSubtask.py index 1e5d945b..b4734c71 100644 --- a/core/classes/subtasks/AutoGooglerAgencyIdentificationSubtask.py +++ b/src/core/classes/subtasks/AutoGooglerAgencyIdentificationSubtask.py @@ -1,8 +1,8 @@ from typing import Optional -from core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo -from core.classes.subtasks.AgencyIdentificationSubtaskBase import AgencyIdentificationSubtaskBase -from core.enums import SuggestionType +from src.core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo +from src.core.classes.subtasks.AgencyIdentificationSubtaskBase import AgencyIdentificationSubtaskBase +from src.core.enums import SuggestionType class AutoGooglerAgencyIdentificationSubtask(AgencyIdentificationSubtaskBase): diff --git a/core/classes/subtasks/CKANAgencyIdentificationSubtask.py b/src/core/classes/subtasks/CKANAgencyIdentificationSubtask.py similarity index 72% rename from core/classes/subtasks/CKANAgencyIdentificationSubtask.py rename to src/core/classes/subtasks/CKANAgencyIdentificationSubtask.py index 5eb88406..4ac8f0fd 100644 --- a/core/classes/subtasks/CKANAgencyIdentificationSubtask.py +++ b/src/core/classes/subtasks/CKANAgencyIdentificationSubtask.py @@ -1,9 +1,9 @@ from typing import Optional -from core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo -from core.helpers import process_match_agency_response_to_suggestions -from pdap_api_client.PDAPClient import PDAPClient -from pdap_api_client.DTOs import MatchAgencyResponse +from src.core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo +from src.core.helpers import process_match_agency_response_to_suggestions +from src.pdap_api_client.PDAPClient import PDAPClient +from src.pdap_api_client.DTOs import MatchAgencyResponse class CKANAgencyIdentificationSubtask: diff --git a/core/classes/subtasks/CommonCrawlerAgencyIdentificationSubtask.py b/src/core/classes/subtasks/CommonCrawlerAgencyIdentificationSubtask.py similarity index 82% rename from core/classes/subtasks/CommonCrawlerAgencyIdentificationSubtask.py rename to src/core/classes/subtasks/CommonCrawlerAgencyIdentificationSubtask.py index 5d0fa409..00441a0a 100644 --- a/core/classes/subtasks/CommonCrawlerAgencyIdentificationSubtask.py +++ b/src/core/classes/subtasks/CommonCrawlerAgencyIdentificationSubtask.py @@ -1,7 +1,7 @@ from typing import Optional -from core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo -from core.enums import SuggestionType +from src.core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo +from src.core.enums import SuggestionType class CommonCrawlerAgencyIdentificationSubtask: diff --git a/core/classes/subtasks/MiscellaneousMetadata/AutoGooglerMiscMetadataSubtask.py b/src/core/classes/subtasks/MiscellaneousMetadata/AutoGooglerMiscMetadataSubtask.py similarity index 58% rename from core/classes/subtasks/MiscellaneousMetadata/AutoGooglerMiscMetadataSubtask.py rename to src/core/classes/subtasks/MiscellaneousMetadata/AutoGooglerMiscMetadataSubtask.py index 43659a9e..8cf644ad 100644 --- a/core/classes/subtasks/MiscellaneousMetadata/AutoGooglerMiscMetadataSubtask.py +++ b/src/core/classes/subtasks/MiscellaneousMetadata/AutoGooglerMiscMetadataSubtask.py @@ -1,5 +1,5 @@ -from core.DTOs.task_data_objects.URLMiscellaneousMetadataTDO import URLMiscellaneousMetadataTDO -from core.classes.subtasks.MiscellaneousMetadata.MiscellaneousMetadataSubtaskBase import \ +from src.core.DTOs.task_data_objects.URLMiscellaneousMetadataTDO import URLMiscellaneousMetadataTDO +from src.core.classes.subtasks.MiscellaneousMetadata.MiscellaneousMetadataSubtaskBase import \ MiscellaneousMetadataSubtaskBase diff --git a/core/classes/subtasks/MiscellaneousMetadata/CKANMiscMetadataSubtask.py b/src/core/classes/subtasks/MiscellaneousMetadata/CKANMiscMetadataSubtask.py similarity index 72% rename from core/classes/subtasks/MiscellaneousMetadata/CKANMiscMetadataSubtask.py rename to src/core/classes/subtasks/MiscellaneousMetadata/CKANMiscMetadataSubtask.py index 04ef7a0f..60c3a410 100644 --- a/core/classes/subtasks/MiscellaneousMetadata/CKANMiscMetadataSubtask.py +++ b/src/core/classes/subtasks/MiscellaneousMetadata/CKANMiscMetadataSubtask.py @@ -1,5 +1,5 @@ -from core.DTOs.task_data_objects.URLMiscellaneousMetadataTDO import URLMiscellaneousMetadataTDO -from core.classes.subtasks.MiscellaneousMetadata.MiscellaneousMetadataSubtaskBase import \ +from src.core.DTOs.task_data_objects.URLMiscellaneousMetadataTDO import URLMiscellaneousMetadataTDO +from src.core.classes.subtasks.MiscellaneousMetadata.MiscellaneousMetadataSubtaskBase import \ MiscellaneousMetadataSubtaskBase diff --git a/core/classes/subtasks/MiscellaneousMetadata/MiscellaneousMetadataSubtaskBase.py b/src/core/classes/subtasks/MiscellaneousMetadata/MiscellaneousMetadataSubtaskBase.py similarity index 66% rename from core/classes/subtasks/MiscellaneousMetadata/MiscellaneousMetadataSubtaskBase.py rename to src/core/classes/subtasks/MiscellaneousMetadata/MiscellaneousMetadataSubtaskBase.py index 7a0e7d1f..0f1224ad 100644 --- a/core/classes/subtasks/MiscellaneousMetadata/MiscellaneousMetadataSubtaskBase.py +++ b/src/core/classes/subtasks/MiscellaneousMetadata/MiscellaneousMetadataSubtaskBase.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod -from core.DTOs.task_data_objects.URLMiscellaneousMetadataTDO import URLMiscellaneousMetadataTDO +from src.core.DTOs.task_data_objects.URLMiscellaneousMetadataTDO import URLMiscellaneousMetadataTDO class MiscellaneousMetadataSubtaskBase(ABC): diff --git a/core/classes/subtasks/MiscellaneousMetadata/MuckrockMiscMetadataSubtask.py b/src/core/classes/subtasks/MiscellaneousMetadata/MuckrockMiscMetadataSubtask.py similarity index 58% rename from core/classes/subtasks/MiscellaneousMetadata/MuckrockMiscMetadataSubtask.py rename to src/core/classes/subtasks/MiscellaneousMetadata/MuckrockMiscMetadataSubtask.py index 1d599162..4bd18481 100644 --- a/core/classes/subtasks/MiscellaneousMetadata/MuckrockMiscMetadataSubtask.py +++ b/src/core/classes/subtasks/MiscellaneousMetadata/MuckrockMiscMetadataSubtask.py @@ -1,5 +1,5 @@ -from core.DTOs.task_data_objects.URLMiscellaneousMetadataTDO import URLMiscellaneousMetadataTDO -from core.classes.subtasks.MiscellaneousMetadata.MiscellaneousMetadataSubtaskBase import \ +from src.core.DTOs.task_data_objects.URLMiscellaneousMetadataTDO import URLMiscellaneousMetadataTDO +from src.core.classes.subtasks.MiscellaneousMetadata.MiscellaneousMetadataSubtaskBase import \ MiscellaneousMetadataSubtaskBase diff --git a/core/classes/subtasks/__init__.py b/src/core/classes/subtasks/MiscellaneousMetadata/__init__.py similarity index 100% rename from core/classes/subtasks/__init__.py rename to src/core/classes/subtasks/MiscellaneousMetadata/__init__.py diff --git a/core/classes/subtasks/MuckrockAgencyIdentificationSubtask.py b/src/core/classes/subtasks/MuckrockAgencyIdentificationSubtask.py similarity index 74% rename from core/classes/subtasks/MuckrockAgencyIdentificationSubtask.py rename to src/core/classes/subtasks/MuckrockAgencyIdentificationSubtask.py index a6222cf8..4e0d874d 100644 --- a/core/classes/subtasks/MuckrockAgencyIdentificationSubtask.py +++ b/src/core/classes/subtasks/MuckrockAgencyIdentificationSubtask.py @@ -1,11 +1,11 @@ from typing import Optional -from source_collectors.muckrock.MuckrockAPIInterface import MuckrockAPIInterface, AgencyLookupResponse, AgencyLookupResponseType -from core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo -from core.exceptions import MuckrockAPIError -from core.helpers import process_match_agency_response_to_suggestions -from pdap_api_client.PDAPClient import PDAPClient -from pdap_api_client.DTOs import MatchAgencyResponse +from src.source_collectors.muckrock.MuckrockAPIInterface import MuckrockAPIInterface, AgencyLookupResponse, AgencyLookupResponseType +from src.core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo +from src.core.exceptions import MuckrockAPIError +from src.core.helpers import process_match_agency_response_to_suggestions +from src.pdap_api_client.PDAPClient import PDAPClient +from src.pdap_api_client.DTOs import MatchAgencyResponse class MuckrockAgencyIdentificationSubtask: diff --git a/core/classes/task_operators/__init__.py b/src/core/classes/subtasks/__init__.py similarity index 100% rename from core/classes/task_operators/__init__.py rename to src/core/classes/subtasks/__init__.py diff --git a/core/classes/task_operators/AgencyIdentificationTaskOperator.py b/src/core/classes/task_operators/AgencyIdentificationTaskOperator.py similarity index 78% rename from core/classes/task_operators/AgencyIdentificationTaskOperator.py rename to src/core/classes/task_operators/AgencyIdentificationTaskOperator.py index 259ddff9..80b09d56 100644 --- a/core/classes/task_operators/AgencyIdentificationTaskOperator.py +++ b/src/core/classes/task_operators/AgencyIdentificationTaskOperator.py @@ -1,19 +1,19 @@ from aiohttp import ClientSession -from source_collectors.muckrock.MuckrockAPIInterface import MuckrockAPIInterface -from db.AsyncDatabaseClient import AsyncDatabaseClient -from db.DTOs.URLErrorInfos import URLErrorPydanticInfo -from db.enums import TaskType -from collector_manager.enums import CollectorType -from core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo -from core.DTOs.task_data_objects.AgencyIdentificationTDO import AgencyIdentificationTDO -from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase -from core.classes.subtasks.AutoGooglerAgencyIdentificationSubtask import AutoGooglerAgencyIdentificationSubtask -from core.classes.subtasks.CKANAgencyIdentificationSubtask import CKANAgencyIdentificationSubtask -from core.classes.subtasks.CommonCrawlerAgencyIdentificationSubtask import CommonCrawlerAgencyIdentificationSubtask -from core.classes.subtasks.MuckrockAgencyIdentificationSubtask import MuckrockAgencyIdentificationSubtask -from core.enums import SuggestionType -from pdap_api_client.PDAPClient import PDAPClient +from src.source_collectors.muckrock.MuckrockAPIInterface import MuckrockAPIInterface +from src.db.AsyncDatabaseClient import AsyncDatabaseClient +from src.db.DTOs.URLErrorInfos import URLErrorPydanticInfo +from src.db.enums import TaskType +from src.collector_manager.enums import CollectorType +from src.core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo +from src.core.DTOs.task_data_objects.AgencyIdentificationTDO import AgencyIdentificationTDO +from src.core.classes.task_operators.TaskOperatorBase import TaskOperatorBase +from src.core.classes.subtasks.AutoGooglerAgencyIdentificationSubtask import AutoGooglerAgencyIdentificationSubtask +from src.core.classes.subtasks.CKANAgencyIdentificationSubtask import CKANAgencyIdentificationSubtask +from src.core.classes.subtasks.CommonCrawlerAgencyIdentificationSubtask import CommonCrawlerAgencyIdentificationSubtask +from src.core.classes.subtasks.MuckrockAgencyIdentificationSubtask import MuckrockAgencyIdentificationSubtask +from src.core.enums import SuggestionType +from src.pdap_api_client.PDAPClient import PDAPClient # TODO: Validate with Manual Tests diff --git a/core/classes/task_operators/SubmitApprovedURLTaskOperator.py b/src/core/classes/task_operators/SubmitApprovedURLTaskOperator.py similarity index 85% rename from core/classes/task_operators/SubmitApprovedURLTaskOperator.py rename to src/core/classes/task_operators/SubmitApprovedURLTaskOperator.py index e3a4eab2..49b6b7c1 100644 --- a/core/classes/task_operators/SubmitApprovedURLTaskOperator.py +++ b/src/core/classes/task_operators/SubmitApprovedURLTaskOperator.py @@ -1,9 +1,9 @@ -from db.AsyncDatabaseClient import AsyncDatabaseClient -from db.DTOs.URLErrorInfos import URLErrorPydanticInfo -from db.enums import TaskType -from core.DTOs.task_data_objects.SubmitApprovedURLTDO import SubmitApprovedURLTDO -from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase -from pdap_api_client.PDAPClient import PDAPClient +from src.db.AsyncDatabaseClient import AsyncDatabaseClient +from src.db.DTOs.URLErrorInfos import URLErrorPydanticInfo +from src.db.enums import TaskType +from src.core.DTOs.task_data_objects.SubmitApprovedURLTDO import SubmitApprovedURLTDO +from src.core.classes.task_operators.TaskOperatorBase import TaskOperatorBase +from src.pdap_api_client.PDAPClient import PDAPClient class SubmitApprovedURLTaskOperator(TaskOperatorBase): diff --git a/core/classes/task_operators/TaskOperatorBase.py b/src/core/classes/task_operators/TaskOperatorBase.py similarity index 90% rename from core/classes/task_operators/TaskOperatorBase.py rename to src/core/classes/task_operators/TaskOperatorBase.py index df12f362..7e6df091 100644 --- a/core/classes/task_operators/TaskOperatorBase.py +++ b/src/core/classes/task_operators/TaskOperatorBase.py @@ -1,9 +1,9 @@ import traceback from abc import ABC, abstractmethod -from db.AsyncDatabaseClient import AsyncDatabaseClient -from db.enums import TaskType -from core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome, TaskOperatorRunInfo -from core.enums import BatchStatus +from src.db.AsyncDatabaseClient import AsyncDatabaseClient +from src.db.enums import TaskType +from src.core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome, TaskOperatorRunInfo +from src.core.enums import BatchStatus class TaskOperatorBase(ABC): diff --git a/core/classes/task_operators/URL404ProbeTaskOperator.py b/src/core/classes/task_operators/URL404ProbeTaskOperator.py similarity index 85% rename from core/classes/task_operators/URL404ProbeTaskOperator.py rename to src/core/classes/task_operators/URL404ProbeTaskOperator.py index 536fea23..648834d9 100644 --- a/core/classes/task_operators/URL404ProbeTaskOperator.py +++ b/src/core/classes/task_operators/URL404ProbeTaskOperator.py @@ -2,11 +2,11 @@ from pydantic import BaseModel -from db.AsyncDatabaseClient import AsyncDatabaseClient -from db.enums import TaskType -from core.DTOs.task_data_objects.URL404ProbeTDO import URL404ProbeTDO -from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase -from html_tag_collector.URLRequestInterface import URLRequestInterface +from src.db.AsyncDatabaseClient import AsyncDatabaseClient +from src.db.enums import TaskType +from src.core.DTOs.task_data_objects.URL404ProbeTDO import URL404ProbeTDO +from src.core.classes.task_operators.TaskOperatorBase import TaskOperatorBase +from src.html_tag_collector.URLRequestInterface import URLRequestInterface class URL404ProbeTDOSubsets(BaseModel): diff --git a/core/classes/task_operators/URLDuplicateTaskOperator.py b/src/core/classes/task_operators/URLDuplicateTaskOperator.py similarity index 83% rename from core/classes/task_operators/URLDuplicateTaskOperator.py rename to src/core/classes/task_operators/URLDuplicateTaskOperator.py index 09ab35ce..c332a461 100644 --- a/core/classes/task_operators/URLDuplicateTaskOperator.py +++ b/src/core/classes/task_operators/URLDuplicateTaskOperator.py @@ -2,11 +2,11 @@ from aiohttp import ClientResponseError -from db.AsyncDatabaseClient import AsyncDatabaseClient -from db.enums import TaskType -from core.DTOs.task_data_objects.URLDuplicateTDO import URLDuplicateTDO -from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase -from pdap_api_client.PDAPClient import PDAPClient +from src.db.AsyncDatabaseClient import AsyncDatabaseClient +from src.db.enums import TaskType +from src.core.DTOs.task_data_objects.URLDuplicateTDO import URLDuplicateTDO +from src.core.classes.task_operators.TaskOperatorBase import TaskOperatorBase +from src.pdap_api_client.PDAPClient import PDAPClient class URLDuplicateTaskOperator(TaskOperatorBase): diff --git a/core/classes/task_operators/URLHTMLTaskOperator.py b/src/core/classes/task_operators/URLHTMLTaskOperator.py similarity index 89% rename from core/classes/task_operators/URLHTMLTaskOperator.py rename to src/core/classes/task_operators/URLHTMLTaskOperator.py index 340c386b..26961d72 100644 --- a/core/classes/task_operators/URLHTMLTaskOperator.py +++ b/src/core/classes/task_operators/URLHTMLTaskOperator.py @@ -1,14 +1,14 @@ from http import HTTPStatus -from db.AsyncDatabaseClient import AsyncDatabaseClient -from db.DTOs.URLErrorInfos import URLErrorPydanticInfo -from db.DTOs.URLInfo import URLInfo -from db.enums import TaskType -from core.DTOs.task_data_objects.UrlHtmlTDO import UrlHtmlTDO -from core.classes.HTMLContentInfoGetter import HTMLContentInfoGetter -from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase -from html_tag_collector.ResponseParser import HTMLResponseParser -from html_tag_collector.URLRequestInterface import URLRequestInterface +from src.db.AsyncDatabaseClient import AsyncDatabaseClient +from src.db.DTOs.URLErrorInfos import URLErrorPydanticInfo +from src.db.DTOs.URLInfo import URLInfo +from src.db.enums import TaskType +from src.core.DTOs.task_data_objects.UrlHtmlTDO import UrlHtmlTDO +from src.core.classes.HTMLContentInfoGetter import HTMLContentInfoGetter +from src.core.classes.task_operators.TaskOperatorBase import TaskOperatorBase +from src.html_tag_collector.ResponseParser import HTMLResponseParser +from src.html_tag_collector.URLRequestInterface import URLRequestInterface class URLHTMLTaskOperator(TaskOperatorBase): diff --git a/core/classes/task_operators/URLMiscellaneousMetadataTaskOperator.py b/src/core/classes/task_operators/URLMiscellaneousMetadataTaskOperator.py similarity index 73% rename from core/classes/task_operators/URLMiscellaneousMetadataTaskOperator.py rename to src/core/classes/task_operators/URLMiscellaneousMetadataTaskOperator.py index bf9eef04..086631ca 100644 --- a/core/classes/task_operators/URLMiscellaneousMetadataTaskOperator.py +++ b/src/core/classes/task_operators/URLMiscellaneousMetadataTaskOperator.py @@ -1,16 +1,16 @@ from typing import Optional -from db.AsyncDatabaseClient import AsyncDatabaseClient -from db.DTOs.URLErrorInfos import URLErrorPydanticInfo -from db.enums import TaskType -from collector_manager.enums import CollectorType -from core.DTOs.task_data_objects.URLMiscellaneousMetadataTDO import URLMiscellaneousMetadataTDO -from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase -from core.classes.subtasks.MiscellaneousMetadata.AutoGooglerMiscMetadataSubtask import AutoGooglerMiscMetadataSubtask -from core.classes.subtasks.MiscellaneousMetadata.CKANMiscMetadataSubtask import CKANMiscMetadataSubtask -from core.classes.subtasks.MiscellaneousMetadata.MiscellaneousMetadataSubtaskBase import \ +from src.db.AsyncDatabaseClient import AsyncDatabaseClient +from src.db.DTOs.URLErrorInfos import URLErrorPydanticInfo +from src.db.enums import TaskType +from src.collector_manager.enums import CollectorType +from src.core.DTOs.task_data_objects.URLMiscellaneousMetadataTDO import URLMiscellaneousMetadataTDO +from src.core.classes.task_operators.TaskOperatorBase import TaskOperatorBase +from src.core.classes.subtasks.MiscellaneousMetadata.AutoGooglerMiscMetadataSubtask import AutoGooglerMiscMetadataSubtask +from src.core.classes.subtasks.MiscellaneousMetadata.CKANMiscMetadataSubtask import CKANMiscMetadataSubtask +from src.core.classes.subtasks.MiscellaneousMetadata.MiscellaneousMetadataSubtaskBase import \ MiscellaneousMetadataSubtaskBase -from core.classes.subtasks.MiscellaneousMetadata.MuckrockMiscMetadataSubtask import MuckrockMiscMetadataSubtask +from src.core.classes.subtasks.MiscellaneousMetadata.MuckrockMiscMetadataSubtask import MuckrockMiscMetadataSubtask class URLMiscellaneousMetadataTaskOperator(TaskOperatorBase): diff --git a/core/classes/task_operators/URLRecordTypeTaskOperator.py b/src/core/classes/task_operators/URLRecordTypeTaskOperator.py similarity index 86% rename from core/classes/task_operators/URLRecordTypeTaskOperator.py rename to src/core/classes/task_operators/URLRecordTypeTaskOperator.py index 0f080a03..99a960a1 100644 --- a/core/classes/task_operators/URLRecordTypeTaskOperator.py +++ b/src/core/classes/task_operators/URLRecordTypeTaskOperator.py @@ -1,10 +1,10 @@ -from db.AsyncDatabaseClient import AsyncDatabaseClient -from db.DTOs.URLErrorInfos import URLErrorPydanticInfo -from db.enums import TaskType -from core.DTOs.task_data_objects.URLRecordTypeTDO import URLRecordTypeTDO -from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase -from core.enums import RecordType -from llm_api_logic.OpenAIRecordClassifier import OpenAIRecordClassifier +from src.db.AsyncDatabaseClient import AsyncDatabaseClient +from src.db.DTOs.URLErrorInfos import URLErrorPydanticInfo +from src.db.enums import TaskType +from src.core.DTOs.task_data_objects.URLRecordTypeTDO import URLRecordTypeTDO +from src.core.classes.task_operators.TaskOperatorBase import TaskOperatorBase +from src.core.enums import RecordType +from src.llm_api_logic.OpenAIRecordClassifier import OpenAIRecordClassifier class URLRecordTypeTaskOperator(TaskOperatorBase): diff --git a/core/preprocessors/__init__.py b/src/core/classes/task_operators/__init__.py similarity index 100% rename from core/preprocessors/__init__.py rename to src/core/classes/task_operators/__init__.py diff --git a/core/enums.py b/src/core/enums.py similarity index 100% rename from core/enums.py rename to src/core/enums.py diff --git a/core/exceptions.py b/src/core/exceptions.py similarity index 100% rename from core/exceptions.py rename to src/core/exceptions.py diff --git a/core/helpers.py b/src/core/helpers.py similarity index 81% rename from core/helpers.py rename to src/core/helpers.py index 1fc51cde..038e14b9 100644 --- a/core/helpers.py +++ b/src/core/helpers.py @@ -1,12 +1,8 @@ -from http import HTTPStatus - -from fastapi import HTTPException - -from core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo -from core.enums import SuggestionType -from core.exceptions import MatchAgencyError -from pdap_api_client.DTOs import MatchAgencyResponse -from pdap_api_client.enums import MatchAgencyResponseStatus +from src.core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo +from src.core.enums import SuggestionType +from src.core.exceptions import MatchAgencyError +from src.pdap_api_client.DTOs import MatchAgencyResponse +from src.pdap_api_client.enums import MatchAgencyResponseStatus def process_match_agency_response_to_suggestions( diff --git a/core/preprocessors/AutoGooglerPreprocessor.py b/src/core/preprocessors/AutoGooglerPreprocessor.py similarity index 87% rename from core/preprocessors/AutoGooglerPreprocessor.py rename to src/core/preprocessors/AutoGooglerPreprocessor.py index ebbf4474..d2d5b1e5 100644 --- a/core/preprocessors/AutoGooglerPreprocessor.py +++ b/src/core/preprocessors/AutoGooglerPreprocessor.py @@ -1,7 +1,7 @@ from typing import List -from db.DTOs.URLInfo import URLInfo -from core.preprocessors.PreprocessorBase import PreprocessorBase +from src.db.DTOs.URLInfo import URLInfo +from src.core.preprocessors.PreprocessorBase import PreprocessorBase class AutoGooglerPreprocessor(PreprocessorBase): diff --git a/core/preprocessors/CKANPreprocessor.py b/src/core/preprocessors/CKANPreprocessor.py similarity index 94% rename from core/preprocessors/CKANPreprocessor.py rename to src/core/preprocessors/CKANPreprocessor.py index 62a550a1..271f6b3f 100644 --- a/core/preprocessors/CKANPreprocessor.py +++ b/src/core/preprocessors/CKANPreprocessor.py @@ -1,7 +1,7 @@ from datetime import datetime from typing import List -from db.DTOs.URLInfo import URLInfo +from src.db.DTOs.URLInfo import URLInfo class CKANPreprocessor: diff --git a/core/preprocessors/CommonCrawlerPreprocessor.py b/src/core/preprocessors/CommonCrawlerPreprocessor.py similarity index 74% rename from core/preprocessors/CommonCrawlerPreprocessor.py rename to src/core/preprocessors/CommonCrawlerPreprocessor.py index 018d9bfb..131d8db3 100644 --- a/core/preprocessors/CommonCrawlerPreprocessor.py +++ b/src/core/preprocessors/CommonCrawlerPreprocessor.py @@ -1,7 +1,7 @@ from typing import List -from db.DTOs.URLInfo import URLInfo -from core.preprocessors.PreprocessorBase import PreprocessorBase +from src.db.DTOs.URLInfo import URLInfo +from src.core.preprocessors.PreprocessorBase import PreprocessorBase class CommonCrawlerPreprocessor(PreprocessorBase): diff --git a/core/preprocessors/ExamplePreprocessor.py b/src/core/preprocessors/ExamplePreprocessor.py similarity index 64% rename from core/preprocessors/ExamplePreprocessor.py rename to src/core/preprocessors/ExamplePreprocessor.py index 41f3b57e..3bf93455 100644 --- a/core/preprocessors/ExamplePreprocessor.py +++ b/src/core/preprocessors/ExamplePreprocessor.py @@ -1,8 +1,8 @@ from typing import List -from db.DTOs.URLInfo import URLInfo -from collector_manager.DTOs.ExampleOutputDTO import ExampleOutputDTO -from core.preprocessors.PreprocessorBase import PreprocessorBase +from src.db.DTOs.URLInfo import URLInfo +from src.collector_manager.DTOs.ExampleOutputDTO import ExampleOutputDTO +from src.core.preprocessors.PreprocessorBase import PreprocessorBase class ExamplePreprocessor(PreprocessorBase): diff --git a/core/preprocessors/MuckrockPreprocessor.py b/src/core/preprocessors/MuckrockPreprocessor.py similarity index 77% rename from core/preprocessors/MuckrockPreprocessor.py rename to src/core/preprocessors/MuckrockPreprocessor.py index 04ba221b..503004e9 100644 --- a/core/preprocessors/MuckrockPreprocessor.py +++ b/src/core/preprocessors/MuckrockPreprocessor.py @@ -1,7 +1,7 @@ from typing import List -from db.DTOs.URLInfo import URLInfo -from core.preprocessors.PreprocessorBase import PreprocessorBase +from src.db.DTOs.URLInfo import URLInfo +from src.core.preprocessors.PreprocessorBase import PreprocessorBase class MuckrockPreprocessor(PreprocessorBase): diff --git a/core/preprocessors/PreprocessorBase.py b/src/core/preprocessors/PreprocessorBase.py similarity index 92% rename from core/preprocessors/PreprocessorBase.py rename to src/core/preprocessors/PreprocessorBase.py index 6f44f8ae..30f73eed 100644 --- a/core/preprocessors/PreprocessorBase.py +++ b/src/core/preprocessors/PreprocessorBase.py @@ -2,7 +2,7 @@ from abc import ABC from typing import List -from db.DTOs.URLInfo import URLInfo +from src.db.DTOs.URLInfo import URLInfo class PreprocessorBase(ABC): diff --git a/core/preprocessors/README.md b/src/core/preprocessors/README.md similarity index 100% rename from core/preprocessors/README.md rename to src/core/preprocessors/README.md diff --git a/db/DTOs/__init__.py b/src/core/preprocessors/__init__.py similarity index 100% rename from db/DTOs/__init__.py rename to src/core/preprocessors/__init__.py diff --git a/db/AsyncDatabaseClient.py b/src/db/AsyncDatabaseClient.py similarity index 96% rename from db/AsyncDatabaseClient.py rename to src/db/AsyncDatabaseClient.py index a539d3b2..e9b78952 100644 --- a/db/AsyncDatabaseClient.py +++ b/src/db/AsyncDatabaseClient.py @@ -4,7 +4,7 @@ from typing import Optional, Type, Any, List from fastapi import HTTPException -from sqlalchemy import select, exists, func, case, desc, Select, not_, and_, update, asc, delete, insert, CTE, literal +from sqlalchemy import select, exists, func, case, desc, Select, not_, and_, update, asc, delete, literal from sqlalchemy.dialects import postgresql from sqlalchemy.exc import IntegrityError from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker @@ -12,61 +12,61 @@ from sqlalchemy.sql.functions import coalesce from starlette import status -from db.ConfigManager import ConfigManager -from db.DTOConverter import DTOConverter -from db.DTOs.BatchInfo import BatchInfo -from db.DTOs.DuplicateInfo import DuplicateInsertInfo, DuplicateInfo -from db.DTOs.InsertURLsInfo import InsertURLsInfo -from db.DTOs.LogInfo import LogInfo, LogOutputInfo -from db.DTOs.TaskInfo import TaskInfo -from db.DTOs.URLErrorInfos import URLErrorPydanticInfo -from db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo, HTMLContentType -from db.DTOs.URLInfo import URLInfo -from db.DTOs.URLMapping import URLMapping -from db.StatementComposer import StatementComposer -from db.constants import PLACEHOLDER_AGENCY_NAME -from db.enums import TaskType -from db.models import URL, URLErrorInfo, URLHTMLContent, Base, \ +from src.collector_manager.enums import URLStatus, CollectorType +from src.db.ConfigManager import ConfigManager +from src.db.DTOConverter import DTOConverter +from src.db.DTOs.BatchInfo import BatchInfo +from src.db.DTOs.DuplicateInfo import DuplicateInsertInfo, DuplicateInfo +from src.db.DTOs.InsertURLsInfo import InsertURLsInfo +from src.db.DTOs.LogInfo import LogInfo, LogOutputInfo +from src.db.DTOs.TaskInfo import TaskInfo +from src.db.DTOs.URLErrorInfos import URLErrorPydanticInfo +from src.db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo, HTMLContentType +from src.db.DTOs.URLInfo import URLInfo +from src.db.DTOs.URLMapping import URLMapping +from src.db.StatementComposer import StatementComposer +from src.db.constants import PLACEHOLDER_AGENCY_NAME +from src.db.enums import TaskType +from src.db.models import URL, URLErrorInfo, URLHTMLContent, Base, \ RootURL, Task, TaskError, LinkTaskURL, Batch, Agency, AutomatedUrlAgencySuggestion, \ UserUrlAgencySuggestion, AutoRelevantSuggestion, AutoRecordTypeSuggestion, UserRelevantSuggestion, \ UserRecordTypeSuggestion, ReviewingUserURL, URLOptionalDataSourceMetadata, ConfirmedURLAgency, Duplicate, Log, \ BacklogSnapshot, URLDataSource, URLCheckedForDuplicate, URLProbedFor404 -from collector_manager.enums import URLStatus, CollectorType -from core.DTOs.AllAnnotationPostInfo import AllAnnotationPostInfo -from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo, RejectionReason -from core.DTOs.GetMetricsBacklogResponse import GetMetricsBacklogResponseDTO, GetMetricsBacklogResponseInnerDTO -from core.DTOs.GetMetricsBatchesAggregatedResponseDTO import GetMetricsBatchesAggregatedResponseDTO, \ +from src.core.DTOs.AllAnnotationPostInfo import AllAnnotationPostInfo +from src.core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo, RejectionReason +from src.core.DTOs.GetMetricsBacklogResponse import GetMetricsBacklogResponseDTO, GetMetricsBacklogResponseInnerDTO +from src.core.DTOs.GetMetricsBatchesAggregatedResponseDTO import GetMetricsBatchesAggregatedResponseDTO, \ GetMetricsBatchesAggregatedInnerResponseDTO -from core.DTOs.GetMetricsBatchesBreakdownResponseDTO import GetMetricsBatchesBreakdownResponseDTO, \ +from src.core.DTOs.GetMetricsBatchesBreakdownResponseDTO import GetMetricsBatchesBreakdownResponseDTO, \ GetMetricsBatchesBreakdownInnerResponseDTO -from core.DTOs.GetMetricsURLsAggregatedResponseDTO import GetMetricsURLsAggregatedResponseDTO -from core.DTOs.GetMetricsURLsBreakdownPendingResponseDTO import GetMetricsURLsBreakdownPendingResponseDTO, \ +from src.core.DTOs.GetMetricsURLsAggregatedResponseDTO import GetMetricsURLsAggregatedResponseDTO +from src.core.DTOs.GetMetricsURLsBreakdownPendingResponseDTO import GetMetricsURLsBreakdownPendingResponseDTO, \ GetMetricsURLsBreakdownPendingResponseInnerDTO -from core.DTOs.GetMetricsURLsBreakdownSubmittedResponseDTO import GetMetricsURLsBreakdownSubmittedResponseDTO, \ +from src.core.DTOs.GetMetricsURLsBreakdownSubmittedResponseDTO import GetMetricsURLsBreakdownSubmittedResponseDTO, \ GetMetricsURLsBreakdownSubmittedInnerDTO -from core.DTOs.GetNextRecordTypeAnnotationResponseInfo import GetNextRecordTypeAnnotationResponseInfo -from core.DTOs.GetNextRelevanceAnnotationResponseInfo import GetNextRelevanceAnnotationResponseInfo -from core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAnnotationResponse, \ +from src.core.DTOs.GetNextRecordTypeAnnotationResponseInfo import GetNextRecordTypeAnnotationResponseInfo +from src.core.DTOs.GetNextRelevanceAnnotationResponseInfo import GetNextRelevanceAnnotationResponseInfo +from src.core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAnnotationResponse, \ GetNextURLForAgencyAgencyInfo, GetNextURLForAgencyAnnotationInnerResponse -from core.DTOs.GetNextURLForAllAnnotationResponse import GetNextURLForAllAnnotationResponse, \ +from src.core.DTOs.GetNextURLForAllAnnotationResponse import GetNextURLForAllAnnotationResponse, \ GetNextURLForAllAnnotationInnerResponse -from core.DTOs.GetNextURLForFinalReviewResponse import GetNextURLForFinalReviewResponse, FinalReviewAnnotationInfo, \ +from src.core.DTOs.GetNextURLForFinalReviewResponse import GetNextURLForFinalReviewResponse, FinalReviewAnnotationInfo, \ FinalReviewOptionalMetadata -from core.DTOs.GetTasksResponse import GetTasksResponse, GetTasksResponseTaskInfo -from core.DTOs.GetURLsResponseInfo import GetURLsResponseInfo, GetURLsResponseErrorInfo, \ +from src.core.DTOs.GetTasksResponse import GetTasksResponse, GetTasksResponseTaskInfo +from src.core.DTOs.GetURLsResponseInfo import GetURLsResponseInfo, GetURLsResponseErrorInfo, \ GetURLsResponseInnerInfo -from core.DTOs.ManualBatchInputDTO import ManualBatchInputDTO -from core.DTOs.ManualBatchResponseDTO import ManualBatchResponseDTO -from core.DTOs.SearchURLResponse import SearchURLResponse -from core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo -from core.DTOs.task_data_objects.AgencyIdentificationTDO import AgencyIdentificationTDO -from core.DTOs.task_data_objects.SubmitApprovedURLTDO import SubmitApprovedURLTDO, SubmittedURLInfo -from core.DTOs.task_data_objects.URL404ProbeTDO import URL404ProbeTDO -from core.DTOs.task_data_objects.URLDuplicateTDO import URLDuplicateTDO -from core.DTOs.task_data_objects.URLMiscellaneousMetadataTDO import URLMiscellaneousMetadataTDO, URLHTMLMetadataInfo -from core.EnvVarManager import EnvVarManager -from core.enums import BatchStatus, SuggestionType, RecordType, SuggestedStatus -from html_tag_collector.DataClassTags import convert_to_response_html_info +from src.core.DTOs.ManualBatchInputDTO import ManualBatchInputDTO +from src.core.DTOs.ManualBatchResponseDTO import ManualBatchResponseDTO +from src.core.DTOs.SearchURLResponse import SearchURLResponse +from src.core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo +from src.core.DTOs.task_data_objects.AgencyIdentificationTDO import AgencyIdentificationTDO +from src.core.DTOs.task_data_objects.SubmitApprovedURLTDO import SubmitApprovedURLTDO, SubmittedURLInfo +from src.core.DTOs.task_data_objects.URL404ProbeTDO import URL404ProbeTDO +from src.core.DTOs.task_data_objects.URLDuplicateTDO import URLDuplicateTDO +from src.core.DTOs.task_data_objects.URLMiscellaneousMetadataTDO import URLMiscellaneousMetadataTDO, URLHTMLMetadataInfo +from src.core.EnvVarManager import EnvVarManager +from src.core.enums import BatchStatus, SuggestionType, RecordType, SuggestedStatus +from src.html_tag_collector.DataClassTags import convert_to_response_html_info # Type Hints diff --git a/db/ConfigManager.py b/src/db/ConfigManager.py similarity index 100% rename from db/ConfigManager.py rename to src/db/ConfigManager.py diff --git a/db/DTOConverter.py b/src/db/DTOConverter.py similarity index 91% rename from db/DTOConverter.py rename to src/db/DTOConverter.py index d95935d4..811aefa3 100644 --- a/db/DTOConverter.py +++ b/src/db/DTOConverter.py @@ -1,17 +1,17 @@ from typing import Optional -from db.DTOs.URLHTMLContentInfo import HTMLContentType, URLHTMLContentInfo -from db.DTOs.URLInfo import URLInfo -from db.DTOs.URLWithHTML import URLWithHTML -from db.models import AutomatedUrlAgencySuggestion, UserUrlAgencySuggestion, URLHTMLContent, URL, Agency, \ +from src.db.DTOs.URLHTMLContentInfo import HTMLContentType, URLHTMLContentInfo +from src.db.DTOs.URLInfo import URLInfo +from src.db.DTOs.URLWithHTML import URLWithHTML +from src.db.models import AutomatedUrlAgencySuggestion, UserUrlAgencySuggestion, URLHTMLContent, URL, \ AutoRecordTypeSuggestion, UserRecordTypeSuggestion, UserRelevantSuggestion, AutoRelevantSuggestion, \ ConfirmedURLAgency -from core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAgencyInfo -from core.DTOs.GetNextURLForFinalReviewResponse import FinalReviewAnnotationRelevantInfo, \ +from src.core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAgencyInfo +from src.core.DTOs.GetNextURLForFinalReviewResponse import FinalReviewAnnotationRelevantInfo, \ FinalReviewAnnotationRecordTypeInfo, FinalReviewAnnotationAgencyAutoInfo, \ FinalReviewAnnotationAgencyInfo -from core.enums import RecordType, SuggestionType -from html_tag_collector.DataClassTags import ResponseHTMLInfo, ENUM_TO_ATTRIBUTE_MAPPING +from src.core.enums import RecordType, SuggestionType +from src.html_tag_collector.DataClassTags import ResponseHTMLInfo, ENUM_TO_ATTRIBUTE_MAPPING class DTOConverter: diff --git a/db/DTOs/BatchInfo.py b/src/db/DTOs/BatchInfo.py similarity index 94% rename from db/DTOs/BatchInfo.py rename to src/db/DTOs/BatchInfo.py index ba16539a..db5505bc 100644 --- a/db/DTOs/BatchInfo.py +++ b/src/db/DTOs/BatchInfo.py @@ -3,7 +3,7 @@ from pydantic import BaseModel -from core.enums import BatchStatus +from src.core.enums import BatchStatus class BatchInfo(BaseModel): diff --git a/db/DTOs/DuplicateInfo.py b/src/db/DTOs/DuplicateInfo.py similarity index 100% rename from db/DTOs/DuplicateInfo.py rename to src/db/DTOs/DuplicateInfo.py diff --git a/db/DTOs/GetTaskStatusResponseInfo.py b/src/db/DTOs/GetTaskStatusResponseInfo.py similarity index 74% rename from db/DTOs/GetTaskStatusResponseInfo.py rename to src/db/DTOs/GetTaskStatusResponseInfo.py index df44fd73..cb903ed2 100644 --- a/db/DTOs/GetTaskStatusResponseInfo.py +++ b/src/db/DTOs/GetTaskStatusResponseInfo.py @@ -1,6 +1,6 @@ from pydantic import BaseModel -from db.enums import TaskType +from src.db.enums import TaskType class GetTaskStatusResponseInfo(BaseModel): diff --git a/db/DTOs/InsertURLsInfo.py b/src/db/DTOs/InsertURLsInfo.py similarity index 81% rename from db/DTOs/InsertURLsInfo.py rename to src/db/DTOs/InsertURLsInfo.py index b7cbc924..21b89219 100644 --- a/db/DTOs/InsertURLsInfo.py +++ b/src/db/DTOs/InsertURLsInfo.py @@ -1,6 +1,6 @@ from pydantic import BaseModel -from db.DTOs.URLMapping import URLMapping +from src.db.DTOs.URLMapping import URLMapping class InsertURLsInfo(BaseModel): diff --git a/db/DTOs/LogInfo.py b/src/db/DTOs/LogInfo.py similarity index 100% rename from db/DTOs/LogInfo.py rename to src/db/DTOs/LogInfo.py diff --git a/db/DTOs/MetadataAnnotationInfo.py b/src/db/DTOs/MetadataAnnotationInfo.py similarity index 100% rename from db/DTOs/MetadataAnnotationInfo.py rename to src/db/DTOs/MetadataAnnotationInfo.py diff --git a/db/DTOs/README.md b/src/db/DTOs/README.md similarity index 100% rename from db/DTOs/README.md rename to src/db/DTOs/README.md diff --git a/db/DTOs/TaskInfo.py b/src/db/DTOs/TaskInfo.py similarity index 63% rename from db/DTOs/TaskInfo.py rename to src/db/DTOs/TaskInfo.py index feae2666..e8adadb1 100644 --- a/db/DTOs/TaskInfo.py +++ b/src/db/DTOs/TaskInfo.py @@ -3,10 +3,10 @@ from pydantic import BaseModel -from db.DTOs.URLErrorInfos import URLErrorPydanticInfo -from db.DTOs.URLInfo import URLInfo -from db.enums import TaskType -from core.enums import BatchStatus +from src.db.DTOs.URLErrorInfos import URLErrorPydanticInfo +from src.db.DTOs.URLInfo import URLInfo +from src.db.enums import TaskType +from src.core.enums import BatchStatus class TaskInfo(BaseModel): diff --git a/db/DTOs/URLAnnotationInfo.py b/src/db/DTOs/URLAnnotationInfo.py similarity index 73% rename from db/DTOs/URLAnnotationInfo.py rename to src/db/DTOs/URLAnnotationInfo.py index e83e4752..64920e9c 100644 --- a/db/DTOs/URLAnnotationInfo.py +++ b/src/db/DTOs/URLAnnotationInfo.py @@ -1,6 +1,6 @@ from pydantic import BaseModel -from db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo +from src.db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo class URLAnnotationInfo(BaseModel): diff --git a/db/DTOs/URLErrorInfos.py b/src/db/DTOs/URLErrorInfos.py similarity index 100% rename from db/DTOs/URLErrorInfos.py rename to src/db/DTOs/URLErrorInfos.py diff --git a/db/DTOs/URLHTMLContentInfo.py b/src/db/DTOs/URLHTMLContentInfo.py similarity index 100% rename from db/DTOs/URLHTMLContentInfo.py rename to src/db/DTOs/URLHTMLContentInfo.py diff --git a/db/DTOs/URLInfo.py b/src/db/DTOs/URLInfo.py similarity index 88% rename from db/DTOs/URLInfo.py rename to src/db/DTOs/URLInfo.py index 5a1d2221..3b6fc6b1 100644 --- a/db/DTOs/URLInfo.py +++ b/src/db/DTOs/URLInfo.py @@ -3,7 +3,7 @@ from pydantic import BaseModel -from collector_manager.enums import URLStatus +from src.collector_manager.enums import URLStatus class URLInfo(BaseModel): diff --git a/db/DTOs/URLMapping.py b/src/db/DTOs/URLMapping.py similarity index 100% rename from db/DTOs/URLMapping.py rename to src/db/DTOs/URLMapping.py diff --git a/db/DTOs/URLMetadataInfo.py b/src/db/DTOs/URLMetadataInfo.py similarity index 87% rename from db/DTOs/URLMetadataInfo.py rename to src/db/DTOs/URLMetadataInfo.py index 27431a99..acac01b8 100644 --- a/db/DTOs/URLMetadataInfo.py +++ b/src/db/DTOs/URLMetadataInfo.py @@ -3,7 +3,7 @@ from pydantic import BaseModel -from db.enums import URLMetadataAttributeType, ValidationStatus, ValidationSource +from src.db.enums import URLMetadataAttributeType, ValidationStatus, ValidationSource class URLMetadataInfo(BaseModel): diff --git a/db/DTOs/URLRelevancyInfo.py b/src/db/DTOs/URLRelevancyInfo.py similarity index 100% rename from db/DTOs/URLRelevancyInfo.py rename to src/db/DTOs/URLRelevancyInfo.py diff --git a/db/DTOs/URLWithHTML.py b/src/db/DTOs/URLWithHTML.py similarity index 68% rename from db/DTOs/URLWithHTML.py rename to src/db/DTOs/URLWithHTML.py index 53c77b82..0c767da8 100644 --- a/db/DTOs/URLWithHTML.py +++ b/src/db/DTOs/URLWithHTML.py @@ -1,6 +1,6 @@ from pydantic import BaseModel -from db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo +from src.db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo class URLWithHTML(BaseModel): diff --git a/db/__init__.py b/src/db/DTOs/__init__.py similarity index 100% rename from db/__init__.py rename to src/db/DTOs/__init__.py diff --git a/db/DatabaseClient.py b/src/db/DatabaseClient.py similarity index 89% rename from db/DatabaseClient.py rename to src/db/DatabaseClient.py index 030e2db6..0a6c2f02 100644 --- a/db/DatabaseClient.py +++ b/src/db/DatabaseClient.py @@ -3,23 +3,20 @@ from sqlalchemy import create_engine, update from sqlalchemy.exc import IntegrityError -from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.orm import sessionmaker, scoped_session, Session -from db.ConfigManager import ConfigManager -from db.DTOs.BatchInfo import BatchInfo -from db.DTOs.DuplicateInfo import DuplicateInsertInfo -from db.DTOs.InsertURLsInfo import InsertURLsInfo -from db.DTOs.LogInfo import LogInfo -from db.DTOs.URLInfo import URLInfo -from db.DTOs.URLMapping import URLMapping -from db.models import Base, Batch, URL, Log, Duplicate, URLDataSource -from collector_manager.enums import CollectorType, URLStatus -from core.DTOs.ManualBatchInputDTO import ManualBatchInputDTO -from core.DTOs.ManualBatchResponseDTO import ManualBatchResponseDTO -from core.DTOs.task_data_objects.SubmitApprovedURLTDO import SubmittedURLInfo -from core.EnvVarManager import EnvVarManager -from core.enums import BatchStatus +from src.collector_manager.enums import URLStatus +from src.db.ConfigManager import ConfigManager +from src.db.DTOs.BatchInfo import BatchInfo +from src.db.DTOs.DuplicateInfo import DuplicateInsertInfo +from src.db.DTOs.InsertURLsInfo import InsertURLsInfo +from src.db.DTOs.LogInfo import LogInfo +from src.db.DTOs.URLInfo import URLInfo +from src.db.DTOs.URLMapping import URLMapping +from src.db.models import Base, Batch, URL, Log, Duplicate, URLDataSource +from src.core.DTOs.task_data_objects.SubmitApprovedURLTDO import SubmittedURLInfo +from src.core.EnvVarManager import EnvVarManager +from src.core.enums import BatchStatus # Database Client diff --git a/db/README.md b/src/db/README.md similarity index 100% rename from db/README.md rename to src/db/README.md diff --git a/db/StatementComposer.py b/src/db/StatementComposer.py similarity index 86% rename from db/StatementComposer.py rename to src/db/StatementComposer.py index 73121e51..77df0dac 100644 --- a/db/StatementComposer.py +++ b/src/db/StatementComposer.py @@ -1,14 +1,13 @@ -from typing import Any, Optional +from typing import Any -from sqlalchemy import Select, select, exists, Table, func, Subquery, and_, not_, ColumnElement, case, literal, CTE +from sqlalchemy import Select, select, exists, func, Subquery, and_, not_, ColumnElement from sqlalchemy.orm import aliased -from db.enums import URLMetadataAttributeType, ValidationStatus, TaskType -from db.models import URL, URLHTMLContent, AutomatedUrlAgencySuggestion, URLOptionalDataSourceMetadata, Batch, \ - ConfirmedURLAgency, LinkTaskURL, Task, UserUrlAgencySuggestion, UserRecordTypeSuggestion, UserRelevantSuggestion, \ - AutoRecordTypeSuggestion, AutoRelevantSuggestion, ReviewingUserURL -from collector_manager.enums import URLStatus, CollectorType -from core.enums import BatchStatus +from src.collector_manager.enums import URLStatus +from src.db.enums import TaskType +from src.db.models import URL, URLHTMLContent, AutomatedUrlAgencySuggestion, URLOptionalDataSourceMetadata, Batch, \ + ConfirmedURLAgency, LinkTaskURL, Task, UserUrlAgencySuggestion, UserRecordTypeSuggestion, UserRelevantSuggestion +from src.core.enums import BatchStatus class StatementComposer: diff --git a/html_tag_collector/__init__.py b/src/db/__init__.py similarity index 100% rename from html_tag_collector/__init__.py rename to src/db/__init__.py diff --git a/db/constants.py b/src/db/constants.py similarity index 100% rename from db/constants.py rename to src/db/constants.py diff --git a/db/enums.py b/src/db/enums.py similarity index 100% rename from db/enums.py rename to src/db/enums.py diff --git a/db/helper_functions.py b/src/db/helper_functions.py similarity index 64% rename from db/helper_functions.py rename to src/db/helper_functions.py index 4f99556a..cf0efcc3 100644 --- a/db/helper_functions.py +++ b/src/db/helper_functions.py @@ -1,8 +1,4 @@ -import os - -import dotenv - -from core.EnvVarManager import EnvVarManager +from src.core.EnvVarManager import EnvVarManager def get_postgres_connection_string(is_async = False): diff --git a/db/models.py b/src/db/models.py similarity index 99% rename from db/models.py rename to src/db/models.py index 83ca97b4..3c4b615f 100644 --- a/db/models.py +++ b/src/db/models.py @@ -2,13 +2,13 @@ SQLAlchemy ORM models """ from sqlalchemy import func, Column, Integer, String, TIMESTAMP, Float, JSON, ForeignKey, Text, UniqueConstraint, \ - Boolean, DateTime, ARRAY + Boolean, ARRAY from sqlalchemy.dialects import postgresql from sqlalchemy.orm import declarative_base, relationship -from db.enums import PGEnum, TaskType -from core.enums import BatchStatus, RecordType -from util.helper_functions import get_enum_values +from src.db.enums import PGEnum, TaskType +from src.core.enums import BatchStatus, RecordType +from src.util.helper_functions import get_enum_values # Base class for SQLAlchemy ORM models Base = declarative_base() diff --git a/html_tag_collector/DataClassTags.py b/src/html_tag_collector/DataClassTags.py similarity index 92% rename from html_tag_collector/DataClassTags.py rename to src/html_tag_collector/DataClassTags.py index 12a0ecc3..c920a563 100644 --- a/html_tag_collector/DataClassTags.py +++ b/src/html_tag_collector/DataClassTags.py @@ -1,6 +1,6 @@ from dataclasses import dataclass -from db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo, HTMLContentType +from src.db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo, HTMLContentType @dataclass diff --git a/html_tag_collector/README.md b/src/html_tag_collector/README.md similarity index 100% rename from html_tag_collector/README.md rename to src/html_tag_collector/README.md diff --git a/html_tag_collector/ResponseParser.py b/src/html_tag_collector/ResponseParser.py similarity index 90% rename from html_tag_collector/ResponseParser.py rename to src/html_tag_collector/ResponseParser.py index 0a489eaf..4f6c5f74 100644 --- a/html_tag_collector/ResponseParser.py +++ b/src/html_tag_collector/ResponseParser.py @@ -1,18 +1,14 @@ import json -from collections import namedtuple -from dataclasses import asdict from enum import Enum from typing import Optional -import bs4 from bs4 import BeautifulSoup -from requests import Response -from html_tag_collector.DataClassTags import ResponseHTMLInfo -from html_tag_collector.RootURLCache import RootURLCache -from html_tag_collector.constants import HEADER_TAGS -from html_tag_collector.url_adjustment_functions import drop_hostname, remove_trailing_backslash, add_https -from html_tag_collector.util import remove_excess_whitespace +from src.html_tag_collector.DataClassTags import ResponseHTMLInfo +from src.html_tag_collector.RootURLCache import RootURLCache +from src.html_tag_collector.constants import HEADER_TAGS +from src.html_tag_collector.url_adjustment_functions import drop_hostname, remove_trailing_backslash, add_https +from src.html_tag_collector.util import remove_excess_whitespace class ParserTypeEnum(Enum): LXML = "lxml" diff --git a/html_tag_collector/RootURLCache.py b/src/html_tag_collector/RootURLCache.py similarity index 95% rename from html_tag_collector/RootURLCache.py rename to src/html_tag_collector/RootURLCache.py index b5f3f413..1231752f 100644 --- a/html_tag_collector/RootURLCache.py +++ b/src/html_tag_collector/RootURLCache.py @@ -5,8 +5,8 @@ from aiohttp import ClientSession from bs4 import BeautifulSoup -from db.AsyncDatabaseClient import AsyncDatabaseClient -from html_tag_collector.constants import REQUEST_HEADERS +from src.db.AsyncDatabaseClient import AsyncDatabaseClient +from src.html_tag_collector.constants import REQUEST_HEADERS DEBUG = False diff --git a/html_tag_collector/URLRequestInterface.py b/src/html_tag_collector/URLRequestInterface.py similarity index 100% rename from html_tag_collector/URLRequestInterface.py rename to src/html_tag_collector/URLRequestInterface.py diff --git a/llm_api_logic/__init__.py b/src/html_tag_collector/__init__.py similarity index 100% rename from llm_api_logic/__init__.py rename to src/html_tag_collector/__init__.py diff --git a/html_tag_collector/constants.py b/src/html_tag_collector/constants.py similarity index 100% rename from html_tag_collector/constants.py rename to src/html_tag_collector/constants.py diff --git a/html_tag_collector/url_adjustment_functions.py b/src/html_tag_collector/url_adjustment_functions.py similarity index 100% rename from html_tag_collector/url_adjustment_functions.py rename to src/html_tag_collector/url_adjustment_functions.py diff --git a/html_tag_collector/util.py b/src/html_tag_collector/util.py similarity index 100% rename from html_tag_collector/util.py rename to src/html_tag_collector/util.py diff --git a/llm_api_logic/DeepSeekRecordClassifier.py b/src/llm_api_logic/DeepSeekRecordClassifier.py similarity index 75% rename from llm_api_logic/DeepSeekRecordClassifier.py rename to src/llm_api_logic/DeepSeekRecordClassifier.py index e770f3c0..d9c71441 100644 --- a/llm_api_logic/DeepSeekRecordClassifier.py +++ b/src/llm_api_logic/DeepSeekRecordClassifier.py @@ -1,11 +1,8 @@ -import json import os from openai import AsyncOpenAI -from db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo -from core.enums import RecordType -from llm_api_logic.LLMRecordClassifierBase import RecordClassifierBase +from src.llm_api_logic.LLMRecordClassifierBase import RecordClassifierBase class DeepSeekRecordClassifier(RecordClassifierBase): diff --git a/llm_api_logic/LLMRecordClassifierBase.py b/src/llm_api_logic/LLMRecordClassifierBase.py similarity index 86% rename from llm_api_logic/LLMRecordClassifierBase.py rename to src/llm_api_logic/LLMRecordClassifierBase.py index 5648a90f..a29b8d65 100644 --- a/llm_api_logic/LLMRecordClassifierBase.py +++ b/src/llm_api_logic/LLMRecordClassifierBase.py @@ -4,10 +4,10 @@ from openai import AsyncOpenAI -from db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo -from llm_api_logic.RecordTypeStructuredOutput import RecordTypeStructuredOutput -from llm_api_logic.constants import RECORD_CLASSIFICATION_QUERY_CONTENT -from llm_api_logic.helpers import dictify_html_info +from src.db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo +from src.llm_api_logic.RecordTypeStructuredOutput import RecordTypeStructuredOutput +from src.llm_api_logic.constants import RECORD_CLASSIFICATION_QUERY_CONTENT +from src.llm_api_logic.helpers import dictify_html_info class RecordClassifierBase(ABC): diff --git a/llm_api_logic/OpenAIRecordClassifier.py b/src/llm_api_logic/OpenAIRecordClassifier.py similarity index 77% rename from llm_api_logic/OpenAIRecordClassifier.py rename to src/llm_api_logic/OpenAIRecordClassifier.py index cc0829b5..3511b193 100644 --- a/llm_api_logic/OpenAIRecordClassifier.py +++ b/src/llm_api_logic/OpenAIRecordClassifier.py @@ -1,9 +1,9 @@ from openai.types.chat import ParsedChatCompletion -from core.EnvVarManager import EnvVarManager -from llm_api_logic.LLMRecordClassifierBase import RecordClassifierBase -from llm_api_logic.RecordTypeStructuredOutput import RecordTypeStructuredOutput +from src.core.EnvVarManager import EnvVarManager +from src.llm_api_logic.LLMRecordClassifierBase import RecordClassifierBase +from src.llm_api_logic.RecordTypeStructuredOutput import RecordTypeStructuredOutput class OpenAIRecordClassifier(RecordClassifierBase): diff --git a/llm_api_logic/RecordTypeStructuredOutput.py b/src/llm_api_logic/RecordTypeStructuredOutput.py similarity index 86% rename from llm_api_logic/RecordTypeStructuredOutput.py rename to src/llm_api_logic/RecordTypeStructuredOutput.py index a5993ae9..735254a1 100644 --- a/llm_api_logic/RecordTypeStructuredOutput.py +++ b/src/llm_api_logic/RecordTypeStructuredOutput.py @@ -5,7 +5,7 @@ from pydantic import BaseModel -from core.enums import RecordType +from src.core.enums import RecordType diff --git a/pdap_api_client/__init__.py b/src/llm_api_logic/__init__.py similarity index 100% rename from pdap_api_client/__init__.py rename to src/llm_api_logic/__init__.py diff --git a/llm_api_logic/constants.py b/src/llm_api_logic/constants.py similarity index 100% rename from llm_api_logic/constants.py rename to src/llm_api_logic/constants.py diff --git a/llm_api_logic/helpers.py b/src/llm_api_logic/helpers.py similarity index 76% rename from llm_api_logic/helpers.py rename to src/llm_api_logic/helpers.py index b8a81b13..e1e0ffea 100644 --- a/llm_api_logic/helpers.py +++ b/src/llm_api_logic/helpers.py @@ -1,4 +1,4 @@ -from db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo +from src.db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo def dictify_html_info(html_infos: list[URLHTMLContentInfo]) -> dict[str, str]: diff --git a/pdap_api_client/DTOs.py b/src/pdap_api_client/DTOs.py similarity index 91% rename from pdap_api_client/DTOs.py rename to src/pdap_api_client/DTOs.py index 23d240d7..960e1995 100644 --- a/pdap_api_client/DTOs.py +++ b/src/pdap_api_client/DTOs.py @@ -3,7 +3,7 @@ from pydantic import BaseModel -from pdap_api_client.enums import MatchAgencyResponseStatus +from src.pdap_api_client.enums import MatchAgencyResponseStatus class MatchAgencyInfo(BaseModel): diff --git a/pdap_api_client/PDAPClient.py b/src/pdap_api_client/PDAPClient.py similarity index 94% rename from pdap_api_client/PDAPClient.py rename to src/pdap_api_client/PDAPClient.py index 491b7c3b..653d9c5d 100644 --- a/pdap_api_client/PDAPClient.py +++ b/src/pdap_api_client/PDAPClient.py @@ -1,9 +1,9 @@ from typing import Optional -from core.DTOs.task_data_objects.SubmitApprovedURLTDO import SubmitApprovedURLTDO, SubmittedURLInfo -from pdap_api_client.DTOs import MatchAgencyInfo, UniqueURLDuplicateInfo, \ +from src.core.DTOs.task_data_objects.SubmitApprovedURLTDO import SubmitApprovedURLTDO, SubmittedURLInfo +from src.pdap_api_client.DTOs import MatchAgencyInfo, UniqueURLDuplicateInfo, \ MatchAgencyResponse -from pdap_api_client.enums import MatchAgencyResponseStatus +from src.pdap_api_client.enums import MatchAgencyResponseStatus from pdap_access_manager import AccessManager, DataSourcesNamespaces, RequestInfo, RequestType diff --git a/security_manager/__init__.py b/src/pdap_api_client/__init__.py similarity index 100% rename from security_manager/__init__.py rename to src/pdap_api_client/__init__.py diff --git a/pdap_api_client/enums.py b/src/pdap_api_client/enums.py similarity index 100% rename from pdap_api_client/enums.py rename to src/pdap_api_client/enums.py diff --git a/security_manager/SecurityManager.py b/src/security_manager/SecurityManager.py similarity index 100% rename from security_manager/SecurityManager.py rename to src/security_manager/SecurityManager.py diff --git a/source_collectors/__init__.py b/src/security_manager/__init__.py similarity index 100% rename from source_collectors/__init__.py rename to src/security_manager/__init__.py diff --git a/source_collectors/README.md b/src/source_collectors/README.md similarity index 100% rename from source_collectors/README.md rename to src/source_collectors/README.md diff --git a/source_collectors/auto_googler/__init__.py b/src/source_collectors/__init__.py similarity index 100% rename from source_collectors/auto_googler/__init__.py rename to src/source_collectors/__init__.py diff --git a/source_collectors/auto_googler/AutoGoogler.py b/src/source_collectors/auto_googler/AutoGoogler.py similarity index 79% rename from source_collectors/auto_googler/AutoGoogler.py rename to src/source_collectors/auto_googler/AutoGoogler.py index 368f75fb..b6e5b96d 100644 --- a/source_collectors/auto_googler/AutoGoogler.py +++ b/src/source_collectors/auto_googler/AutoGoogler.py @@ -1,8 +1,6 @@ -import asyncio - -from source_collectors.auto_googler.DTOs import GoogleSearchQueryResultsInnerDTO -from source_collectors.auto_googler.GoogleSearcher import GoogleSearcher -from source_collectors.auto_googler.SearchConfig import SearchConfig +from src.source_collectors.auto_googler.DTOs import GoogleSearchQueryResultsInnerDTO +from src.source_collectors.auto_googler.GoogleSearcher import GoogleSearcher +from src.source_collectors.auto_googler.SearchConfig import SearchConfig class AutoGoogler: diff --git a/source_collectors/auto_googler/AutoGooglerCollector.py b/src/source_collectors/auto_googler/AutoGooglerCollector.py similarity index 65% rename from source_collectors/auto_googler/AutoGooglerCollector.py rename to src/source_collectors/auto_googler/AutoGooglerCollector.py index 01387d0b..f9d06265 100644 --- a/source_collectors/auto_googler/AutoGooglerCollector.py +++ b/src/source_collectors/auto_googler/AutoGooglerCollector.py @@ -1,13 +1,13 @@ -from collector_manager.AsyncCollectorBase import AsyncCollectorBase -from collector_manager.enums import CollectorType -from core.EnvVarManager import EnvVarManager -from core.preprocessors.AutoGooglerPreprocessor import AutoGooglerPreprocessor -from source_collectors.auto_googler.AutoGoogler import AutoGoogler -from source_collectors.auto_googler.DTOs import AutoGooglerInputDTO, AutoGooglerInnerOutputDTO -from source_collectors.auto_googler.GoogleSearcher import GoogleSearcher -from source_collectors.auto_googler.SearchConfig import SearchConfig -from util.helper_functions import base_model_list_dump +from src.collector_manager.AsyncCollectorBase import AsyncCollectorBase +from src.collector_manager.enums import CollectorType +from src.core.EnvVarManager import EnvVarManager +from src.core.preprocessors.AutoGooglerPreprocessor import AutoGooglerPreprocessor +from src.source_collectors.auto_googler.AutoGoogler import AutoGoogler +from src.source_collectors.auto_googler.DTOs import AutoGooglerInputDTO, AutoGooglerInnerOutputDTO +from src.source_collectors.auto_googler.GoogleSearcher import GoogleSearcher +from src.source_collectors.auto_googler.SearchConfig import SearchConfig +from src.util.helper_functions import base_model_list_dump class AutoGooglerCollector(AsyncCollectorBase): diff --git a/source_collectors/auto_googler/DTOs.py b/src/source_collectors/auto_googler/DTOs.py similarity index 100% rename from source_collectors/auto_googler/DTOs.py rename to src/source_collectors/auto_googler/DTOs.py diff --git a/source_collectors/auto_googler/GoogleSearcher.py b/src/source_collectors/auto_googler/GoogleSearcher.py similarity index 95% rename from source_collectors/auto_googler/GoogleSearcher.py rename to src/source_collectors/auto_googler/GoogleSearcher.py index fe52ea45..c7cf73b8 100644 --- a/source_collectors/auto_googler/GoogleSearcher.py +++ b/src/source_collectors/auto_googler/GoogleSearcher.py @@ -1,11 +1,9 @@ -import asyncio from typing import Union import aiohttp -from googleapiclient.discovery import build from googleapiclient.errors import HttpError -from source_collectors.auto_googler.DTOs import GoogleSearchQueryResultsInnerDTO +from src.source_collectors.auto_googler.DTOs import GoogleSearchQueryResultsInnerDTO class QuotaExceededError(Exception): diff --git a/source_collectors/auto_googler/README.md b/src/source_collectors/auto_googler/README.md similarity index 100% rename from source_collectors/auto_googler/README.md rename to src/source_collectors/auto_googler/README.md diff --git a/source_collectors/auto_googler/SearchConfig.py b/src/source_collectors/auto_googler/SearchConfig.py similarity index 100% rename from source_collectors/auto_googler/SearchConfig.py rename to src/source_collectors/auto_googler/SearchConfig.py diff --git a/source_collectors/ckan/__init__.py b/src/source_collectors/auto_googler/__init__.py similarity index 100% rename from source_collectors/ckan/__init__.py rename to src/source_collectors/auto_googler/__init__.py diff --git a/source_collectors/ckan/CKANAPIInterface.py b/src/source_collectors/ckan/CKANAPIInterface.py similarity index 100% rename from source_collectors/ckan/CKANAPIInterface.py rename to src/source_collectors/ckan/CKANAPIInterface.py diff --git a/source_collectors/ckan/CKANCollector.py b/src/source_collectors/ckan/CKANCollector.py similarity index 81% rename from source_collectors/ckan/CKANCollector.py rename to src/source_collectors/ckan/CKANCollector.py index 873a8593..2dee4258 100644 --- a/source_collectors/ckan/CKANCollector.py +++ b/src/source_collectors/ckan/CKANCollector.py @@ -1,14 +1,14 @@ from pydantic import BaseModel -from collector_manager.AsyncCollectorBase import AsyncCollectorBase -from collector_manager.enums import CollectorType -from core.preprocessors.CKANPreprocessor import CKANPreprocessor -from source_collectors.ckan.DTOs import CKANInputDTO -from source_collectors.ckan.ckan_scraper_toolkit import ckan_package_search, ckan_group_package_show, \ +from src.collector_manager.AsyncCollectorBase import AsyncCollectorBase +from src.collector_manager.enums import CollectorType +from src.core.preprocessors.CKANPreprocessor import CKANPreprocessor +from src.source_collectors.ckan.DTOs import CKANInputDTO +from src.source_collectors.ckan.ckan_scraper_toolkit import ckan_package_search, ckan_group_package_show, \ ckan_package_search_from_organization -from source_collectors.ckan.scrape_ckan_data_portals import perform_search, get_flat_list, deduplicate_entries, \ +from src.source_collectors.ckan.scrape_ckan_data_portals import perform_search, get_flat_list, deduplicate_entries, \ get_collections, filter_result, parse_result -from util.helper_functions import base_model_list_dump +from src.util.helper_functions import base_model_list_dump SEARCH_FUNCTION_MAPPINGS = { "package_search": ckan_package_search, diff --git a/source_collectors/ckan/DTOs.py b/src/source_collectors/ckan/DTOs.py similarity index 100% rename from source_collectors/ckan/DTOs.py rename to src/source_collectors/ckan/DTOs.py diff --git a/source_collectors/ckan/README.md b/src/source_collectors/ckan/README.md similarity index 100% rename from source_collectors/ckan/README.md rename to src/source_collectors/ckan/README.md diff --git a/source_collectors/common_crawler/__init__.py b/src/source_collectors/ckan/__init__.py similarity index 100% rename from source_collectors/common_crawler/__init__.py rename to src/source_collectors/ckan/__init__.py diff --git a/source_collectors/ckan/ckan_scraper_toolkit.py b/src/source_collectors/ckan/ckan_scraper_toolkit.py similarity index 99% rename from source_collectors/ckan/ckan_scraper_toolkit.py rename to src/source_collectors/ckan/ckan_scraper_toolkit.py index 641dec2a..2dca5e51 100644 --- a/source_collectors/ckan/ckan_scraper_toolkit.py +++ b/src/source_collectors/ckan/ckan_scraper_toolkit.py @@ -10,7 +10,7 @@ import aiohttp from bs4 import BeautifulSoup, ResultSet, Tag -from source_collectors.ckan.CKANAPIInterface import CKANAPIInterface +from src.source_collectors.ckan.CKANAPIInterface import CKANAPIInterface @dataclass diff --git a/source_collectors/ckan/constants.py b/src/source_collectors/ckan/constants.py similarity index 100% rename from source_collectors/ckan/constants.py rename to src/source_collectors/ckan/constants.py diff --git a/source_collectors/ckan/scrape_ckan_data_portals.py b/src/source_collectors/ckan/scrape_ckan_data_portals.py similarity index 97% rename from source_collectors/ckan/scrape_ckan_data_portals.py rename to src/source_collectors/ckan/scrape_ckan_data_portals.py index 3a292b02..48c810f8 100644 --- a/source_collectors/ckan/scrape_ckan_data_portals.py +++ b/src/source_collectors/ckan/scrape_ckan_data_portals.py @@ -7,8 +7,8 @@ from from_root import from_root from tqdm import tqdm -from source_collectors.ckan.ckan_scraper_toolkit import Package, ckan_collection_search -from source_collectors.ckan.constants import CKAN_DATA_TYPES, CKAN_TYPE_CONVERSION_MAPPING +from src.source_collectors.ckan.ckan_scraper_toolkit import Package, ckan_collection_search +from src.source_collectors.ckan.constants import CKAN_DATA_TYPES, CKAN_TYPE_CONVERSION_MAPPING p = from_root(".pydocstyle").parent sys.path.insert(1, str(p)) diff --git a/source_collectors/ckan/search_terms.py b/src/source_collectors/ckan/search_terms.py similarity index 100% rename from source_collectors/ckan/search_terms.py rename to src/source_collectors/ckan/search_terms.py diff --git a/source_collectors/common_crawler/CommonCrawler.py b/src/source_collectors/common_crawler/CommonCrawler.py similarity index 98% rename from source_collectors/common_crawler/CommonCrawler.py rename to src/source_collectors/common_crawler/CommonCrawler.py index db683611..64649b77 100644 --- a/source_collectors/common_crawler/CommonCrawler.py +++ b/src/source_collectors/common_crawler/CommonCrawler.py @@ -1,4 +1,3 @@ -import asyncio import json import time from http import HTTPStatus @@ -7,7 +6,7 @@ import aiohttp -from source_collectors.common_crawler.utils import URLWithParameters +from src.source_collectors.common_crawler.utils import URLWithParameters async def async_make_request( search_url: 'URLWithParameters' diff --git a/source_collectors/common_crawler/CommonCrawlerCollector.py b/src/source_collectors/common_crawler/CommonCrawlerCollector.py similarity index 64% rename from source_collectors/common_crawler/CommonCrawlerCollector.py rename to src/source_collectors/common_crawler/CommonCrawlerCollector.py index eb28d545..571a847e 100644 --- a/source_collectors/common_crawler/CommonCrawlerCollector.py +++ b/src/source_collectors/common_crawler/CommonCrawlerCollector.py @@ -1,8 +1,8 @@ -from collector_manager.AsyncCollectorBase import AsyncCollectorBase -from collector_manager.enums import CollectorType -from core.preprocessors.CommonCrawlerPreprocessor import CommonCrawlerPreprocessor -from source_collectors.common_crawler.CommonCrawler import CommonCrawler -from source_collectors.common_crawler.DTOs import CommonCrawlerInputDTO +from src.collector_manager.AsyncCollectorBase import AsyncCollectorBase +from src.collector_manager.enums import CollectorType +from src.core.preprocessors.CommonCrawlerPreprocessor import CommonCrawlerPreprocessor +from src.source_collectors.common_crawler.CommonCrawler import CommonCrawler +from src.source_collectors.common_crawler.DTOs import CommonCrawlerInputDTO class CommonCrawlerCollector(AsyncCollectorBase): diff --git a/source_collectors/common_crawler/DTOs.py b/src/source_collectors/common_crawler/DTOs.py similarity index 100% rename from source_collectors/common_crawler/DTOs.py rename to src/source_collectors/common_crawler/DTOs.py diff --git a/source_collectors/helpers/__init__.py b/src/source_collectors/common_crawler/__init__.py similarity index 100% rename from source_collectors/helpers/__init__.py rename to src/source_collectors/common_crawler/__init__.py diff --git a/source_collectors/common_crawler/crawler.py b/src/source_collectors/common_crawler/crawler.py similarity index 100% rename from source_collectors/common_crawler/crawler.py rename to src/source_collectors/common_crawler/crawler.py diff --git a/source_collectors/common_crawler/utils.py b/src/source_collectors/common_crawler/utils.py similarity index 100% rename from source_collectors/common_crawler/utils.py rename to src/source_collectors/common_crawler/utils.py diff --git a/source_collectors/helpers/RequestManager.py b/src/source_collectors/helpers/RequestManager.py similarity index 100% rename from source_collectors/helpers/RequestManager.py rename to src/source_collectors/helpers/RequestManager.py diff --git a/source_collectors/muckrock/__init__.py b/src/source_collectors/helpers/__init__.py similarity index 100% rename from source_collectors/muckrock/__init__.py rename to src/source_collectors/helpers/__init__.py diff --git a/source_collectors/muckrock/.gitignore b/src/source_collectors/muckrock/.gitignore similarity index 100% rename from source_collectors/muckrock/.gitignore rename to src/source_collectors/muckrock/.gitignore diff --git a/source_collectors/muckrock/DTOs.py b/src/source_collectors/muckrock/DTOs.py similarity index 100% rename from source_collectors/muckrock/DTOs.py rename to src/source_collectors/muckrock/DTOs.py diff --git a/source_collectors/muckrock/MuckrockAPIInterface.py b/src/source_collectors/muckrock/MuckrockAPIInterface.py similarity index 100% rename from source_collectors/muckrock/MuckrockAPIInterface.py rename to src/source_collectors/muckrock/MuckrockAPIInterface.py diff --git a/source_collectors/muckrock/README.md b/src/source_collectors/muckrock/README.md similarity index 100% rename from source_collectors/muckrock/README.md rename to src/source_collectors/muckrock/README.md diff --git a/source_collectors/muckrock/classes/__init__.py b/src/source_collectors/muckrock/__init__.py similarity index 100% rename from source_collectors/muckrock/classes/__init__.py rename to src/source_collectors/muckrock/__init__.py diff --git a/source_collectors/muckrock/allegheny-county-towns.txt b/src/source_collectors/muckrock/allegheny-county-towns.txt similarity index 100% rename from source_collectors/muckrock/allegheny-county-towns.txt rename to src/source_collectors/muckrock/allegheny-county-towns.txt diff --git a/source_collectors/muckrock/classes/FOIASearcher.py b/src/source_collectors/muckrock/classes/FOIASearcher.py similarity index 96% rename from source_collectors/muckrock/classes/FOIASearcher.py rename to src/source_collectors/muckrock/classes/FOIASearcher.py index cb3af7e8..a6cde337 100644 --- a/source_collectors/muckrock/classes/FOIASearcher.py +++ b/src/source_collectors/muckrock/classes/FOIASearcher.py @@ -2,7 +2,7 @@ from tqdm import tqdm -from source_collectors.muckrock.classes.muckrock_fetchers import FOIAFetcher +from src.source_collectors.muckrock.classes.muckrock_fetchers import FOIAFetcher class SearchCompleteException(Exception): diff --git a/source_collectors/muckrock/classes/MuckrockCollector.py b/src/source_collectors/muckrock/classes/MuckrockCollector.py similarity index 83% rename from source_collectors/muckrock/classes/MuckrockCollector.py rename to src/source_collectors/muckrock/classes/MuckrockCollector.py index 0511a21d..38a52af8 100644 --- a/source_collectors/muckrock/classes/MuckrockCollector.py +++ b/src/source_collectors/muckrock/classes/MuckrockCollector.py @@ -1,18 +1,18 @@ import itertools -from collector_manager.AsyncCollectorBase import AsyncCollectorBase -from collector_manager.enums import CollectorType -from core.preprocessors.MuckrockPreprocessor import MuckrockPreprocessor -from source_collectors.muckrock.DTOs import MuckrockAllFOIARequestsCollectorInputDTO, \ +from src.collector_manager.AsyncCollectorBase import AsyncCollectorBase +from src.collector_manager.enums import CollectorType +from src.core.preprocessors.MuckrockPreprocessor import MuckrockPreprocessor +from src.source_collectors.muckrock.DTOs import MuckrockAllFOIARequestsCollectorInputDTO, \ MuckrockCountySearchCollectorInputDTO, MuckrockSimpleSearchCollectorInputDTO -from source_collectors.muckrock.classes.FOIASearcher import FOIASearcher, SearchCompleteException -from source_collectors.muckrock.classes.fetch_requests.FOIALoopFetchRequest import FOIALoopFetchRequest -from source_collectors.muckrock.classes.fetch_requests.JurisdictionLoopFetchRequest import JurisdictionLoopFetchRequest -from source_collectors.muckrock.classes.muckrock_fetchers.FOIAFetcher import FOIAFetcher -from source_collectors.muckrock.classes.muckrock_fetchers.FOIALoopFetcher import FOIALoopFetcher -from source_collectors.muckrock.classes.muckrock_fetchers.JurisdictionGeneratorFetcher import \ +from src.source_collectors.muckrock.classes.FOIASearcher import FOIASearcher, SearchCompleteException +from src.source_collectors.muckrock.classes.fetch_requests.FOIALoopFetchRequest import FOIALoopFetchRequest +from src.source_collectors.muckrock.classes.fetch_requests.JurisdictionLoopFetchRequest import JurisdictionLoopFetchRequest +from src.source_collectors.muckrock.classes.muckrock_fetchers.FOIAFetcher import FOIAFetcher +from src.source_collectors.muckrock.classes.muckrock_fetchers.FOIALoopFetcher import FOIALoopFetcher +from src.source_collectors.muckrock.classes.muckrock_fetchers.JurisdictionGeneratorFetcher import \ JurisdictionGeneratorFetcher -from source_collectors.muckrock.classes.muckrock_fetchers.MuckrockFetcher import MuckrockNoMoreDataError +from src.source_collectors.muckrock.classes.muckrock_fetchers.MuckrockFetcher import MuckrockNoMoreDataError class MuckrockSimpleSearchCollector(AsyncCollectorBase): diff --git a/source_collectors/muckrock/classes/exceptions/__init__.py b/src/source_collectors/muckrock/classes/__init__.py similarity index 100% rename from source_collectors/muckrock/classes/exceptions/__init__.py rename to src/source_collectors/muckrock/classes/__init__.py diff --git a/source_collectors/muckrock/classes/exceptions/RequestFailureException.py b/src/source_collectors/muckrock/classes/exceptions/RequestFailureException.py similarity index 100% rename from source_collectors/muckrock/classes/exceptions/RequestFailureException.py rename to src/source_collectors/muckrock/classes/exceptions/RequestFailureException.py diff --git a/source_collectors/muckrock/classes/fetch_requests/__init__.py b/src/source_collectors/muckrock/classes/exceptions/__init__.py similarity index 100% rename from source_collectors/muckrock/classes/fetch_requests/__init__.py rename to src/source_collectors/muckrock/classes/exceptions/__init__.py diff --git a/src/source_collectors/muckrock/classes/fetch_requests/FOIALoopFetchRequest.py b/src/source_collectors/muckrock/classes/fetch_requests/FOIALoopFetchRequest.py new file mode 100644 index 00000000..be008edf --- /dev/null +++ b/src/source_collectors/muckrock/classes/fetch_requests/FOIALoopFetchRequest.py @@ -0,0 +1,5 @@ +from src.source_collectors.muckrock.classes.fetch_requests.FetchRequestBase import FetchRequest + + +class FOIALoopFetchRequest(FetchRequest): + jurisdiction: int diff --git a/source_collectors/muckrock/classes/fetch_requests/FetchRequestBase.py b/src/source_collectors/muckrock/classes/fetch_requests/FetchRequestBase.py similarity index 100% rename from source_collectors/muckrock/classes/fetch_requests/FetchRequestBase.py rename to src/source_collectors/muckrock/classes/fetch_requests/FetchRequestBase.py diff --git a/source_collectors/muckrock/classes/fetch_requests/JurisdictionLoopFetchRequest.py b/src/source_collectors/muckrock/classes/fetch_requests/JurisdictionLoopFetchRequest.py similarity index 52% rename from source_collectors/muckrock/classes/fetch_requests/JurisdictionLoopFetchRequest.py rename to src/source_collectors/muckrock/classes/fetch_requests/JurisdictionLoopFetchRequest.py index 5941fa4a..7adfbdd4 100644 --- a/source_collectors/muckrock/classes/fetch_requests/JurisdictionLoopFetchRequest.py +++ b/src/source_collectors/muckrock/classes/fetch_requests/JurisdictionLoopFetchRequest.py @@ -1,4 +1,4 @@ -from source_collectors.muckrock.classes.fetch_requests.FetchRequestBase import FetchRequest +from src.source_collectors.muckrock.classes.fetch_requests.FetchRequestBase import FetchRequest class JurisdictionLoopFetchRequest(FetchRequest): diff --git a/source_collectors/muckrock/classes/muckrock_fetchers/__init__.py b/src/source_collectors/muckrock/classes/fetch_requests/__init__.py similarity index 100% rename from source_collectors/muckrock/classes/muckrock_fetchers/__init__.py rename to src/source_collectors/muckrock/classes/fetch_requests/__init__.py diff --git a/source_collectors/muckrock/classes/muckrock_fetchers/AgencyFetcher.py b/src/source_collectors/muckrock/classes/muckrock_fetchers/AgencyFetcher.py similarity index 56% rename from source_collectors/muckrock/classes/muckrock_fetchers/AgencyFetcher.py rename to src/source_collectors/muckrock/classes/muckrock_fetchers/AgencyFetcher.py index e73180df..abb59c6d 100644 --- a/source_collectors/muckrock/classes/muckrock_fetchers/AgencyFetcher.py +++ b/src/source_collectors/muckrock/classes/muckrock_fetchers/AgencyFetcher.py @@ -1,6 +1,6 @@ -from source_collectors.muckrock.classes.fetch_requests.FetchRequestBase import FetchRequest -from source_collectors.muckrock.classes.muckrock_fetchers.MuckrockFetcher import MuckrockFetcher -from source_collectors.muckrock.constants import BASE_MUCKROCK_URL +from src.source_collectors.muckrock.classes.fetch_requests.FetchRequestBase import FetchRequest +from src.source_collectors.muckrock.classes.muckrock_fetchers.MuckrockFetcher import MuckrockFetcher +from src.source_collectors.muckrock.constants import BASE_MUCKROCK_URL class AgencyFetchRequest(FetchRequest): diff --git a/source_collectors/muckrock/classes/muckrock_fetchers/FOIAFetchManager.py b/src/source_collectors/muckrock/classes/muckrock_fetchers/FOIAFetchManager.py similarity index 74% rename from source_collectors/muckrock/classes/muckrock_fetchers/FOIAFetchManager.py rename to src/source_collectors/muckrock/classes/muckrock_fetchers/FOIAFetchManager.py index 0a405596..1b843efd 100644 --- a/source_collectors/muckrock/classes/muckrock_fetchers/FOIAFetchManager.py +++ b/src/source_collectors/muckrock/classes/muckrock_fetchers/FOIAFetchManager.py @@ -1,5 +1,5 @@ -from source_collectors.muckrock.classes.fetch_requests.FOIALoopFetchRequest import FOIALoopFetchRequest -from source_collectors.muckrock.constants import BASE_MUCKROCK_URL +from src.source_collectors.muckrock.classes.fetch_requests.FOIALoopFetchRequest import FOIALoopFetchRequest +from src.source_collectors.muckrock.constants import BASE_MUCKROCK_URL class FOIAFetchManager: diff --git a/source_collectors/muckrock/classes/muckrock_fetchers/FOIAFetcher.py b/src/source_collectors/muckrock/classes/muckrock_fetchers/FOIAFetcher.py similarity index 81% rename from source_collectors/muckrock/classes/muckrock_fetchers/FOIAFetcher.py rename to src/source_collectors/muckrock/classes/muckrock_fetchers/FOIAFetcher.py index 3a057864..5113665c 100644 --- a/source_collectors/muckrock/classes/muckrock_fetchers/FOIAFetcher.py +++ b/src/source_collectors/muckrock/classes/muckrock_fetchers/FOIAFetcher.py @@ -1,6 +1,6 @@ -from source_collectors.muckrock.classes.fetch_requests.FetchRequestBase import FetchRequest -from source_collectors.muckrock.classes.muckrock_fetchers.MuckrockFetcher import MuckrockFetcher -from source_collectors.muckrock.constants import BASE_MUCKROCK_URL +from src.source_collectors.muckrock.classes.fetch_requests.FetchRequestBase import FetchRequest +from src.source_collectors.muckrock.classes.muckrock_fetchers.MuckrockFetcher import MuckrockFetcher +from src.source_collectors.muckrock.constants import BASE_MUCKROCK_URL FOIA_BASE_URL = f"{BASE_MUCKROCK_URL}/foia" diff --git a/source_collectors/muckrock/classes/muckrock_fetchers/FOIAGeneratorFetcher.py b/src/source_collectors/muckrock/classes/muckrock_fetchers/FOIAGeneratorFetcher.py similarity index 59% rename from source_collectors/muckrock/classes/muckrock_fetchers/FOIAGeneratorFetcher.py rename to src/source_collectors/muckrock/classes/muckrock_fetchers/FOIAGeneratorFetcher.py index 8fc971c6..952ab03e 100644 --- a/source_collectors/muckrock/classes/muckrock_fetchers/FOIAGeneratorFetcher.py +++ b/src/source_collectors/muckrock/classes/muckrock_fetchers/FOIAGeneratorFetcher.py @@ -1,6 +1,6 @@ -from source_collectors.muckrock.classes.fetch_requests.FOIALoopFetchRequest import FOIALoopFetchRequest -from source_collectors.muckrock.classes.muckrock_fetchers.FOIAFetchManager import FOIAFetchManager -from source_collectors.muckrock.classes.muckrock_fetchers.MuckrockNextFetcher import MuckrockGeneratorFetcher +from src.source_collectors.muckrock.classes.fetch_requests.FOIALoopFetchRequest import FOIALoopFetchRequest +from src.source_collectors.muckrock.classes.muckrock_fetchers.FOIAFetchManager import FOIAFetchManager +from src.source_collectors.muckrock.classes.muckrock_fetchers.MuckrockNextFetcher import MuckrockGeneratorFetcher class FOIAGeneratorFetcher(MuckrockGeneratorFetcher): diff --git a/source_collectors/muckrock/classes/muckrock_fetchers/FOIALoopFetcher.py b/src/source_collectors/muckrock/classes/muckrock_fetchers/FOIALoopFetcher.py similarity index 65% rename from source_collectors/muckrock/classes/muckrock_fetchers/FOIALoopFetcher.py rename to src/source_collectors/muckrock/classes/muckrock_fetchers/FOIALoopFetcher.py index d1bed9e9..31ce7e1e 100644 --- a/source_collectors/muckrock/classes/muckrock_fetchers/FOIALoopFetcher.py +++ b/src/source_collectors/muckrock/classes/muckrock_fetchers/FOIALoopFetcher.py @@ -1,8 +1,8 @@ from datasets import tqdm -from source_collectors.muckrock.classes.fetch_requests.FOIALoopFetchRequest import FOIALoopFetchRequest -from source_collectors.muckrock.classes.muckrock_fetchers.FOIAFetchManager import FOIAFetchManager -from source_collectors.muckrock.classes.muckrock_fetchers.MuckrockLoopFetcher import MuckrockLoopFetcher +from src.source_collectors.muckrock.classes.fetch_requests.FOIALoopFetchRequest import FOIALoopFetchRequest +from src.source_collectors.muckrock.classes.muckrock_fetchers.FOIAFetchManager import FOIAFetchManager +from src.source_collectors.muckrock.classes.muckrock_fetchers.MuckrockLoopFetcher import MuckrockLoopFetcher class FOIALoopFetcher(MuckrockLoopFetcher): diff --git a/source_collectors/muckrock/classes/muckrock_fetchers/JurisdictionByIDFetcher.py b/src/source_collectors/muckrock/classes/muckrock_fetchers/JurisdictionByIDFetcher.py similarity index 62% rename from source_collectors/muckrock/classes/muckrock_fetchers/JurisdictionByIDFetcher.py rename to src/source_collectors/muckrock/classes/muckrock_fetchers/JurisdictionByIDFetcher.py index 08db97dd..0f29b9d8 100644 --- a/source_collectors/muckrock/classes/muckrock_fetchers/JurisdictionByIDFetcher.py +++ b/src/source_collectors/muckrock/classes/muckrock_fetchers/JurisdictionByIDFetcher.py @@ -1,6 +1,6 @@ -from source_collectors.muckrock.classes.fetch_requests.FetchRequestBase import FetchRequest -from source_collectors.muckrock.classes.muckrock_fetchers.MuckrockFetcher import MuckrockFetcher -from source_collectors.muckrock.constants import BASE_MUCKROCK_URL +from src.source_collectors.muckrock.classes.fetch_requests.FetchRequestBase import FetchRequest +from src.source_collectors.muckrock.classes.muckrock_fetchers.MuckrockFetcher import MuckrockFetcher +from src.source_collectors.muckrock.constants import BASE_MUCKROCK_URL class JurisdictionByIDFetchRequest(FetchRequest): diff --git a/source_collectors/muckrock/classes/muckrock_fetchers/JurisdictionFetchManager.py b/src/source_collectors/muckrock/classes/muckrock_fetchers/JurisdictionFetchManager.py similarity index 80% rename from source_collectors/muckrock/classes/muckrock_fetchers/JurisdictionFetchManager.py rename to src/source_collectors/muckrock/classes/muckrock_fetchers/JurisdictionFetchManager.py index f1145921..2b789461 100644 --- a/source_collectors/muckrock/classes/muckrock_fetchers/JurisdictionFetchManager.py +++ b/src/source_collectors/muckrock/classes/muckrock_fetchers/JurisdictionFetchManager.py @@ -1,5 +1,5 @@ -from source_collectors.muckrock.classes.fetch_requests.JurisdictionLoopFetchRequest import JurisdictionLoopFetchRequest -from source_collectors.muckrock.constants import BASE_MUCKROCK_URL +from src.source_collectors.muckrock.classes.fetch_requests.JurisdictionLoopFetchRequest import JurisdictionLoopFetchRequest +from src.source_collectors.muckrock.constants import BASE_MUCKROCK_URL class JurisdictionFetchManager: diff --git a/source_collectors/muckrock/classes/muckrock_fetchers/JurisdictionGeneratorFetcher.py b/src/source_collectors/muckrock/classes/muckrock_fetchers/JurisdictionGeneratorFetcher.py similarity index 57% rename from source_collectors/muckrock/classes/muckrock_fetchers/JurisdictionGeneratorFetcher.py rename to src/source_collectors/muckrock/classes/muckrock_fetchers/JurisdictionGeneratorFetcher.py index 4cc2343d..8463e90b 100644 --- a/source_collectors/muckrock/classes/muckrock_fetchers/JurisdictionGeneratorFetcher.py +++ b/src/source_collectors/muckrock/classes/muckrock_fetchers/JurisdictionGeneratorFetcher.py @@ -1,6 +1,6 @@ -from source_collectors.muckrock.classes.fetch_requests.JurisdictionLoopFetchRequest import JurisdictionLoopFetchRequest -from source_collectors.muckrock.classes.muckrock_fetchers.JurisdictionFetchManager import JurisdictionFetchManager -from source_collectors.muckrock.classes.muckrock_fetchers.MuckrockNextFetcher import MuckrockGeneratorFetcher +from src.source_collectors.muckrock.classes.fetch_requests.JurisdictionLoopFetchRequest import JurisdictionLoopFetchRequest +from src.source_collectors.muckrock.classes.muckrock_fetchers.JurisdictionFetchManager import JurisdictionFetchManager +from src.source_collectors.muckrock.classes.muckrock_fetchers.MuckrockNextFetcher import MuckrockGeneratorFetcher class JurisdictionGeneratorFetcher(MuckrockGeneratorFetcher): diff --git a/source_collectors/muckrock/classes/muckrock_fetchers/JurisdictionLoopFetcher.py b/src/source_collectors/muckrock/classes/muckrock_fetchers/JurisdictionLoopFetcher.py similarity index 77% rename from source_collectors/muckrock/classes/muckrock_fetchers/JurisdictionLoopFetcher.py rename to src/source_collectors/muckrock/classes/muckrock_fetchers/JurisdictionLoopFetcher.py index 3cf05359..9cd94d85 100644 --- a/source_collectors/muckrock/classes/muckrock_fetchers/JurisdictionLoopFetcher.py +++ b/src/source_collectors/muckrock/classes/muckrock_fetchers/JurisdictionLoopFetcher.py @@ -1,8 +1,8 @@ from tqdm import tqdm -from source_collectors.muckrock.classes.fetch_requests.JurisdictionLoopFetchRequest import JurisdictionLoopFetchRequest -from source_collectors.muckrock.classes.muckrock_fetchers.JurisdictionFetchManager import JurisdictionFetchManager -from source_collectors.muckrock.classes.muckrock_fetchers.MuckrockLoopFetcher import MuckrockLoopFetcher +from src.source_collectors.muckrock.classes.fetch_requests.JurisdictionLoopFetchRequest import JurisdictionLoopFetchRequest +from src.source_collectors.muckrock.classes.muckrock_fetchers.JurisdictionFetchManager import JurisdictionFetchManager +from src.source_collectors.muckrock.classes.muckrock_fetchers.MuckrockLoopFetcher import MuckrockLoopFetcher class JurisdictionLoopFetcher(MuckrockLoopFetcher): diff --git a/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockFetcher.py b/src/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockFetcher.py similarity index 91% rename from source_collectors/muckrock/classes/muckrock_fetchers/MuckrockFetcher.py rename to src/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockFetcher.py index c1a6eecb..57ef54bc 100644 --- a/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockFetcher.py +++ b/src/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockFetcher.py @@ -1,11 +1,10 @@ import abc -import asyncio from abc import ABC import requests import aiohttp -from source_collectors.muckrock.classes.fetch_requests.FetchRequestBase import FetchRequest +from src.source_collectors.muckrock.classes.fetch_requests.FetchRequestBase import FetchRequest class MuckrockNoMoreDataError(Exception): diff --git a/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockIterFetcherBase.py b/src/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockIterFetcherBase.py similarity index 81% rename from source_collectors/muckrock/classes/muckrock_fetchers/MuckrockIterFetcherBase.py rename to src/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockIterFetcherBase.py index 67253034..e8416a92 100644 --- a/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockIterFetcherBase.py +++ b/src/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockIterFetcherBase.py @@ -1,11 +1,10 @@ -import asyncio from abc import ABC, abstractmethod import aiohttp import requests -from source_collectors.muckrock.classes.exceptions.RequestFailureException import RequestFailureException -from source_collectors.muckrock.classes.fetch_requests.FetchRequestBase import FetchRequest +from src.source_collectors.muckrock.classes.exceptions.RequestFailureException import RequestFailureException +from src.source_collectors.muckrock.classes.fetch_requests.FetchRequestBase import FetchRequest class MuckrockIterFetcherBase(ABC): diff --git a/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockLoopFetcher.py b/src/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockLoopFetcher.py similarity index 76% rename from source_collectors/muckrock/classes/muckrock_fetchers/MuckrockLoopFetcher.py rename to src/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockLoopFetcher.py index 2e4814a5..1573572d 100644 --- a/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockLoopFetcher.py +++ b/src/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockLoopFetcher.py @@ -1,8 +1,8 @@ from abc import abstractmethod from time import sleep -from source_collectors.muckrock.classes.exceptions.RequestFailureException import RequestFailureException -from source_collectors.muckrock.classes.muckrock_fetchers.MuckrockIterFetcherBase import MuckrockIterFetcherBase +from src.source_collectors.muckrock.classes.exceptions.RequestFailureException import RequestFailureException +from src.source_collectors.muckrock.classes.muckrock_fetchers.MuckrockIterFetcherBase import MuckrockIterFetcherBase class MuckrockLoopFetcher(MuckrockIterFetcherBase): diff --git a/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockNextFetcher.py b/src/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockNextFetcher.py similarity index 77% rename from source_collectors/muckrock/classes/muckrock_fetchers/MuckrockNextFetcher.py rename to src/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockNextFetcher.py index 889e8446..da4c3a8b 100644 --- a/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockNextFetcher.py +++ b/src/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockNextFetcher.py @@ -1,5 +1,5 @@ -from source_collectors.muckrock.classes.exceptions.RequestFailureException import RequestFailureException -from source_collectors.muckrock.classes.muckrock_fetchers.MuckrockIterFetcherBase import MuckrockIterFetcherBase +from src.source_collectors.muckrock.classes.exceptions.RequestFailureException import RequestFailureException +from src.source_collectors.muckrock.classes.muckrock_fetchers.MuckrockIterFetcherBase import MuckrockIterFetcherBase class MuckrockGeneratorFetcher(MuckrockIterFetcherBase): diff --git a/util/__init__.py b/src/source_collectors/muckrock/classes/muckrock_fetchers/__init__.py similarity index 100% rename from util/__init__.py rename to src/source_collectors/muckrock/classes/muckrock_fetchers/__init__.py diff --git a/source_collectors/muckrock/constants.py b/src/source_collectors/muckrock/constants.py similarity index 100% rename from source_collectors/muckrock/constants.py rename to src/source_collectors/muckrock/constants.py diff --git a/source_collectors/muckrock/generate_detailed_muckrock_csv.py b/src/source_collectors/muckrock/generate_detailed_muckrock_csv.py similarity index 96% rename from source_collectors/muckrock/generate_detailed_muckrock_csv.py rename to src/source_collectors/muckrock/generate_detailed_muckrock_csv.py index 94e0034f..d654d1df 100644 --- a/source_collectors/muckrock/generate_detailed_muckrock_csv.py +++ b/src/source_collectors/muckrock/generate_detailed_muckrock_csv.py @@ -12,8 +12,8 @@ from pydantic import BaseModel -from source_collectors.muckrock.classes.muckrock_fetchers import AgencyFetcher -from source_collectors.muckrock.classes.muckrock_fetchers.JurisdictionByIDFetcher import JurisdictionByIDFetcher +from src.source_collectors.muckrock.classes.muckrock_fetchers import AgencyFetcher +from src.source_collectors.muckrock.classes.muckrock_fetchers.JurisdictionByIDFetcher import JurisdictionByIDFetcher from utils import format_filename_json_to_csv, load_json_file diff --git a/source_collectors/muckrock/schemas.py b/src/source_collectors/muckrock/schemas.py similarity index 100% rename from source_collectors/muckrock/schemas.py rename to src/source_collectors/muckrock/schemas.py diff --git a/source_collectors/muckrock/utils.py b/src/source_collectors/muckrock/utils.py similarity index 100% rename from source_collectors/muckrock/utils.py rename to src/source_collectors/muckrock/utils.py diff --git a/src/util/__init__.py b/src/util/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/util/alembic_helpers.py b/src/util/alembic_helpers.py similarity index 100% rename from util/alembic_helpers.py rename to src/util/alembic_helpers.py diff --git a/util/db_manager.py b/src/util/db_manager.py similarity index 100% rename from util/db_manager.py rename to src/util/db_manager.py diff --git a/util/helper_functions.py b/src/util/helper_functions.py similarity index 100% rename from util/helper_functions.py rename to src/util/helper_functions.py diff --git a/util/miscellaneous_functions.py b/src/util/miscellaneous_functions.py similarity index 100% rename from util/miscellaneous_functions.py rename to src/util/miscellaneous_functions.py diff --git a/tests/alembic/conftest.py b/tests/alembic/conftest.py index 42a04a6b..83e55c97 100644 --- a/tests/alembic/conftest.py +++ b/tests/alembic/conftest.py @@ -3,7 +3,7 @@ from sqlalchemy import create_engine, inspect, MetaData from sqlalchemy.orm import scoped_session, sessionmaker -from db.helper_functions import get_postgres_connection_string +from src.db.helper_functions import get_postgres_connection_string from tests.helpers.AlembicRunner import AlembicRunner diff --git a/tests/automated/integration/api/conftest.py b/tests/automated/integration/api/conftest.py index c709d202..dab293db 100644 --- a/tests/automated/integration/api/conftest.py +++ b/tests/automated/integration/api/conftest.py @@ -1,19 +1,19 @@ import asyncio from dataclasses import dataclass from typing import Generator -from unittest.mock import MagicMock, AsyncMock +from unittest.mock import AsyncMock import pytest import pytest_asyncio from starlette.testclient import TestClient -from api.main import app -from core.AsyncCore import AsyncCore -from api.routes.review import requires_final_review_permission -from core.DTOs.GetBatchStatusResponse import GetBatchStatusResponse -from core.SourceCollectorCore import SourceCollectorCore -from core.enums import BatchStatus -from security_manager.SecurityManager import get_access_info, AccessInfo, Permissions, require_permission +from src.api.main import app +from src.api.routes.review import requires_final_review_permission +from src.core.AsyncCore import AsyncCore +from src.core.DTOs.GetBatchStatusResponse import GetBatchStatusResponse +from src.core.SourceCollectorCore import SourceCollectorCore +from src.core.enums import BatchStatus +from src.security_manager.SecurityManager import get_access_info, AccessInfo, Permissions from tests.helpers.DBDataCreator import DBDataCreator from tests.automated.integration.api.helpers.RequestValidator import RequestValidator diff --git a/tests/automated/integration/api/helpers/RequestValidator.py b/tests/automated/integration/api/helpers/RequestValidator.py index 5fabd69b..145235b4 100644 --- a/tests/automated/integration/api/helpers/RequestValidator.py +++ b/tests/automated/integration/api/helpers/RequestValidator.py @@ -5,40 +5,40 @@ from pydantic import BaseModel from starlette.testclient import TestClient -from db.DTOs.BatchInfo import BatchInfo -from db.DTOs.GetTaskStatusResponseInfo import GetTaskStatusResponseInfo -from db.DTOs.TaskInfo import TaskInfo -from db.enums import TaskType -from collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO -from collector_manager.enums import CollectorType -from core.DTOs.AllAnnotationPostInfo import AllAnnotationPostInfo -from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo, FinalReviewBaseInfo, FinalReviewRejectionInfo -from core.DTOs.GetBatchLogsResponse import GetBatchLogsResponse -from core.DTOs.GetBatchStatusResponse import GetBatchStatusResponse -from core.DTOs.GetDuplicatesByBatchResponse import GetDuplicatesByBatchResponse -from core.DTOs.GetMetricsBacklogResponse import GetMetricsBacklogResponseDTO -from core.DTOs.GetMetricsBatchesAggregatedResponseDTO import GetMetricsBatchesAggregatedResponseDTO -from core.DTOs.GetMetricsBatchesBreakdownResponseDTO import GetMetricsBatchesBreakdownResponseDTO -from core.DTOs.GetMetricsURLsAggregatedResponseDTO import GetMetricsURLsAggregatedResponseDTO -from core.DTOs.GetMetricsURLsBreakdownPendingResponseDTO import GetMetricsURLsBreakdownPendingResponseDTO -from core.DTOs.GetMetricsURLsBreakdownSubmittedResponseDTO import GetMetricsURLsBreakdownSubmittedResponseDTO -from core.DTOs.GetNextRecordTypeAnnotationResponseInfo import GetNextRecordTypeAnnotationResponseOuterInfo -from core.DTOs.GetNextRelevanceAnnotationResponseInfo import GetNextRelevanceAnnotationResponseOuterInfo -from core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAnnotationResponse, \ +from src.db.DTOs.BatchInfo import BatchInfo +from src.db.DTOs.GetTaskStatusResponseInfo import GetTaskStatusResponseInfo +from src.db.DTOs.TaskInfo import TaskInfo +from src.db.enums import TaskType +from src.collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO +from src.collector_manager.enums import CollectorType +from src.core.DTOs.AllAnnotationPostInfo import AllAnnotationPostInfo +from src.core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo, FinalReviewRejectionInfo +from src.core.DTOs.GetBatchLogsResponse import GetBatchLogsResponse +from src.core.DTOs.GetBatchStatusResponse import GetBatchStatusResponse +from src.core.DTOs.GetDuplicatesByBatchResponse import GetDuplicatesByBatchResponse +from src.core.DTOs.GetMetricsBacklogResponse import GetMetricsBacklogResponseDTO +from src.core.DTOs.GetMetricsBatchesAggregatedResponseDTO import GetMetricsBatchesAggregatedResponseDTO +from src.core.DTOs.GetMetricsBatchesBreakdownResponseDTO import GetMetricsBatchesBreakdownResponseDTO +from src.core.DTOs.GetMetricsURLsAggregatedResponseDTO import GetMetricsURLsAggregatedResponseDTO +from src.core.DTOs.GetMetricsURLsBreakdownPendingResponseDTO import GetMetricsURLsBreakdownPendingResponseDTO +from src.core.DTOs.GetMetricsURLsBreakdownSubmittedResponseDTO import GetMetricsURLsBreakdownSubmittedResponseDTO +from src.core.DTOs.GetNextRecordTypeAnnotationResponseInfo import GetNextRecordTypeAnnotationResponseOuterInfo +from src.core.DTOs.GetNextRelevanceAnnotationResponseInfo import GetNextRelevanceAnnotationResponseOuterInfo +from src.core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAnnotationResponse, \ URLAgencyAnnotationPostInfo -from core.DTOs.GetNextURLForAllAnnotationResponse import GetNextURLForAllAnnotationResponse -from core.DTOs.GetNextURLForFinalReviewResponse import GetNextURLForFinalReviewOuterResponse -from core.DTOs.GetTasksResponse import GetTasksResponse -from core.DTOs.GetURLsByBatchResponse import GetURLsByBatchResponse -from core.DTOs.GetURLsResponseInfo import GetURLsResponseInfo -from core.DTOs.ManualBatchInputDTO import ManualBatchInputDTO -from core.DTOs.ManualBatchResponseDTO import ManualBatchResponseDTO -from core.DTOs.MessageResponse import MessageResponse -from core.DTOs.RecordTypeAnnotationPostInfo import RecordTypeAnnotationPostInfo -from core.DTOs.RelevanceAnnotationPostInfo import RelevanceAnnotationPostInfo -from core.DTOs.SearchURLResponse import SearchURLResponse -from core.enums import BatchStatus -from util.helper_functions import update_if_not_none +from src.core.DTOs.GetNextURLForAllAnnotationResponse import GetNextURLForAllAnnotationResponse +from src.core.DTOs.GetNextURLForFinalReviewResponse import GetNextURLForFinalReviewOuterResponse +from src.core.DTOs.GetTasksResponse import GetTasksResponse +from src.core.DTOs.GetURLsByBatchResponse import GetURLsByBatchResponse +from src.core.DTOs.GetURLsResponseInfo import GetURLsResponseInfo +from src.core.DTOs.ManualBatchInputDTO import ManualBatchInputDTO +from src.core.DTOs.ManualBatchResponseDTO import ManualBatchResponseDTO +from src.core.DTOs.MessageResponse import MessageResponse +from src.core.DTOs.RecordTypeAnnotationPostInfo import RecordTypeAnnotationPostInfo +from src.core.DTOs.RelevanceAnnotationPostInfo import RelevanceAnnotationPostInfo +from src.core.DTOs.SearchURLResponse import SearchURLResponse +from src.core.enums import BatchStatus +from src.util.helper_functions import update_if_not_none class ExpectedResponseInfo(BaseModel): diff --git a/tests/automated/integration/api/test_annotate.py b/tests/automated/integration/api/test_annotate.py index a3344b68..89c695f1 100644 --- a/tests/automated/integration/api/test_annotate.py +++ b/tests/automated/integration/api/test_annotate.py @@ -3,21 +3,21 @@ import pytest from fastapi import HTTPException -from db.DTOs.InsertURLsInfo import InsertURLsInfo -from db.DTOs.URLMapping import URLMapping -from db.models import UserUrlAgencySuggestion, UserRelevantSuggestion, UserRecordTypeSuggestion -from core.DTOs.AllAnnotationPostInfo import AllAnnotationPostInfo -from core.DTOs.GetNextRecordTypeAnnotationResponseInfo import GetNextRecordTypeAnnotationResponseOuterInfo -from core.DTOs.GetNextRelevanceAnnotationResponseInfo import GetNextRelevanceAnnotationResponseOuterInfo -from core.DTOs.GetNextURLForAgencyAnnotationResponse import URLAgencyAnnotationPostInfo -from core.DTOs.RecordTypeAnnotationPostInfo import RecordTypeAnnotationPostInfo -from core.DTOs.RelevanceAnnotationPostInfo import RelevanceAnnotationPostInfo -from core.classes.ErrorManager import ErrorTypes -from core.enums import RecordType, SuggestionType, SuggestedStatus -from core.exceptions import FailedValidationException +from src.db.DTOs.InsertURLsInfo import InsertURLsInfo +from src.db.DTOs.URLMapping import URLMapping +from src.db.models import UserUrlAgencySuggestion, UserRelevantSuggestion, UserRecordTypeSuggestion +from src.core.DTOs.AllAnnotationPostInfo import AllAnnotationPostInfo +from src.core.DTOs.GetNextRecordTypeAnnotationResponseInfo import GetNextRecordTypeAnnotationResponseOuterInfo +from src.core.DTOs.GetNextRelevanceAnnotationResponseInfo import GetNextRelevanceAnnotationResponseOuterInfo +from src.core.DTOs.GetNextURLForAgencyAnnotationResponse import URLAgencyAnnotationPostInfo +from src.core.DTOs.RecordTypeAnnotationPostInfo import RecordTypeAnnotationPostInfo +from src.core.DTOs.RelevanceAnnotationPostInfo import RelevanceAnnotationPostInfo +from src.core.classes.ErrorManager import ErrorTypes +from src.core.enums import RecordType, SuggestionType, SuggestedStatus +from src.core.exceptions import FailedValidationException +from src.html_tag_collector.DataClassTags import ResponseHTMLInfo from tests.helpers.complex_test_data_functions import AnnotateAgencySetupInfo, setup_for_annotate_agency, \ setup_for_get_next_url_for_final_review -from html_tag_collector.DataClassTags import ResponseHTMLInfo from tests.helpers.DBDataCreator import BatchURLCreationInfo from tests.automated.integration.api.conftest import MOCK_USER_ID diff --git a/tests/automated/integration/api/test_batch.py b/tests/automated/integration/api/test_batch.py index 2f7e2ebb..082f932b 100644 --- a/tests/automated/integration/api/test_batch.py +++ b/tests/automated/integration/api/test_batch.py @@ -1,13 +1,10 @@ -import asyncio -import time - import pytest -from db.DTOs.BatchInfo import BatchInfo -from db.DTOs.InsertURLsInfo import InsertURLsInfo -from collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO -from collector_manager.enums import CollectorType, URLStatus -from core.enums import BatchStatus +from src.db.DTOs.BatchInfo import BatchInfo +from src.db.DTOs.InsertURLsInfo import InsertURLsInfo +from src.collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO +from src.collector_manager.enums import CollectorType, URLStatus +from src.core.enums import BatchStatus @pytest.mark.asyncio async def test_get_batch_status_pending_url_filter(api_test_helper): diff --git a/tests/automated/integration/api/test_duplicates.py b/tests/automated/integration/api/test_duplicates.py index 654a9c65..e96588d4 100644 --- a/tests/automated/integration/api/test_duplicates.py +++ b/tests/automated/integration/api/test_duplicates.py @@ -1,10 +1,7 @@ -import asyncio -import time - import pytest -from db.DTOs.BatchInfo import BatchInfo -from collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO +from src.db.DTOs.BatchInfo import BatchInfo +from src.collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO from tests.automated.integration.api.conftest import disable_task_trigger diff --git a/tests/automated/integration/api/test_example_collector.py b/tests/automated/integration/api/test_example_collector.py index 83d1ad6d..fbc77005 100644 --- a/tests/automated/integration/api/test_example_collector.py +++ b/tests/automated/integration/api/test_example_collector.py @@ -3,16 +3,16 @@ import pytest -from db.AsyncDatabaseClient import AsyncDatabaseClient -from db.DTOs.BatchInfo import BatchInfo -from collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO -from collector_manager.ExampleCollector import ExampleCollector -from collector_manager.enums import CollectorType -from core.AsyncCoreLogger import AsyncCoreLogger -from core.DTOs.BatchStatusInfo import BatchStatusInfo -from core.DTOs.GetBatchLogsResponse import GetBatchLogsResponse -from core.DTOs.GetBatchStatusResponse import GetBatchStatusResponse -from core.enums import BatchStatus +from src.db.AsyncDatabaseClient import AsyncDatabaseClient +from src.db.DTOs.BatchInfo import BatchInfo +from src.collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO +from src.collector_manager.ExampleCollector import ExampleCollector +from src.collector_manager.enums import CollectorType +from src.core.AsyncCoreLogger import AsyncCoreLogger +from src.core.DTOs.BatchStatusInfo import BatchStatusInfo +from src.core.DTOs.GetBatchLogsResponse import GetBatchLogsResponse +from src.core.DTOs.GetBatchStatusResponse import GetBatchStatusResponse +from src.core.enums import BatchStatus from tests.helpers.patch_functions import block_sleep from tests.automated.integration.api.conftest import disable_task_trigger diff --git a/tests/automated/integration/api/test_manual_batch.py b/tests/automated/integration/api/test_manual_batch.py index 1c0a2ecc..85a8cdec 100644 --- a/tests/automated/integration/api/test_manual_batch.py +++ b/tests/automated/integration/api/test_manual_batch.py @@ -1,10 +1,10 @@ import pytest -from db.models import Batch, URL, URLOptionalDataSourceMetadata -from collector_manager.enums import CollectorType -from core.DTOs.ManualBatchInputDTO import ManualBatchInnerInputDTO, ManualBatchInputDTO -from core.enums import RecordType +from src.db.models import Batch, URL, URLOptionalDataSourceMetadata +from src.collector_manager.enums import CollectorType +from src.core.DTOs.ManualBatchInputDTO import ManualBatchInnerInputDTO, ManualBatchInputDTO +from src.core.enums import RecordType @pytest.mark.asyncio diff --git a/tests/automated/integration/api/test_metrics.py b/tests/automated/integration/api/test_metrics.py index 7d0fadfc..16611b0e 100644 --- a/tests/automated/integration/api/test_metrics.py +++ b/tests/automated/integration/api/test_metrics.py @@ -1,8 +1,8 @@ import pendulum import pytest -from collector_manager.enums import URLStatus, CollectorType -from core.enums import BatchStatus, RecordType, SuggestedStatus +from src.collector_manager.enums import URLStatus, CollectorType +from src.core.enums import BatchStatus, RecordType, SuggestedStatus from tests.helpers.test_batch_creation_parameters import TestBatchCreationParameters, TestURLCreationParameters, \ AnnotationInfo diff --git a/tests/automated/integration/api/test_review.py b/tests/automated/integration/api/test_review.py index a034b740..0e347a77 100644 --- a/tests/automated/integration/api/test_review.py +++ b/tests/automated/integration/api/test_review.py @@ -1,12 +1,12 @@ import pytest -from db.constants import PLACEHOLDER_AGENCY_NAME -from db.models import URL, URLOptionalDataSourceMetadata, ConfirmedURLAgency, Agency -from collector_manager.enums import URLStatus -from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo, FinalReviewBaseInfo, RejectionReason, \ +from src.db.constants import PLACEHOLDER_AGENCY_NAME +from src.db.models import URL, URLOptionalDataSourceMetadata, ConfirmedURLAgency, Agency +from src.collector_manager.enums import URLStatus +from src.core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo, RejectionReason, \ FinalReviewRejectionInfo -from core.DTOs.GetNextURLForFinalReviewResponse import GetNextURLForFinalReviewOuterResponse -from core.enums import RecordType, SuggestedStatus +from src.core.DTOs.GetNextURLForFinalReviewResponse import GetNextURLForFinalReviewOuterResponse +from src.core.enums import RecordType, SuggestedStatus from tests.helpers.complex_test_data_functions import setup_for_get_next_url_for_final_review diff --git a/tests/automated/integration/api/test_search.py b/tests/automated/integration/api/test_search.py index 917690fc..3252f144 100644 --- a/tests/automated/integration/api/test_search.py +++ b/tests/automated/integration/api/test_search.py @@ -1,6 +1,6 @@ import pytest -from core.DTOs.SearchURLResponse import SearchURLResponse +from src.core.DTOs.SearchURLResponse import SearchURLResponse @pytest.mark.asyncio diff --git a/tests/automated/integration/api/test_task.py b/tests/automated/integration/api/test_task.py index c13f97f9..21e662f1 100644 --- a/tests/automated/integration/api/test_task.py +++ b/tests/automated/integration/api/test_task.py @@ -1,6 +1,6 @@ import pytest -from db.enums import TaskType +from src.db.enums import TaskType from tests.automated.integration.api.conftest import APITestHelper diff --git a/tests/automated/integration/api/test_url.py b/tests/automated/integration/api/test_url.py index 9068af5e..0ec2e836 100644 --- a/tests/automated/integration/api/test_url.py +++ b/tests/automated/integration/api/test_url.py @@ -1,7 +1,7 @@ import pytest -from db.DTOs.InsertURLsInfo import InsertURLsInfo -from core.DTOs.GetURLsResponseInfo import GetURLsResponseInfo +from src.db.DTOs.InsertURLsInfo import InsertURLsInfo +from src.core.DTOs.GetURLsResponseInfo import GetURLsResponseInfo @pytest.mark.asyncio diff --git a/tests/automated/integration/collector_db/test_database_structure.py b/tests/automated/integration/collector_db/test_database_structure.py index 88b186ad..022b5502 100644 --- a/tests/automated/integration/collector_db/test_database_structure.py +++ b/tests/automated/integration/collector_db/test_database_structure.py @@ -14,17 +14,17 @@ import sqlalchemy as sa from sqlalchemy import create_engine from sqlalchemy.dialects import postgresql -from sqlalchemy.exc import DataError, DBAPIError - -from db.DTOs.InsertURLsInfo import InsertURLsInfo -from db.enums import URLHTMLContentType -from db.helper_functions import get_postgres_connection_string -from db.models import Base, Agency -from collector_manager.enums import CollectorType, URLStatus -from core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo -from core.enums import BatchStatus, SuggestionType -from tests.helpers.DBDataCreator import DBDataCreator, BatchURLCreationInfo -from util.helper_functions import get_enum_values +from sqlalchemy.exc import DataError + +from src.db.DTOs.InsertURLsInfo import InsertURLsInfo +from src.db.enums import URLHTMLContentType +from src.db.helper_functions import get_postgres_connection_string +from src.db.models import Base, Agency +from src.collector_manager.enums import CollectorType, URLStatus +from src.core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo +from src.core.enums import BatchStatus, SuggestionType +from src.util.helper_functions import get_enum_values +from tests.helpers.DBDataCreator import DBDataCreator SATypes: TypeAlias = sa.Integer or sa.String or postgresql.ENUM or sa.TIMESTAMP or sa.Text diff --git a/tests/automated/integration/collector_db/test_db_client.py b/tests/automated/integration/collector_db/test_db_client.py index 644bd500..5f8faa05 100644 --- a/tests/automated/integration/collector_db/test_db_client.py +++ b/tests/automated/integration/collector_db/test_db_client.py @@ -3,17 +3,17 @@ import pytest from fastapi import HTTPException -from db.AsyncDatabaseClient import AsyncDatabaseClient -from db.DTOs.BatchInfo import BatchInfo -from db.DTOs.LogInfo import LogInfo -from db.DTOs.URLErrorInfos import URLErrorPydanticInfo -from db.DTOs.URLInfo import URLInfo -from db.DTOs.URLMapping import URLMapping -from db.constants import PLACEHOLDER_AGENCY_NAME -from db.models import URL, ReviewingUserURL, URLOptionalDataSourceMetadata, ConfirmedURLAgency, Agency -from collector_manager.enums import URLStatus -from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo -from core.enums import BatchStatus, RecordType, SuggestionType, SuggestedStatus +from src.db.AsyncDatabaseClient import AsyncDatabaseClient +from src.db.DTOs.BatchInfo import BatchInfo +from src.db.DTOs.LogInfo import LogInfo +from src.db.DTOs.URLErrorInfos import URLErrorPydanticInfo +from src.db.DTOs.URLInfo import URLInfo +from src.db.DTOs.URLMapping import URLMapping +from src.db.constants import PLACEHOLDER_AGENCY_NAME +from src.db.models import URL, ReviewingUserURL, URLOptionalDataSourceMetadata, ConfirmedURLAgency, Agency +from src.collector_manager.enums import URLStatus +from src.core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo +from src.core.enums import BatchStatus, RecordType, SuggestionType, SuggestedStatus from tests.helpers.complex_test_data_functions import setup_for_get_next_url_for_annotation, setup_for_annotate_agency from tests.helpers.DBDataCreator import DBDataCreator from tests.helpers.complex_test_data_functions import setup_for_get_next_url_for_final_review diff --git a/tests/automated/integration/conftest.py b/tests/automated/integration/conftest.py index 3912f3e8..8aa79e36 100644 --- a/tests/automated/integration/conftest.py +++ b/tests/automated/integration/conftest.py @@ -2,11 +2,11 @@ import pytest -from db.AsyncDatabaseClient import AsyncDatabaseClient -from collector_manager.AsyncCollectorManager import AsyncCollectorManager -from core.AsyncCore import AsyncCore -from core.AsyncCoreLogger import AsyncCoreLogger -from core.SourceCollectorCore import SourceCollectorCore +from src.db.AsyncDatabaseClient import AsyncDatabaseClient +from src.collector_manager.AsyncCollectorManager import AsyncCollectorManager +from src.core.AsyncCore import AsyncCore +from src.core.AsyncCoreLogger import AsyncCoreLogger +from src.core.SourceCollectorCore import SourceCollectorCore @pytest.fixture diff --git a/tests/automated/integration/core/test_async_core.py b/tests/automated/integration/core/test_async_core.py index 7fa3d757..fc0e1b7f 100644 --- a/tests/automated/integration/core/test_async_core.py +++ b/tests/automated/integration/core/test_async_core.py @@ -1,15 +1,15 @@ import types -from unittest.mock import MagicMock, AsyncMock, call +from unittest.mock import AsyncMock, call import pytest -from db.AsyncDatabaseClient import AsyncDatabaseClient -from db.enums import TaskType -from db.models import Task -from core.AsyncCore import AsyncCore -from core.DTOs.TaskOperatorRunInfo import TaskOperatorRunInfo, TaskOperatorOutcome -from core.TaskManager import TaskManager -from core.enums import BatchStatus +from src.db import AsyncDatabaseClient +from src.db.enums import TaskType +from src.db.models import Task +from src.core.AsyncCore import AsyncCore +from src.core.DTOs.TaskOperatorRunInfo import TaskOperatorRunInfo, TaskOperatorOutcome +from src.core.TaskManager import TaskManager +from src.core.enums import BatchStatus from tests.helpers.DBDataCreator import DBDataCreator def setup_async_core(adb_client: AsyncDatabaseClient): diff --git a/tests/automated/integration/core/test_example_collector_lifecycle.py b/tests/automated/integration/core/test_example_collector_lifecycle.py index f094e5b7..936be0d8 100644 --- a/tests/automated/integration/core/test_example_collector_lifecycle.py +++ b/tests/automated/integration/core/test_example_collector_lifecycle.py @@ -2,13 +2,13 @@ import pytest -from db.DTOs.BatchInfo import BatchInfo -from collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO -from collector_manager.enums import CollectorType, URLStatus -from core.AsyncCore import AsyncCore -from core.DTOs.CollectorStartInfo import CollectorStartInfo -from core.SourceCollectorCore import SourceCollectorCore -from core.enums import BatchStatus +from src.db.DTOs.BatchInfo import BatchInfo +from src.collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO +from src.collector_manager.enums import CollectorType, URLStatus +from src.core.AsyncCore import AsyncCore +from src.core.DTOs.CollectorStartInfo import CollectorStartInfo +from src.core.SourceCollectorCore import SourceCollectorCore +from src.core.enums import BatchStatus from tests.helpers.patch_functions import block_sleep diff --git a/tests/automated/integration/html_tag_collector/test_root_url_cache.py b/tests/automated/integration/html_tag_collector/test_root_url_cache.py index 206347e3..f24fdca9 100644 --- a/tests/automated/integration/html_tag_collector/test_root_url_cache.py +++ b/tests/automated/integration/html_tag_collector/test_root_url_cache.py @@ -1,6 +1,6 @@ import pytest -from html_tag_collector.RootURLCache import RootURLCacheResponseInfo, RootURLCache +from src.html_tag_collector.RootURLCache import RootURLCacheResponseInfo, RootURLCache async def mock_get_request(url: str) -> RootURLCacheResponseInfo: diff --git a/tests/automated/integration/security_manager/test_security_manager.py b/tests/automated/integration/security_manager/test_security_manager.py index eb7e8506..295b67b0 100644 --- a/tests/automated/integration/security_manager/test_security_manager.py +++ b/tests/automated/integration/security_manager/test_security_manager.py @@ -2,10 +2,10 @@ import pytest from starlette.testclient import TestClient -from api.main import app -from security_manager.SecurityManager import Permissions, ALGORITHM +from src.api.main import app +from src.security_manager.SecurityManager import Permissions, ALGORITHM -PATCH_ROOT = "security_manager.SecurityManager" +PATCH_ROOT = "src.security_manager.SecurityManager" def get_patch_path(patch_name): return f"{PATCH_ROOT}.{patch_name}" diff --git a/tests/automated/integration/tasks/conftest.py b/tests/automated/integration/tasks/conftest.py index a4136b20..42d5b29c 100644 --- a/tests/automated/integration/tasks/conftest.py +++ b/tests/automated/integration/tasks/conftest.py @@ -3,7 +3,7 @@ import pytest from pdap_access_manager import AccessManager -from pdap_api_client.PDAPClient import PDAPClient +from src.pdap_api_client.PDAPClient import PDAPClient @pytest.fixture diff --git a/tests/automated/integration/tasks/test_agency_preannotation_task.py b/tests/automated/integration/tasks/test_agency_preannotation_task.py index c0de5c52..afd55c85 100644 --- a/tests/automated/integration/tasks/test_agency_preannotation_task.py +++ b/tests/automated/integration/tasks/test_agency_preannotation_task.py @@ -5,23 +5,23 @@ import pytest from aiohttp import ClientSession +from src.pdap_api_client.enums import MatchAgencyResponseStatus from tests.helpers.test_batch_creation_parameters import TestBatchCreationParameters, TestURLCreationParameters -from source_collectors.muckrock.MuckrockAPIInterface import MuckrockAPIInterface, AgencyLookupResponseType, AgencyLookupResponse -from db.models import Agency, AutomatedUrlAgencySuggestion -from collector_manager.enums import CollectorType, URLStatus -from core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome -from core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo -from core.classes.task_operators.AgencyIdentificationTaskOperator import AgencyIdentificationTaskOperator -from core.classes.subtasks.AutoGooglerAgencyIdentificationSubtask import AutoGooglerAgencyIdentificationSubtask -from core.classes.subtasks.CKANAgencyIdentificationSubtask import CKANAgencyIdentificationSubtask -from core.classes.subtasks.CommonCrawlerAgencyIdentificationSubtask import CommonCrawlerAgencyIdentificationSubtask -from core.classes.subtasks.MuckrockAgencyIdentificationSubtask import MuckrockAgencyIdentificationSubtask -from core.enums import SuggestionType +from src.source_collectors.muckrock.MuckrockAPIInterface import MuckrockAPIInterface, AgencyLookupResponseType, AgencyLookupResponse +from src.db.models import Agency, AutomatedUrlAgencySuggestion +from src.collector_manager.enums import CollectorType, URLStatus +from src.core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome +from src.core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo +from src.core.classes.task_operators.AgencyIdentificationTaskOperator import AgencyIdentificationTaskOperator +from src.core.classes.subtasks.AutoGooglerAgencyIdentificationSubtask import AutoGooglerAgencyIdentificationSubtask +from src.core.classes.subtasks.CKANAgencyIdentificationSubtask import CKANAgencyIdentificationSubtask +from src.core.classes.subtasks.CommonCrawlerAgencyIdentificationSubtask import CommonCrawlerAgencyIdentificationSubtask +from src.core.classes.subtasks.MuckrockAgencyIdentificationSubtask import MuckrockAgencyIdentificationSubtask +from src.core.enums import SuggestionType from pdap_access_manager import AccessManager -from pdap_api_client.DTOs import MatchAgencyResponse, MatchAgencyInfo -from pdap_api_client.PDAPClient import PDAPClient -from pdap_api_client.enums import MatchAgencyResponseStatus -from tests.helpers.DBDataCreator import DBDataCreator, BatchURLCreationInfo, BatchURLCreationInfoV2 +from src.pdap_api_client.DTOs import MatchAgencyResponse, MatchAgencyInfo +from src.pdap_api_client.PDAPClient import PDAPClient +from tests.helpers.DBDataCreator import DBDataCreator, BatchURLCreationInfoV2 sample_agency_suggestions = [ URLAgencySuggestionInfo( diff --git a/tests/automated/integration/tasks/test_example_task.py b/tests/automated/integration/tasks/test_example_task.py index c0515103..7f5d5e73 100644 --- a/tests/automated/integration/tasks/test_example_task.py +++ b/tests/automated/integration/tasks/test_example_task.py @@ -2,9 +2,9 @@ import pytest -from db.enums import TaskType -from core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome -from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase +from src.db.enums import TaskType +from src.core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome +from src.core.classes.task_operators.TaskOperatorBase import TaskOperatorBase from tests.helpers.DBDataCreator import DBDataCreator class ExampleTaskOperator(TaskOperatorBase): diff --git a/tests/automated/integration/tasks/test_submit_approved_url_task.py b/tests/automated/integration/tasks/test_submit_approved_url_task.py index d5453005..f561af17 100644 --- a/tests/automated/integration/tasks/test_submit_approved_url_task.py +++ b/tests/automated/integration/tasks/test_submit_approved_url_task.py @@ -1,19 +1,19 @@ from http import HTTPStatus -from unittest.mock import MagicMock, AsyncMock +from unittest.mock import AsyncMock import pytest from deepdiff import DeepDiff -from db.enums import TaskType -from db.models import URL, URLErrorInfo, URLDataSource -from collector_manager.enums import URLStatus -from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo -from core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome -from core.classes.task_operators.SubmitApprovedURLTaskOperator import SubmitApprovedURLTaskOperator -from core.enums import RecordType, SubmitResponseStatus +from src.db.enums import TaskType +from src.db.models import URL, URLErrorInfo, URLDataSource +from src.collector_manager.enums import URLStatus +from src.core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo +from src.core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome +from src.core.classes.task_operators.SubmitApprovedURLTaskOperator import SubmitApprovedURLTaskOperator +from src.core.enums import RecordType, SubmitResponseStatus from tests.helpers.DBDataCreator import BatchURLCreationInfo, DBDataCreator from pdap_access_manager import RequestInfo, RequestType, ResponseInfo, DataSourcesNamespaces -from pdap_api_client.PDAPClient import PDAPClient +from src.pdap_api_client.PDAPClient import PDAPClient def mock_make_request(pdap_client: PDAPClient, urls: list[str]): diff --git a/tests/automated/integration/tasks/test_url_404_probe.py b/tests/automated/integration/tasks/test_url_404_probe.py index a897a59e..63283751 100644 --- a/tests/automated/integration/tasks/test_url_404_probe.py +++ b/tests/automated/integration/tasks/test_url_404_probe.py @@ -5,11 +5,11 @@ import pytest from aiohttp import ClientResponseError, RequestInfo -from db.models import URLProbedFor404, URL -from collector_manager.enums import URLStatus -from core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome -from core.classes.task_operators.URL404ProbeTaskOperator import URL404ProbeTaskOperator -from html_tag_collector.URLRequestInterface import URLResponseInfo, URLRequestInterface +from src.db.models import URLProbedFor404, URL +from src.collector_manager.enums import URLStatus +from src.core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome +from src.core.classes.task_operators.URL404ProbeTaskOperator import URL404ProbeTaskOperator +from src.html_tag_collector.URLRequestInterface import URLResponseInfo, URLRequestInterface from tests.helpers.DBDataCreator import DBDataCreator from tests.helpers.test_batch_creation_parameters import TestBatchCreationParameters, TestURLCreationParameters diff --git a/tests/automated/integration/tasks/test_url_duplicate_task.py b/tests/automated/integration/tasks/test_url_duplicate_task.py index 0987a2f4..32bb435f 100644 --- a/tests/automated/integration/tasks/test_url_duplicate_task.py +++ b/tests/automated/integration/tasks/test_url_duplicate_task.py @@ -3,15 +3,15 @@ import pytest -from db.DTOs.URLMapping import URLMapping -from db.models import URL, URLCheckedForDuplicate -from collector_manager.enums import URLStatus -from core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome -from core.classes.task_operators.URLDuplicateTaskOperator import URLDuplicateTaskOperator +from src.db.DTOs.URLMapping import URLMapping +from src.db.models import URL, URLCheckedForDuplicate +from src.collector_manager.enums import URLStatus +from src.core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome +from src.core.classes.task_operators.URLDuplicateTaskOperator import URLDuplicateTaskOperator from tests.helpers.DBDataCreator import DBDataCreator from tests.helpers.test_batch_creation_parameters import TestBatchCreationParameters, TestURLCreationParameters from pdap_access_manager import ResponseInfo -from pdap_api_client.PDAPClient import PDAPClient +from src.pdap_api_client.PDAPClient import PDAPClient @pytest.mark.asyncio diff --git a/tests/automated/integration/tasks/test_url_html_task.py b/tests/automated/integration/tasks/test_url_html_task.py index 7926b26b..273a4c97 100644 --- a/tests/automated/integration/tasks/test_url_html_task.py +++ b/tests/automated/integration/tasks/test_url_html_task.py @@ -3,18 +3,18 @@ from typing import Optional import pytest -from aiohttp import ClientError, ClientResponseError, RequestInfo - -from db.AsyncDatabaseClient import AsyncDatabaseClient -from db.enums import TaskType -from collector_manager.enums import URLStatus -from core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome -from core.classes.task_operators.URLHTMLTaskOperator import URLHTMLTaskOperator +from aiohttp import ClientResponseError, RequestInfo + +from src.db.AsyncDatabaseClient import AsyncDatabaseClient +from src.db.enums import TaskType +from src.collector_manager.enums import URLStatus +from src.core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome +from src.core.classes.task_operators.URLHTMLTaskOperator import URLHTMLTaskOperator +from src.html_tag_collector.DataClassTags import ResponseHTMLInfo from tests.helpers.DBDataCreator import DBDataCreator -from html_tag_collector.DataClassTags import ResponseHTMLInfo -from html_tag_collector.ResponseParser import HTMLResponseParser -from html_tag_collector.RootURLCache import RootURLCache -from html_tag_collector.URLRequestInterface import URLRequestInterface, URLResponseInfo +from src.html_tag_collector.ResponseParser import HTMLResponseParser +from src.html_tag_collector.RootURLCache import RootURLCache +from src.html_tag_collector.URLRequestInterface import URLRequestInterface, URLResponseInfo @pytest.mark.asyncio diff --git a/tests/automated/integration/tasks/test_url_miscellaneous_metadata_task.py b/tests/automated/integration/tasks/test_url_miscellaneous_metadata_task.py index 2b63c33c..e6a5a72f 100644 --- a/tests/automated/integration/tasks/test_url_miscellaneous_metadata_task.py +++ b/tests/automated/integration/tasks/test_url_miscellaneous_metadata_task.py @@ -2,10 +2,10 @@ import pytest -from db.models import URL, URLOptionalDataSourceMetadata -from collector_manager.enums import CollectorType -from core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome -from core.classes.task_operators.URLMiscellaneousMetadataTaskOperator import URLMiscellaneousMetadataTaskOperator +from src.db.models import URL, URLOptionalDataSourceMetadata +from src.collector_manager.enums import CollectorType +from src.core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome +from src.core.classes.task_operators.URLMiscellaneousMetadataTaskOperator import URLMiscellaneousMetadataTaskOperator from tests.helpers.DBDataCreator import DBDataCreator diff --git a/tests/automated/integration/tasks/test_url_record_type_task.py b/tests/automated/integration/tasks/test_url_record_type_task.py index 1f26812a..ab50ae6f 100644 --- a/tests/automated/integration/tasks/test_url_record_type_task.py +++ b/tests/automated/integration/tasks/test_url_record_type_task.py @@ -2,13 +2,13 @@ import pytest -from db.enums import TaskType -from db.models import AutoRecordTypeSuggestion -from core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome -from core.classes.task_operators.URLRecordTypeTaskOperator import URLRecordTypeTaskOperator -from core.enums import RecordType +from src.db.enums import TaskType +from src.db.models import AutoRecordTypeSuggestion +from src.core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome +from src.core.classes.task_operators.URLRecordTypeTaskOperator import URLRecordTypeTaskOperator +from src.core.enums import RecordType from tests.helpers.DBDataCreator import DBDataCreator -from llm_api_logic.DeepSeekRecordClassifier import DeepSeekRecordClassifier +from src.llm_api_logic.DeepSeekRecordClassifier import DeepSeekRecordClassifier @pytest.mark.asyncio async def test_url_record_type_task(db_data_creator: DBDataCreator): diff --git a/tests/automated/unit/core/test_core_logger.py b/tests/automated/unit/core/test_core_logger.py index f60f989c..b092bd0e 100644 --- a/tests/automated/unit/core/test_core_logger.py +++ b/tests/automated/unit/core/test_core_logger.py @@ -3,8 +3,8 @@ import pytest -from db.DTOs.LogInfo import LogInfo -from core.AsyncCoreLogger import AsyncCoreLogger +from src.db.DTOs.LogInfo import LogInfo +from src.core.AsyncCoreLogger import AsyncCoreLogger @pytest.mark.asyncio diff --git a/tests/automated/unit/dto/test_all_annotation_post_info.py b/tests/automated/unit/dto/test_all_annotation_post_info.py index 1b35234a..3bc20c02 100644 --- a/tests/automated/unit/dto/test_all_annotation_post_info.py +++ b/tests/automated/unit/dto/test_all_annotation_post_info.py @@ -1,8 +1,8 @@ import pytest -from core.DTOs.AllAnnotationPostInfo import AllAnnotationPostInfo -from core.enums import RecordType, SuggestedStatus -from core.exceptions import FailedValidationException +from src.core.DTOs.AllAnnotationPostInfo import AllAnnotationPostInfo +from src.core.enums import RecordType, SuggestedStatus +from src.core.exceptions import FailedValidationException # Mock values to pass mock_record_type = RecordType.ARREST_RECORDS.value # replace with valid RecordType if Enum diff --git a/tests/automated/unit/security_manager/test_security_manager.py b/tests/automated/unit/security_manager/test_security_manager.py index fd03fee5..8f650e25 100644 --- a/tests/automated/unit/security_manager/test_security_manager.py +++ b/tests/automated/unit/security_manager/test_security_manager.py @@ -4,14 +4,14 @@ from fastapi import HTTPException from jwt import InvalidTokenError -from security_manager.SecurityManager import SecurityManager, Permissions, AccessInfo, get_access_info +from src.security_manager.SecurityManager import SecurityManager, Permissions, AccessInfo, get_access_info SECRET_KEY = "test_secret_key" VALID_TOKEN = "valid_token" INVALID_TOKEN = "invalid_token" FAKE_PAYLOAD = {"sub": 1, "permissions": [Permissions.SOURCE_COLLECTOR.value]} -PATCH_ROOT = "security_manager.SecurityManager" +PATCH_ROOT = "src.security_manager.SecurityManager" def get_patch_path(patch_name): return f"{PATCH_ROOT}.{patch_name}" diff --git a/tests/automated/unit/source_collectors/test_autogoogler_collector.py b/tests/automated/unit/source_collectors/test_autogoogler_collector.py index dc5de285..a8b74d9e 100644 --- a/tests/automated/unit/source_collectors/test_autogoogler_collector.py +++ b/tests/automated/unit/source_collectors/test_autogoogler_collector.py @@ -2,16 +2,16 @@ import pytest -from db.AsyncDatabaseClient import AsyncDatabaseClient -from db.DTOs.URLInfo import URLInfo -from core.AsyncCoreLogger import AsyncCoreLogger -from source_collectors.auto_googler.AutoGooglerCollector import AutoGooglerCollector -from source_collectors.auto_googler.DTOs import GoogleSearchQueryResultsInnerDTO, AutoGooglerInputDTO +from src.db.AsyncDatabaseClient import AsyncDatabaseClient +from src.db.DTOs.URLInfo import URLInfo +from src.core.AsyncCoreLogger import AsyncCoreLogger +from src.source_collectors.auto_googler.AutoGooglerCollector import AutoGooglerCollector +from src.source_collectors.auto_googler.DTOs import GoogleSearchQueryResultsInnerDTO, AutoGooglerInputDTO @pytest.fixture def patch_get_query_results(monkeypatch): - patch_path = "source_collectors.auto_googler.GoogleSearcher.GoogleSearcher.get_query_results" + patch_path = "src.source_collectors.auto_googler.GoogleSearcher.GoogleSearcher.get_query_results" mock = AsyncMock() mock.side_effect = [ [GoogleSearchQueryResultsInnerDTO(url="https://include.com/1", title="keyword", snippet="snippet 1"),], diff --git a/tests/automated/unit/source_collectors/test_ckan_collector.py b/tests/automated/unit/source_collectors/test_ckan_collector.py deleted file mode 100644 index 747b0852..00000000 --- a/tests/automated/unit/source_collectors/test_ckan_collector.py +++ /dev/null @@ -1,63 +0,0 @@ -import json -import pickle -from unittest.mock import MagicMock, AsyncMock - -import pytest - -from db.AsyncDatabaseClient import AsyncDatabaseClient -from core.AsyncCoreLogger import AsyncCoreLogger -from source_collectors.ckan.CKANCollector import CKANCollector -from source_collectors.ckan.DTOs import CKANInputDTO - - -@pytest.fixture -def mock_ckan_collector_methods(monkeypatch): - mock = AsyncMock() - - mock_path = "source_collectors.ckan.CKANCollector.CKANCollector.get_results" - with open("tests/test_data/ckan_get_result_test_data.json", "r", encoding="utf-8") as f: - data = json.load(f) - - mock.get_results = AsyncMock() - mock.get_results.return_value = data - monkeypatch.setattr(mock_path, mock.get_results) - - mock_path = "source_collectors.ckan.CKANCollector.CKANCollector.add_collection_child_packages" - with open("tests/test_data/ckan_add_collection_child_packages.pkl", "rb") as f: - data = pickle.load(f) - - mock.add_collection_child_packages = AsyncMock() - mock.add_collection_child_packages.return_value = data - monkeypatch.setattr(mock_path, mock.add_collection_child_packages) - - - - yield mock - -@pytest.mark.asyncio -async def test_ckan_collector(mock_ckan_collector_methods): - mock = mock_ckan_collector_methods - - collector = CKANCollector( - batch_id=1, - dto=CKANInputDTO(), - logger=AsyncMock(spec=AsyncCoreLogger), - adb_client=AsyncMock(spec=AsyncDatabaseClient), - raise_error=True - ) - await collector.run() - - mock.get_results.assert_called_once() - mock.add_collection_child_packages.assert_called_once() - - collector.adb_client.insert_urls.assert_called_once() - url_infos = collector.adb_client.insert_urls.call_args[1]['url_infos'] - assert len(url_infos) == 2560 - first_url_info = url_infos[0] - assert first_url_info.url == 'https://catalog.data.gov/dataset/crash-reporting-drivers-data' - assert first_url_info.collector_metadata['submitted_name'] == 'Crash Reporting - Drivers Data' - - last_url_info = url_infos[-1] - assert last_url_info.url == 'https://data.houstontx.gov/dataset/houston-police-department-crime-statistics' - assert last_url_info.collector_metadata["description"] == 'Multiple datasets related to Houston Police Department Crime Stats' - diff --git a/tests/automated/unit/source_collectors/test_common_crawl_collector.py b/tests/automated/unit/source_collectors/test_common_crawl_collector.py index 6023b8cf..0f7ccab3 100644 --- a/tests/automated/unit/source_collectors/test_common_crawl_collector.py +++ b/tests/automated/unit/source_collectors/test_common_crawl_collector.py @@ -2,16 +2,16 @@ import pytest -from db.AsyncDatabaseClient import AsyncDatabaseClient -from db.DTOs.URLInfo import URLInfo -from core.AsyncCoreLogger import AsyncCoreLogger -from source_collectors.common_crawler.CommonCrawlerCollector import CommonCrawlerCollector -from source_collectors.common_crawler.DTOs import CommonCrawlerInputDTO +from src.db.AsyncDatabaseClient import AsyncDatabaseClient +from src.db.DTOs.URLInfo import URLInfo +from src.core.AsyncCoreLogger import AsyncCoreLogger +from src.source_collectors.common_crawler.CommonCrawlerCollector import CommonCrawlerCollector +from src.source_collectors.common_crawler.DTOs import CommonCrawlerInputDTO @pytest.fixture def mock_get_common_crawl_search_results(): - mock_path = "source_collectors.common_crawler.CommonCrawler.get_common_crawl_search_results" + mock_path = "src.source_collectors.common_crawler.CommonCrawler.get_common_crawl_search_results" # Results contain other keys, but those are not relevant and thus # can be ignored mock_results = [ diff --git a/tests/automated/unit/source_collectors/test_example_collector.py b/tests/automated/unit/source_collectors/test_example_collector.py index e5d113cc..b0aa69cb 100644 --- a/tests/automated/unit/source_collectors/test_example_collector.py +++ b/tests/automated/unit/source_collectors/test_example_collector.py @@ -1,9 +1,9 @@ from unittest.mock import AsyncMock -from db.DatabaseClient import DatabaseClient -from collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO -from collector_manager.ExampleCollector import ExampleCollector -from core.AsyncCoreLogger import AsyncCoreLogger +from src.db.DatabaseClient import DatabaseClient +from src.collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO +from src.collector_manager.ExampleCollector import ExampleCollector +from src.core.AsyncCoreLogger import AsyncCoreLogger def test_example_collector(): diff --git a/tests/automated/unit/source_collectors/test_muckrock_collectors.py b/tests/automated/unit/source_collectors/test_muckrock_collectors.py index cd49ffb6..a73e156a 100644 --- a/tests/automated/unit/source_collectors/test_muckrock_collectors.py +++ b/tests/automated/unit/source_collectors/test_muckrock_collectors.py @@ -3,19 +3,19 @@ import pytest -from db.AsyncDatabaseClient import AsyncDatabaseClient -from db.DTOs.URLInfo import URLInfo -from core.AsyncCoreLogger import AsyncCoreLogger -from source_collectors.muckrock.DTOs import MuckrockSimpleSearchCollectorInputDTO, \ +from src.db.AsyncDatabaseClient import AsyncDatabaseClient +from src.db.DTOs.URLInfo import URLInfo +from src.core.AsyncCoreLogger import AsyncCoreLogger +from src.source_collectors.muckrock.DTOs import MuckrockSimpleSearchCollectorInputDTO, \ MuckrockCountySearchCollectorInputDTO, MuckrockAllFOIARequestsCollectorInputDTO -from source_collectors.muckrock.classes.MuckrockCollector import MuckrockSimpleSearchCollector, \ +from src.source_collectors.muckrock.classes.MuckrockCollector import MuckrockSimpleSearchCollector, \ MuckrockCountyLevelSearchCollector, MuckrockAllFOIARequestsCollector -from source_collectors.muckrock.classes.muckrock_fetchers.FOIAFetcher import FOIAFetchRequest +from src.source_collectors.muckrock.classes.muckrock_fetchers.FOIAFetcher import FOIAFetchRequest @pytest.fixture def patch_muckrock_fetcher(monkeypatch): - patch_path = "source_collectors.muckrock.classes.muckrock_fetchers.MuckrockFetcher.MuckrockFetcher.fetch" + patch_path = "src.source_collectors.muckrock.classes.muckrock_fetchers.MuckrockFetcher.MuckrockFetcher.fetch" inner_test_data = [ {"absolute_url": "https://include.com/1", "title": "keyword"}, {"absolute_url": "https://include.com/2", "title": "keyword"}, @@ -66,7 +66,7 @@ async def test_muckrock_simple_collector(patch_muckrock_fetcher): @pytest.fixture def patch_muckrock_county_level_search_collector_methods(monkeypatch): - patch_root = ("source_collectors.muckrock.classes.MuckrockCollector." + patch_root = ("src.source_collectors.muckrock.classes.MuckrockCollector." "MuckrockCountyLevelSearchCollector.") patch_path_get_jurisdiction_ids = patch_root + "get_jurisdiction_ids" patch_path_get_foia_records = patch_root + "get_foia_records" @@ -125,7 +125,7 @@ async def test_muckrock_county_search_collector(patch_muckrock_county_level_sear @pytest.fixture def patch_muckrock_full_search_collector(monkeypatch): - patch_path = ("source_collectors.muckrock.classes.MuckrockCollector." + patch_path = ("src.source_collectors.muckrock.classes.MuckrockCollector." "MuckrockAllFOIARequestsCollector.get_page_data") test_data = [{ "results": [ @@ -148,7 +148,7 @@ def patch_muckrock_full_search_collector(monkeypatch): mock.get_page_data = AsyncMock(return_value=test_data) monkeypatch.setattr(patch_path, mock.get_page_data) - patch_path = ("source_collectors.muckrock.classes.MuckrockCollector." + patch_path = ("src.source_collectors.muckrock.classes.MuckrockCollector." "FOIAFetcher") mock.foia_fetcher = MagicMock() monkeypatch.setattr(patch_path, mock.foia_fetcher) diff --git a/tests/automated/unit/test_function_trigger.py b/tests/automated/unit/test_function_trigger.py index 37b3c948..cc3a77b2 100644 --- a/tests/automated/unit/test_function_trigger.py +++ b/tests/automated/unit/test_function_trigger.py @@ -3,7 +3,7 @@ import pytest -from core.FunctionTrigger import FunctionTrigger +from src.core.FunctionTrigger import FunctionTrigger @pytest.mark.asyncio diff --git a/tests/conftest.py b/tests/conftest.py index 99281103..cab4a2ad 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,14 +3,14 @@ from sqlalchemy import create_engine, inspect, MetaData from sqlalchemy.orm import scoped_session, sessionmaker -from db.AsyncDatabaseClient import AsyncDatabaseClient -from db.DatabaseClient import DatabaseClient -from db.helper_functions import get_postgres_connection_string -from db.models import Base -from core.EnvVarManager import EnvVarManager +from src.db.AsyncDatabaseClient import AsyncDatabaseClient +from src.db.DatabaseClient import DatabaseClient +from src.db.helper_functions import get_postgres_connection_string +from src.db.models import Base +from src.core.EnvVarManager import EnvVarManager +from src.util.helper_functions import load_from_environment from tests.helpers.AlembicRunner import AlembicRunner from tests.helpers.DBDataCreator import DBDataCreator -from util.helper_functions import load_from_environment @pytest.fixture(autouse=True, scope="session") diff --git a/tests/helpers/DBDataCreator.py b/tests/helpers/DBDataCreator.py index a0036e2a..8e03eeed 100644 --- a/tests/helpers/DBDataCreator.py +++ b/tests/helpers/DBDataCreator.py @@ -1,26 +1,25 @@ -import asyncio from datetime import datetime from random import randint from typing import List, Optional -from pydantic import BaseModel, model_validator - -from db.AsyncDatabaseClient import AsyncDatabaseClient -from db.DTOs.BatchInfo import BatchInfo -from db.DTOs.DuplicateInfo import DuplicateInsertInfo -from db.DTOs.InsertURLsInfo import InsertURLsInfo -from db.DTOs.URLErrorInfos import URLErrorPydanticInfo -from db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo, HTMLContentType -from db.DTOs.URLInfo import URLInfo -from db.DTOs.URLMapping import URLMapping -from db.DatabaseClient import DatabaseClient -from db.enums import TaskType -from collector_manager.enums import CollectorType, URLStatus -from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo, RejectionReason -from core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo -from core.DTOs.task_data_objects.SubmitApprovedURLTDO import SubmittedURLInfo -from core.DTOs.task_data_objects.URLMiscellaneousMetadataTDO import URLMiscellaneousMetadataTDO -from core.enums import BatchStatus, SuggestionType, RecordType, SuggestedStatus +from pydantic import BaseModel + +from src.db.AsyncDatabaseClient import AsyncDatabaseClient +from src.db.DTOs.BatchInfo import BatchInfo +from src.db.DTOs.DuplicateInfo import DuplicateInsertInfo +from src.db.DTOs.InsertURLsInfo import InsertURLsInfo +from src.db.DTOs.URLErrorInfos import URLErrorPydanticInfo +from src.db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo, HTMLContentType +from src.db.DTOs.URLInfo import URLInfo +from src.db.DTOs.URLMapping import URLMapping +from src.db.DatabaseClient import DatabaseClient +from src.db.enums import TaskType +from src.collector_manager.enums import CollectorType, URLStatus +from src.core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo, RejectionReason +from src.core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo +from src.core.DTOs.task_data_objects.SubmitApprovedURLTDO import SubmittedURLInfo +from src.core.DTOs.task_data_objects.URLMiscellaneousMetadataTDO import URLMiscellaneousMetadataTDO +from src.core.enums import BatchStatus, SuggestionType, RecordType, SuggestedStatus from tests.helpers.test_batch_creation_parameters import TestBatchCreationParameters, AnnotationInfo from tests.helpers.simple_test_data_functions import generate_test_urls diff --git a/tests/helpers/assert_functions.py b/tests/helpers/assert_functions.py index 7deaacc3..32fe608c 100644 --- a/tests/helpers/assert_functions.py +++ b/tests/helpers/assert_functions.py @@ -1,5 +1,5 @@ -from db.AsyncDatabaseClient import AsyncDatabaseClient -from db.models import Task +from src.db import AsyncDatabaseClient +from src.db.models import Task async def assert_database_has_no_tasks(adb_client: AsyncDatabaseClient): diff --git a/tests/helpers/complex_test_data_functions.py b/tests/helpers/complex_test_data_functions.py index 32dfca02..5d1e237c 100644 --- a/tests/helpers/complex_test_data_functions.py +++ b/tests/helpers/complex_test_data_functions.py @@ -2,10 +2,10 @@ from pydantic import BaseModel -from db.DTOs.InsertURLsInfo import InsertURLsInfo -from db.DTOs.URLMapping import URLMapping -from collector_manager.enums import URLStatus -from core.enums import RecordType, SuggestionType +from src.db.DTOs.InsertURLsInfo import InsertURLsInfo +from src.db.DTOs.URLMapping import URLMapping +from src.collector_manager.enums import URLStatus +from src.core.enums import RecordType, SuggestionType from tests.helpers.DBDataCreator import BatchURLCreationInfo from tests.helpers.DBDataCreator import DBDataCreator diff --git a/tests/helpers/patch_functions.py b/tests/helpers/patch_functions.py index bb805d29..a5798014 100644 --- a/tests/helpers/patch_functions.py +++ b/tests/helpers/patch_functions.py @@ -4,7 +4,7 @@ async def block_sleep(monkeypatch) -> AwaitableBarrier: barrier = AwaitableBarrier() monkeypatch.setattr( - "collector_manager.ExampleCollector.ExampleCollector.sleep", + "src.collector_manager.ExampleCollector.ExampleCollector.sleep", barrier ) return barrier diff --git a/tests/helpers/test_batch_creation_parameters.py b/tests/helpers/test_batch_creation_parameters.py index 5d679569..7952b762 100644 --- a/tests/helpers/test_batch_creation_parameters.py +++ b/tests/helpers/test_batch_creation_parameters.py @@ -3,8 +3,8 @@ from pydantic import BaseModel, model_validator -from collector_manager.enums import URLStatus, CollectorType -from core.enums import BatchStatus, AnnotationType, RecordType, SuggestedStatus +from src.collector_manager.enums import URLStatus, CollectorType +from src.core.enums import BatchStatus, RecordType, SuggestedStatus class AnnotationInfo(BaseModel): diff --git a/tests/manual/agency_identifier/test_muckrock_api_interface.py b/tests/manual/agency_identifier/test_muckrock_api_interface.py index e3a86ed9..8f76385e 100644 --- a/tests/manual/agency_identifier/test_muckrock_api_interface.py +++ b/tests/manual/agency_identifier/test_muckrock_api_interface.py @@ -1,7 +1,7 @@ import pytest from aiohttp import ClientSession -from source_collectors.muckrock.MuckrockAPIInterface import MuckrockAPIInterface +from src.source_collectors.muckrock.MuckrockAPIInterface import MuckrockAPIInterface @pytest.mark.asyncio diff --git a/tests/manual/api/test_authorization.py b/tests/manual/api/test_authorization.py index d17fbe1c..062b8d66 100644 --- a/tests/manual/api/test_authorization.py +++ b/tests/manual/api/test_authorization.py @@ -3,7 +3,7 @@ def test_root_endpoint_without_mocked_dependency(): # Here, we use the app without a dependency override - from api.main import app + from src.api.main import app with TestClient(app) as c: response = c.get( url="/", diff --git a/tests/manual/core/lifecycle/test_auto_googler_lifecycle.py b/tests/manual/core/lifecycle/test_auto_googler_lifecycle.py index d832c2a8..3e545b2e 100644 --- a/tests/manual/core/lifecycle/test_auto_googler_lifecycle.py +++ b/tests/manual/core/lifecycle/test_auto_googler_lifecycle.py @@ -2,17 +2,16 @@ import dotenv -import api.dependencies -from db.DTOs.BatchInfo import BatchInfo -from collector_manager.enums import CollectorType -from core.enums import BatchStatus +from src.db.DTOs.BatchInfo import BatchInfo +from src.collector_manager import CollectorType +from src.core.enums import BatchStatus from test_automated.integration.core.helpers.common_test_procedures import run_collector_and_wait_for_completion def test_auto_googler_collector_lifecycle(test_core): # TODO: Rework for Async ci = test_core - db_client = api.dependencies.db_client + db_client = src.api.dependencies.db_client dotenv.load_dotenv() config = { @@ -30,7 +29,7 @@ def test_auto_googler_collector_lifecycle(test_core): config=config ) - batch_info: BatchInfo = api.dependencies.db_client.get_batch_by_id(1) + batch_info: BatchInfo = src.api.dependencies.db_client.get_batch_by_id(1) assert batch_info.strategy == "auto_googler" assert batch_info.status == BatchStatus.READY_TO_LABEL assert batch_info.total_url_count == 20 diff --git a/tests/manual/core/lifecycle/test_ckan_lifecycle.py b/tests/manual/core/lifecycle/test_ckan_lifecycle.py index 2d4a4f7a..567d8f9b 100644 --- a/tests/manual/core/lifecycle/test_ckan_lifecycle.py +++ b/tests/manual/core/lifecycle/test_ckan_lifecycle.py @@ -1,15 +1,13 @@ - -import api.dependencies -from db.DTOs.BatchInfo import BatchInfo -from collector_manager.enums import CollectorType -from core.enums import BatchStatus -from source_collectors.ckan.search_terms import group_search, package_search, organization_search +from src.db.DTOs.BatchInfo import BatchInfo +from src.collector_manager import CollectorType +from src.core.enums import BatchStatus +from src.source_collectors.ckan.search_terms import group_search, package_search, organization_search from test_automated.integration.core.helpers.common_test_procedures import run_collector_and_wait_for_completion def test_ckan_lifecycle(test_core): ci = test_core - db_client = api.dependencies.db_client + db_client = src.api.dependencies.db_client config = { "package_search": package_search, diff --git a/tests/manual/core/lifecycle/test_common_crawler_lifecycle.py b/tests/manual/core/lifecycle/test_common_crawler_lifecycle.py index 03fe5855..3883c864 100644 --- a/tests/manual/core/lifecycle/test_common_crawler_lifecycle.py +++ b/tests/manual/core/lifecycle/test_common_crawler_lifecycle.py @@ -1,14 +1,13 @@ import time -import api.dependencies -from collector_manager.enums import CollectorType -from core.SourceCollectorCore import SourceCollectorCore -from core.enums import BatchStatus +from src.collector_manager import CollectorType +from src.core.SourceCollectorCore import SourceCollectorCore +from src.core.enums import BatchStatus def test_common_crawler_lifecycle(test_core: SourceCollectorCore): core = test_core - db_client = api.dependencies.db_client + db_client = src.api.dependencies.db_client config = { "common_crawl_id": "CC-MAIN-2023-50", diff --git a/tests/manual/core/lifecycle/test_muckrock_lifecycles.py b/tests/manual/core/lifecycle/test_muckrock_lifecycles.py index 26e4aa36..a5dfce38 100644 --- a/tests/manual/core/lifecycle/test_muckrock_lifecycles.py +++ b/tests/manual/core/lifecycle/test_muckrock_lifecycles.py @@ -1,15 +1,13 @@ - -import api.dependencies -from db.DTOs.BatchInfo import BatchInfo -from collector_manager.enums import CollectorType -from core.enums import BatchStatus +from src.db.DTOs.BatchInfo import BatchInfo +from src.collector_manager import CollectorType +from src.core.enums import BatchStatus from test_automated.integration.core.helpers.common_test_procedures import run_collector_and_wait_for_completion from test_automated.integration.core.helpers.constants import ALLEGHENY_COUNTY_MUCKROCK_ID, ALLEGHENY_COUNTY_TOWN_NAMES def test_muckrock_simple_search_collector_lifecycle(test_core): ci = test_core - db_client = api.dependencies.db_client + db_client = src.api.dependencies.db_client config = { "search_string": "police", @@ -31,7 +29,7 @@ def test_muckrock_simple_search_collector_lifecycle(test_core): def test_muckrock_county_level_search_collector_lifecycle(test_core): ci = test_core - db_client = api.dependencies.db_client + db_client = src.api.dependencies.db_client config = { "parent_jurisdiction_id": ALLEGHENY_COUNTY_MUCKROCK_ID, @@ -53,7 +51,7 @@ def test_muckrock_county_level_search_collector_lifecycle(test_core): def test_muckrock_full_search_collector_lifecycle(test_core): ci = test_core - db_client = api.dependencies.db_client + db_client = src.api.dependencies.db_client config = { "start_page": 1, diff --git a/tests/manual/html_collector/test_html_tag_collector_integration.py b/tests/manual/html_collector/test_html_tag_collector_integration.py index 5777f907..674360a4 100644 --- a/tests/manual/html_collector/test_html_tag_collector_integration.py +++ b/tests/manual/html_collector/test_html_tag_collector_integration.py @@ -1,12 +1,12 @@ import pytest -from db.AsyncDatabaseClient import AsyncDatabaseClient -from db.DTOs.URLInfo import URLInfo -from core.classes.task_operators.URLHTMLTaskOperator import URLHTMLTaskOperator +from src.db import AsyncDatabaseClient +from src.db.DTOs import URLInfo +from src.core.classes.task_operators.URLHTMLTaskOperator import URLHTMLTaskOperator from tests.helpers.DBDataCreator import DBDataCreator -from html_tag_collector.ResponseParser import HTMLResponseParser -from html_tag_collector.RootURLCache import RootURLCache -from html_tag_collector.URLRequestInterface import URLRequestInterface +from src.html_tag_collector.ResponseParser import HTMLResponseParser +from src.html_tag_collector import RootURLCache +from src.html_tag_collector.URLRequestInterface import URLRequestInterface URLS = [ "https://pdap.io", diff --git a/tests/manual/llm_api_logic/test_deepseek_record_classifier.py b/tests/manual/llm_api_logic/test_deepseek_record_classifier.py index 18363a71..cf239aa4 100644 --- a/tests/manual/llm_api_logic/test_deepseek_record_classifier.py +++ b/tests/manual/llm_api_logic/test_deepseek_record_classifier.py @@ -1,12 +1,12 @@ import pytest -from db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo -from llm_api_logic.DeepSeekRecordClassifier import DeepSeekRecordClassifier +from src.db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo +from src.llm_api_logic.DeepSeekRecordClassifier import DeepSeekRecordClassifier @pytest.mark.asyncio async def test_deepseek_record_classifier(): - from db.DTOs.URLHTMLContentInfo import HTMLContentType as hct + from src.db.DTOs.URLHTMLContentInfo import HTMLContentType as hct d = { hct.TITLE: "Oath of Office for Newly Promoted Corporal Lumpkin with Acworth Police – City of Acworth, GA", diff --git a/tests/manual/llm_api_logic/test_openai_record_classifier.py b/tests/manual/llm_api_logic/test_openai_record_classifier.py index 57b56a54..b1812a27 100644 --- a/tests/manual/llm_api_logic/test_openai_record_classifier.py +++ b/tests/manual/llm_api_logic/test_openai_record_classifier.py @@ -1,12 +1,12 @@ import pytest -from db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo -from llm_api_logic.OpenAIRecordClassifier import OpenAIRecordClassifier +from src.db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo +from src.llm_api_logic.OpenAIRecordClassifier import OpenAIRecordClassifier @pytest.mark.asyncio async def test_openai_record_classifier(): - from db.DTOs.URLHTMLContentInfo import HTMLContentType as hct + from src.db.DTOs.URLHTMLContentInfo import HTMLContentType as hct d = { hct.TITLE: "Oath of Office for Newly Promoted Corporal Lumpkin with Acworth Police – City of Acworth, GA", diff --git a/tests/manual/pdap_client/test_access_manager.py b/tests/manual/pdap_client/test_access_manager.py index b1245eca..2844464a 100644 --- a/tests/manual/pdap_client/test_access_manager.py +++ b/tests/manual/pdap_client/test_access_manager.py @@ -2,7 +2,7 @@ from aiohttp import ClientSession from pdap_access_manager import AccessManager -from util.helper_functions import get_from_env +from src.util import get_from_env @pytest.mark.asyncio diff --git a/tests/manual/pdap_client/test_pdap_client.py b/tests/manual/pdap_client/test_pdap_client.py index 5d10037c..a8a8da29 100644 --- a/tests/manual/pdap_client/test_pdap_client.py +++ b/tests/manual/pdap_client/test_pdap_client.py @@ -2,8 +2,8 @@ from aiohttp import ClientSession from pdap_access_manager import AccessManager -from pdap_api_client.PDAPClient import PDAPClient -from util.helper_functions import get_from_env +from src.pdap_api_client.PDAPClient import PDAPClient +from src.util import get_from_env @pytest.mark.asyncio diff --git a/tests/manual/source_collectors/test_autogoogler_collector.py b/tests/manual/source_collectors/test_autogoogler_collector.py index cabdcc1e..926875f9 100644 --- a/tests/manual/source_collectors/test_autogoogler_collector.py +++ b/tests/manual/source_collectors/test_autogoogler_collector.py @@ -1,11 +1,11 @@ -from unittest.mock import MagicMock, AsyncMock +from unittest.mock import AsyncMock import pytest -from db.AsyncDatabaseClient import AsyncDatabaseClient -from core.AsyncCoreLogger import AsyncCoreLogger -from source_collectors.auto_googler.AutoGooglerCollector import AutoGooglerCollector -from source_collectors.auto_googler.DTOs import AutoGooglerInputDTO +from src.db import AsyncDatabaseClient +from src.core.AsyncCoreLogger import AsyncCoreLogger +from src.source_collectors.auto_googler.AutoGooglerCollector import AutoGooglerCollector +from src.source_collectors.auto_googler.DTOs import AutoGooglerInputDTO @pytest.mark.asyncio async def test_autogoogler_collector(): diff --git a/tests/manual/source_collectors/test_ckan_collector.py b/tests/manual/source_collectors/test_ckan_collector.py index e6a6c1f8..0fadec3a 100644 --- a/tests/manual/source_collectors/test_ckan_collector.py +++ b/tests/manual/source_collectors/test_ckan_collector.py @@ -1,13 +1,13 @@ -from unittest.mock import MagicMock, AsyncMock +from unittest.mock import AsyncMock import pytest from marshmallow import Schema, fields -from db.AsyncDatabaseClient import AsyncDatabaseClient -from core.AsyncCoreLogger import AsyncCoreLogger -from source_collectors.ckan.CKANCollector import CKANCollector -from source_collectors.ckan.DTOs import CKANInputDTO -from source_collectors.ckan.search_terms import package_search, group_search, organization_search +from src.db import AsyncDatabaseClient +from src.core.AsyncCoreLogger import AsyncCoreLogger +from src.source_collectors.ckan import CKANCollector +from src.source_collectors.ckan.DTOs import CKANInputDTO +from src.source_collectors.ckan.search_terms import package_search, group_search, organization_search class CKANSchema(Schema): diff --git a/tests/manual/source_collectors/test_common_crawler_collector.py b/tests/manual/source_collectors/test_common_crawler_collector.py index 12be9ec7..c91da5e7 100644 --- a/tests/manual/source_collectors/test_common_crawler_collector.py +++ b/tests/manual/source_collectors/test_common_crawler_collector.py @@ -1,12 +1,12 @@ -from unittest.mock import MagicMock, AsyncMock +from unittest.mock import AsyncMock import pytest from marshmallow import Schema, fields -from db.AsyncDatabaseClient import AsyncDatabaseClient -from core.AsyncCoreLogger import AsyncCoreLogger -from source_collectors.common_crawler.CommonCrawlerCollector import CommonCrawlerCollector -from source_collectors.common_crawler.DTOs import CommonCrawlerInputDTO +from src.db import AsyncDatabaseClient +from src.core.AsyncCoreLogger import AsyncCoreLogger +from src.source_collectors.common_crawler import CommonCrawlerCollector +from src.source_collectors.common_crawler.DTOs import CommonCrawlerInputDTO class CommonCrawlerSchema(Schema): diff --git a/tests/manual/source_collectors/test_muckrock_collectors.py b/tests/manual/source_collectors/test_muckrock_collectors.py index c30473df..5d0fd1ca 100644 --- a/tests/manual/source_collectors/test_muckrock_collectors.py +++ b/tests/manual/source_collectors/test_muckrock_collectors.py @@ -1,14 +1,14 @@ -from unittest.mock import MagicMock, AsyncMock +from unittest.mock import AsyncMock import pytest -from db.AsyncDatabaseClient import AsyncDatabaseClient -from core.AsyncCoreLogger import AsyncCoreLogger -from source_collectors.muckrock.DTOs import MuckrockSimpleSearchCollectorInputDTO, \ +from src.db import AsyncDatabaseClient +from src.core.AsyncCoreLogger import AsyncCoreLogger +from src.source_collectors.muckrock.DTOs import MuckrockSimpleSearchCollectorInputDTO, \ MuckrockCountySearchCollectorInputDTO, MuckrockAllFOIARequestsCollectorInputDTO -from source_collectors.muckrock.classes.MuckrockCollector import MuckrockSimpleSearchCollector, \ +from src.source_collectors.muckrock.classes import MuckrockSimpleSearchCollector, \ MuckrockCountyLevelSearchCollector, MuckrockAllFOIARequestsCollector -from source_collectors.muckrock.schemas import MuckrockURLInfoSchema +from src.source_collectors.muckrock.schemas import MuckrockURLInfoSchema from tests.automated.integration.core.helpers.constants import ALLEGHENY_COUNTY_MUCKROCK_ID, \ ALLEGHENY_COUNTY_TOWN_NAMES diff --git a/tests/manual/unsorted/test_root_url_cache_unit.py b/tests/manual/unsorted/test_root_url_cache_unit.py index 56967c14..f319d813 100644 --- a/tests/manual/unsorted/test_root_url_cache_unit.py +++ b/tests/manual/unsorted/test_root_url_cache_unit.py @@ -5,7 +5,7 @@ import pytest -from html_tag_collector.RootURLCache import RootURLCache # Adjust import according to your package structure +from src.html_tag_collector import RootURLCache # Adjust import according to your package structure @pytest.fixture diff --git a/tests/test_data/ckan_add_collection_child_packages.pkl b/tests/test_data/ckan_add_collection_child_packages.pkl deleted file mode 100644 index 7ad2897d..00000000 Binary files a/tests/test_data/ckan_add_collection_child_packages.pkl and /dev/null differ