diff --git a/alembic/versions/2025_10_09_2046-7c4049508bfc_add_link_tables_for_location_batch_and_.py b/alembic/versions/2025_10_09_2046-7c4049508bfc_add_link_tables_for_location_batch_and_.py
new file mode 100644
index 00000000..8972c0d0
--- /dev/null
+++ b/alembic/versions/2025_10_09_2046-7c4049508bfc_add_link_tables_for_location_batch_and_.py
@@ -0,0 +1,61 @@
+"""Add link tables for location_batch and agency_batch
+
+Revision ID: 7c4049508bfc
+Revises: dff1085d1c3d
+Create Date: 2025-10-09 20:46:30.013715
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+from src.util.alembic_helpers import batch_id_column, location_id_column, created_at_column, agency_id_column
+
+# revision identifiers, used by Alembic.
+revision: str = '7c4049508bfc'
+down_revision: Union[str, None] = 'dff1085d1c3d'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Create the location-batch and agency-batch link tables."""
+    _create_link_location_batches_table()
+    _create_link_agency_batches_table()
+
+
+def _create_link_location_batches_table():
+    """Create ``link_location_batches``, keyed by (batch_id, location_id)."""
+    op.create_table(
+        "link_location_batches",
+        batch_id_column(),
+        location_id_column(),
+        created_at_column(),
+        sa.PrimaryKeyConstraint(
+            'batch_id',
+            'location_id',
+            name='link_location_batches_pk'
+        )
+    )
+
+
+def _create_link_agency_batches_table():
+    """Create ``link_agency_batches``, keyed by (batch_id, agency_id)."""
+    op.create_table(
+        "link_agency_batches",
+        batch_id_column(),
+        agency_id_column(),
+        created_at_column(),
+        sa.PrimaryKeyConstraint(
+            'batch_id',
+            'agency_id',
+            name='link_agency_batches_pk'
+        )
+    )
+
+
+def downgrade() -> None:
+    """Drop both link tables created by ``upgrade``."""
+    op.drop_table("link_agency_batches")
+    op.drop_table("link_location_batches")
diff --git a/src/collectors/impl/auto_googler/auto_googler.py b/src/collectors/impl/auto_googler/auto_googler.py
index c8cddb08..bbaefed9 100644
--- a/src/collectors/impl/auto_googler/auto_googler.py
+++ b/src/collectors/impl/auto_googler/auto_googler.py
@@ -9,7 +9,11 @@ class AutoGoogler:
     and processing them for source collection
     """
 
-    def __init__(self, search_config: SearchConfig, google_searcher: GoogleSearcher):
+    def __init__(
+            self,
+            search_config: SearchConfig,
+            google_searcher: GoogleSearcher
+    ):
         self.search_config = search_config
         self.google_searcher = google_searcher
         self.data: dict[str, list[GoogleSearchQueryResultsInnerDTO]] = {
diff --git a/src/collectors/impl/auto_googler/collector.py b/src/collectors/impl/auto_googler/collector.py
index bec62c3d..9046f421 100644
--- a/src/collectors/impl/auto_googler/collector.py
+++ b/src/collectors/impl/auto_googler/collector.py
@@ -1,4 +1,7 @@
+from typing import Any
 
+from src.collectors.impl.auto_googler.queries.agency import AutoGooglerAddAgencyQueryBuilder
+from src.collectors.impl.auto_googler.queries.location import AutoGooglerAddLocationQueryBuilder
 from src.collectors.impl.base import AsyncCollectorBase
 from src.collectors.enums import CollectorType
 from src.core.env_var_manager import EnvVarManager
@@ -8,6 +11,7 @@
 from src.collectors.impl.auto_googler.dtos.input import AutoGooglerInputDTO
 from src.collectors.impl.auto_googler.searcher import GoogleSearcher
 from src.collectors.impl.auto_googler.dtos.config import SearchConfig
+from src.db.models.impl.link.agency_batch.sqlalchemy import LinkAgencyBatch
 from src.util.helper_functions import base_model_list_dump
 
 
@@ -17,11 +21,37 @@ class AutoGooglerCollector(AsyncCollectorBase):
 
     async def run_to_completion(self) -> AutoGoogler:
         dto: AutoGooglerInputDTO = self.dto
+
+        queries: list[str] = dto.queries.copy()
+
+        if dto.agency_id is not None:
+
+            agency_name: str = await self.adb_client.run_query_builder(
+                AutoGooglerAddAgencyQueryBuilder(
+                    batch_id=self.batch_id,
+                    agency_id=dto.agency_id,
+                )
+            )
+
+            # Add to all queries
+            queries = [f"{query} {agency_name}" for query in queries]
+
+        if dto.location_id is not None:
+            location_name: str = await self.adb_client.run_query_builder(
+                AutoGooglerAddLocationQueryBuilder(
+                    batch_id=self.batch_id,
+                    location_id=dto.location_id,
+                )
+            )
+
+            # Add to all queries
+            queries = [f"{query} {location_name}" for query in queries]
+
         env_var_manager = EnvVarManager.get()
         auto_googler = AutoGoogler(
             search_config=SearchConfig(
                 urls_per_result=dto.urls_per_result,
-                queries=dto.queries,
+                queries=queries,
             ),
             google_searcher=GoogleSearcher(
                 api_key=env_var_manager.google_api_key,
@@ -34,9 +64,9 @@ async def run_to_completion(self) -> AutoGoogler:
 
 
     async def run_implementation(self) -> None:
-        auto_googler = await self.run_to_completion()
+        auto_googler: AutoGoogler = await self.run_to_completion()
 
-        inner_data = []
+        inner_data: list[dict[str, Any]] = []
         for query in auto_googler.search_config.queries:
             query_results: list[AutoGooglerInnerOutputDTO] = auto_googler.data[query]
             inner_data.append({
diff --git a/src/collectors/impl/auto_googler/dtos/input.py b/src/collectors/impl/auto_googler/dtos/input.py
index 801d6104..07c55eec 100644
--- a/src/collectors/impl/auto_googler/dtos/input.py
+++ b/src/collectors/impl/auto_googler/dtos/input.py
@@ -13,3 +13,11 @@ class AutoGooglerInputDTO(BaseModel):
         min_length=1,
         max_length=100
     )
+    agency_id: int | None = Field(
+        description="ID of the agency to search for. Optional.",
+        default=None
+    )
+    location_id: int | None = Field(
+        description="ID of the location to search for. Optional.",
+        default=None
+    )
diff --git a/src/collectors/impl/auto_googler/queries/__init__.py b/src/collectors/impl/auto_googler/queries/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/collectors/impl/auto_googler/queries/agency.py b/src/collectors/impl/auto_googler/queries/agency.py
new file mode 100644
index 00000000..344ea31f
--- /dev/null
+++ b/src/collectors/impl/auto_googler/queries/agency.py
@@ -0,0 +1,47 @@
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from src.db.models.impl.agency.sqlalchemy import Agency
+from src.db.models.impl.link.agency_batch.sqlalchemy import LinkAgencyBatch
+from src.db.queries.base.builder import QueryBuilderBase
+
+from src.db.helpers.session import session_helper as sh
+
+class AutoGooglerAddAgencyQueryBuilder(QueryBuilderBase):
+
+    def __init__(
+            self,
+            batch_id: int,
+            agency_id: int,
+    ):
+        super().__init__()
+        self.batch_id = batch_id
+        self.agency_id = agency_id
+
+    async def run(self, session: AsyncSession) -> str:
+        """Link the agency to the batch and return that agency's name."""
+
+        link = LinkAgencyBatch(
+            batch_id=self.batch_id,
+            agency_id=self.agency_id
+        )
+        session.add(link)
+        # Write the link row now so the join below can see it.
+        await session.flush()
+
+        # Resolve the name through the link row so exactly this agency is
+        # returned; an unfiltered select would return an arbitrary agency.
+        # NOTE(review): join() infers its ON clause from the foreign key
+        # on LinkAgencyBatch.agency_id -- confirm the mixin declares one.
+        query = (
+            select(
+                Agency.name
+            )
+            .join(LinkAgencyBatch)
+            .where(
+                LinkAgencyBatch.batch_id == self.batch_id,
+                LinkAgencyBatch.agency_id == self.agency_id
+            )
+        )
+
+        return await sh.scalar(session, query=query)
diff --git a/src/collectors/impl/auto_googler/queries/location.py b/src/collectors/impl/auto_googler/queries/location.py
new file mode 100644
index 00000000..b554176a
--- /dev/null
+++ b/src/collectors/impl/auto_googler/queries/location.py
@@ -0,0 +1,39 @@
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from src.db.models.impl.link.location_batch.sqlalchemy import LinkLocationBatch
+from src.db.models.views.location_expanded import LocationExpandedView
+from src.db.queries.base.builder import QueryBuilderBase
+
+from src.db.helpers.session import session_helper as sh
+
+class AutoGooglerAddLocationQueryBuilder(QueryBuilderBase):
+
+    def __init__(
+            self,
+            batch_id: int,
+            location_id: int
+    ):
+        super().__init__()
+        self.batch_id = batch_id
+        self.location_id = location_id
+
+    async def run(self, session: AsyncSession) -> str:
+        """Add link and return location name."""
+
+        link = LinkLocationBatch(
+            batch_id=self.batch_id,
+            location_id=self.location_id
+        )
+        session.add(link)
+
+        query = (
+            select(
+                LocationExpandedView.full_display_name
+            )
+            .where(
+                LocationExpandedView.id == self.location_id
+            )
+        )
+
+        return await sh.scalar(session, query=query)
diff --git a/src/core/core.py b/src/core/core.py
index cce56dfe..fe5c1ef5 100644
--- a/src/core/core.py
+++ b/src/core/core.py
@@ -108,9 +108,9 @@ async def get_batch_logs(self, batch_id: int) -> GetBatchLogsResponse:
 
     # region Collector
     async def initiate_collector(
-        self,
-        collector_type: CollectorType,
-        user_id: int,
+            self,
+            collector_type: CollectorType,
+            user_id: int,
             dto: BaseModel | None = None,
     ) -> CollectorStartInfo:
         """
diff --git a/src/db/models/impl/link/agency_batch/__init__.py b/src/db/models/impl/link/agency_batch/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/db/models/impl/link/agency_batch/sqlalchemy.py b/src/db/models/impl/link/agency_batch/sqlalchemy.py
new file mode 100644
index 00000000..57e235ba
--- /dev/null
+++ b/src/db/models/impl/link/agency_batch/sqlalchemy.py
@@ -0,0 +1,20 @@
+from sqlalchemy import PrimaryKeyConstraint
+
+from src.db.models.mixins import CreatedAtMixin, LocationDependentMixin, AgencyDependentMixin, BatchDependentMixin
+from src.db.models.templates_.base import Base
+
+
+class LinkAgencyBatch(
+    Base,
+    CreatedAtMixin,
+    BatchDependentMixin,
+    AgencyDependentMixin,
+):
+    __tablename__ = "link_agency_batches"
+    __table_args__ = (
+        PrimaryKeyConstraint(
+            'batch_id',
+            'agency_id',
+            name='link_agency_batches_pk'
+        ),
+    )
diff --git a/src/db/models/impl/link/location_batch/__init__.py b/src/db/models/impl/link/location_batch/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/db/models/impl/link/location_batch/sqlalchemy.py b/src/db/models/impl/link/location_batch/sqlalchemy.py
new file mode 100644
index 00000000..e73a5ec8
--- /dev/null
+++ b/src/db/models/impl/link/location_batch/sqlalchemy.py
@@ -0,0 +1,21 @@
+from sqlalchemy import PrimaryKeyConstraint
+
+from src.db.models.mixins import LocationDependentMixin, BatchDependentMixin, CreatedAtMixin
+from src.db.models.templates_.base import Base
+
+
+class LinkLocationBatch(
+    Base,
+    LocationDependentMixin,
+    BatchDependentMixin,
+    CreatedAtMixin
+):
+
+    __tablename__ = "link_location_batches"
+    __table_args__ = (
+        PrimaryKeyConstraint(
+            'batch_id',
+            'location_id',
+            name='link_location_batches_pk'
+        ),
+    )
\ No newline at end of file