Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,15 @@
from src.core.tasks.url.operators.agency_identification.subtasks.models.suggestion import AgencySuggestion
from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType
from src.db.models.impl.url.suggestion.agency.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic
from src.external.pdap.dtos.match_agency.post import MatchAgencyInfo
from src.external.pdap.dtos.match_agency.response import MatchAgencyResponse
from src.external.pdap.enums import MatchAgencyResponseStatus

def convert_match_agency_response_to_subtask_data(

def convert_agency_suggestions_to_subtask_data(

Check warning on line 7 in src/core/tasks/url/operators/agency_identification/subtasks/convert.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/url/operators/agency_identification/subtasks/convert.py#L7 <103>

Missing docstring in public function
Raw output
./src/core/tasks/url/operators/agency_identification/subtasks/convert.py:7:1: D103 Missing docstring in public function
url_id: int,
response: MatchAgencyResponse,
agency_suggestions: list[AgencySuggestion],
subtask_type: AutoAgencyIDSubtaskType,
task_id: int
):
suggestions: list[AgencySuggestion] = \
_convert_match_agency_response_to_suggestions(
response
)
agencies_found: bool = len(suggestions) > 0
task_id: int,
) -> AutoAgencyIDSubtaskData:
agencies_found: bool = len(agency_suggestions) > 0
subtask_pydantic = URLAutoAgencyIDSubtaskPydantic(
url_id=url_id,
type=subtask_type,
Expand All @@ -25,30 +19,6 @@
)
return AutoAgencyIDSubtaskData(
pydantic_model=subtask_pydantic,
suggestions=suggestions
suggestions=agency_suggestions
)

def _convert_match_agency_response_to_suggestions(
    match_response: MatchAgencyResponse,
) -> list[AgencySuggestion]:
    """Translate a match-agency response into a list of agency suggestions.

    * ``NO_MATCH`` produces an empty list.
    * ``EXACT_MATCH`` produces a single suggestion, built from the first
      match, with a confidence of 100.
    * ``PARTIAL_MATCH`` produces one suggestion per match; a total
      confidence of 100 is split evenly between them using integer division.

    Args:
        match_response: Response exposing a ``status`` and a ``matches`` list.

    Returns:
        The suggestions derived from the response. Empty when the response
        reports no match or carries no matches at all.

    Raises:
        ValueError: If the status is not one of the three known statuses.
    """
    status = match_response.status
    if status == MatchAgencyResponseStatus.NO_MATCH:
        return []

    known_match_statuses = (
        MatchAgencyResponseStatus.EXACT_MATCH,
        MatchAgencyResponseStatus.PARTIAL_MATCH,
    )
    if status not in known_match_statuses:
        raise ValueError(f"Unknown Match Agency Response Status: {match_response.status}")

    matches = match_response.matches
    if not matches:
        # A "match" status with no matches would otherwise fail on
        # matches[0] (IndexError) or on the division below (ZeroDivisionError).
        return []

    if status == MatchAgencyResponseStatus.EXACT_MATCH:
        # Only the first match is used for an exact match.
        return [
            AgencySuggestion(
                agency_id=int(matches[0].id),
                confidence=100,
            )
        ]

    total_confidence: int = 100
    confidence_per_match: int = total_confidence // len(matches)
    return [
        AgencySuggestion(
            agency_id=int(match_info.id),
            confidence=confidence_per_match,
        )
        for match_info in matches
    ]
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,18 @@
from typing_extensions import override

from src.core.tasks.url.operators.agency_identification.subtasks.convert import \
convert_match_agency_response_to_subtask_data
convert_agency_suggestions_to_subtask_data
from src.core.tasks.url.operators.agency_identification.subtasks.impl.ckan_.params import CKANAgencyIDSubtaskParams
from src.core.tasks.url.operators.agency_identification.subtasks.impl.ckan_.query import \
GetCKANAgencyIDSubtaskParamsQueryBuilder
from src.core.tasks.url.operators.agency_identification.subtasks.models.subtask import AutoAgencyIDSubtaskData
from src.core.tasks.url.operators.agency_identification.subtasks.models.suggestion import AgencySuggestion
from src.core.tasks.url.operators.agency_identification.subtasks.queries.match_agency import MatchAgencyQueryBuilder
from src.core.tasks.url.operators.agency_identification.subtasks.templates.subtask import \
AgencyIDSubtaskOperatorBase
from src.db.client.async_ import AsyncDatabaseClient
from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType
from src.external.pdap.client import PDAPClient
from src.external.pdap.dtos.match_agency.response import MatchAgencyResponse


@final
Expand All @@ -35,12 +36,14 @@ async def inner_logic(self) -> None:
subtask_data_list: list[AutoAgencyIDSubtaskData] = []
for param in params:
agency_name: str = param.collector_metadata["agency_name"]
response: MatchAgencyResponse = await self.pdap_client.match_agency(
name=agency_name
agency_suggestions: list[AgencySuggestion] = await self.adb_client.run_query_builder(
MatchAgencyQueryBuilder(
agency_name=agency_name
)
)
subtask_data: AutoAgencyIDSubtaskData = convert_match_agency_response_to_subtask_data(
subtask_data: AutoAgencyIDSubtaskData = convert_agency_suggestions_to_subtask_data(
url_id=param.url_id,
response=response,
agency_suggestions=agency_suggestions,
subtask_type=AutoAgencyIDSubtaskType.CKAN,
task_id=self.task_id
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,19 @@
from src.collectors.impl.muckrock.api_interface.lookup_response import AgencyLookupResponse
from src.collectors.impl.muckrock.enums import AgencyLookupResponseType
from src.core.tasks.url.operators.agency_identification.subtasks.convert import \
convert_match_agency_response_to_subtask_data
convert_agency_suggestions_to_subtask_data
from src.core.tasks.url.operators.agency_identification.subtasks.impl.muckrock_.params import \
MuckrockAgencyIDSubtaskParams
from src.core.tasks.url.operators.agency_identification.subtasks.impl.muckrock_.query import \
GetMuckrockAgencyIDSubtaskParamsQueryBuilder
from src.core.tasks.url.operators.agency_identification.subtasks.models.subtask import AutoAgencyIDSubtaskData
from src.core.tasks.url.operators.agency_identification.subtasks.models.suggestion import AgencySuggestion
from src.core.tasks.url.operators.agency_identification.subtasks.queries.match_agency import MatchAgencyQueryBuilder
from src.core.tasks.url.operators.agency_identification.subtasks.templates.subtask import AgencyIDSubtaskOperatorBase
from src.db.client.async_ import AsyncDatabaseClient
from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType, SubtaskDetailCode
from src.db.models.impl.url.suggestion.agency.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic
from src.external.pdap.client import PDAPClient
from src.external.pdap.dtos.match_agency.response import MatchAgencyResponse


@final
Expand Down Expand Up @@ -52,12 +53,14 @@ async def inner_logic(self) -> None:
)
subtask_data_list.append(data)
continue
match_agency_response: MatchAgencyResponse = await self.pdap_client.match_agency(
name=agency_lookup_response.name
agency_suggestions: list[AgencySuggestion] = await self.adb_client.run_query_builder(
MatchAgencyQueryBuilder(
agency_name=agency_lookup_response.name
)
)
subtask_data: AutoAgencyIDSubtaskData = convert_match_agency_response_to_subtask_data(
subtask_data: AutoAgencyIDSubtaskData = convert_agency_suggestions_to_subtask_data(
url_id=param.url_id,
response=match_agency_response,
agency_suggestions=agency_suggestions,
subtask_type=AutoAgencyIDSubtaskType.MUCKROCK,
task_id=self.task_id
)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from typing import Sequence

Check warning on line 1 in src/core/tasks/url/operators/agency_identification/subtasks/queries/match_agency.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/url/operators/agency_identification/subtasks/queries/match_agency.py#L1 <100>

Missing docstring in public module
Raw output
./src/core/tasks/url/operators/agency_identification/subtasks/queries/match_agency.py:1:1: D100 Missing docstring in public module

from sqlalchemy import select, func, desc, RowMapping
from sqlalchemy.ext.asyncio import AsyncSession

from src.core.tasks.url.operators.agency_identification.subtasks.models.suggestion import AgencySuggestion
from src.db.models.impl.agency.sqlalchemy import Agency
from src.db.queries.base.builder import QueryBuilderBase


class MatchAgencyQueryBuilder(QueryBuilderBase):
    """Query builder that suggests agencies whose name resembles a given name.

    Agencies are scored with the SQL ``similarity`` function applied to
    ``Agency.name`` and the requested name (presumably PostgreSQL's
    ``pg_trgm`` trigram similarity -- confirm against the database setup).
    """

    def __init__(
        self,
        agency_name: str,
        *,
        min_similarity: float = 0.5,
        max_results: int = 10,
    ):
        """Store the search parameters.

        Args:
            agency_name: Name to compare against ``Agency.name``.
            min_similarity: Only agencies scoring strictly above this value
                are returned.
            max_results: Maximum number of suggestions to return.
        """
        super().__init__()
        self.agency_name = agency_name
        self.min_similarity = min_similarity
        self.max_results = max_results

    async def run(self, session: AsyncSession) -> list[AgencySuggestion]:
        """Run the similarity search and return the best matches first.

        Args:
            session: Session the query is executed with.

        Returns:
            Up to ``max_results`` suggestions ordered by descending
            similarity. Each confidence is the similarity multiplied by 100
            and truncated to an integer.
        """
        # Build the expression once so the selected column and the filter
        # cannot drift apart.
        similarity = func.similarity(Agency.name, self.agency_name)
        query = (
            select(
                Agency.id,
                similarity.label("similarity"),
            )
            .where(similarity > self.min_similarity)
            .order_by(desc("similarity"))
            .limit(self.max_results)
        )
        # NOTE(review): ``self.sh`` is not defined in this class; presumably
        # a statement helper provided by QueryBuilderBase -- confirm.
        mappings: Sequence[RowMapping] = await self.sh.mappings(
            session=session,
            query=query,
        )
        return [
            AgencySuggestion(
                agency_id=mapping[Agency.id],
                confidence=int(mapping["similarity"] * 100),
            )
            for mapping in mappings
        ]

Check warning on line 44 in src/core/tasks/url/operators/agency_identification/subtasks/queries/match_agency.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/url/operators/agency_identification/subtasks/queries/match_agency.py#L44 <292>

no newline at end of file
Raw output
./src/core/tasks/url/operators/agency_identification/subtasks/queries/match_agency.py:44:10: W292 no newline at end of file
47 changes: 0 additions & 47 deletions src/external/pdap/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,7 @@
from pdap_access_manager.models.response import ResponseInfo

from src.external.pdap._templates.request_builder import PDAPRequestBuilderBase
from src.external.pdap.dtos.match_agency.post import MatchAgencyInfo
from src.external.pdap.dtos.match_agency.response import MatchAgencyResponse
from src.external.pdap.dtos.unique_url_duplicate import UniqueURLDuplicateInfo
from src.external.pdap.enums import MatchAgencyResponseStatus


class PDAPClient:
Expand All @@ -26,50 +23,6 @@ async def run_request_builder(
) -> Any:
return await request_builder.run(self.access_manager)

async def match_agency(
    self,
    name: str,
    state: str | None = None,
    county: str | None = None,
    locality: str | None = None
) -> MatchAgencyResponse:
    """
    Look up agencies that match, or partially match, the search criteria.

    Sends an authenticated POST request to the ``/v2/match/agency`` endpoint
    and converts each returned agency into a ``MatchAgencyInfo``. When an
    agency lists at least one location, the first location supplies the
    state, county and locality of the match.
    """
    endpoint: str = f"{self.access_manager.data_sources_url}/v2/match/agency"

    request_headers: dict[str, str] = await self.access_manager.jwt_header()
    request_headers['Content-Type'] = "application/json"

    payload = {
        "name": name,
        "state": state,
        "county": county,
        "locality": locality
    }
    response_info: ResponseInfo = await self.access_manager.make_request(
        RequestInfo(
            type_=RequestType.POST,
            url=endpoint,
            headers=request_headers,
            json_=payload
        )
    )

    found: list[MatchAgencyInfo] = []
    for agency in response_info.data["agencies"]:
        info = MatchAgencyInfo(
            id=agency['id'],
            submitted_name=agency['name']
        )
        locations = agency['locations']
        if len(locations) > 0:
            # Only the first listed location is used.
            primary: dict[str, Any] = locations[0]
            info.state = primary['state']
            info.county = primary['county']
            info.locality = primary['locality']
        found.append(info)

    status = MatchAgencyResponseStatus(response_info.data["status"])
    return MatchAgencyResponse(status=status, matches=found)

async def is_url_duplicate(
self,
url_to_check: str
Expand Down
Empty file.
11 changes: 0 additions & 11 deletions src/external/pdap/dtos/match_agency/post.py

This file was deleted.

11 changes: 0 additions & 11 deletions src/external/pdap/dtos/match_agency/response.py

This file was deleted.

6 changes: 0 additions & 6 deletions src/external/pdap/enums.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
from enum import Enum


class MatchAgencyResponseStatus(Enum):
    """Status of an agency match lookup.

    Each value is the literal status string that the enum member is
    constructed from.
    """

    EXACT_MATCH = "Exact Match"
    PARTIAL_MATCH = "Partial Matches"
    NO_MATCH = "No Match"


class ApprovalStatus(Enum):
APPROVED = "approved"
REJECTED = "rejected"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from unittest.mock import AsyncMock

import pytest

from src.collectors.enums import CollectorType
Expand All @@ -9,19 +7,16 @@
from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType
from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask
from src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion
from src.external.pdap.enums import MatchAgencyResponseStatus
from src.core.tasks.url.operators.agency_identification.subtasks.impl.ckan_.core import CKANAgencyIDSubtaskOperator
from src.core.enums import SuggestionType
from src.external.pdap.dtos.match_agency.response import MatchAgencyResponse
from src.external.pdap.dtos.match_agency.post import MatchAgencyInfo
from tests.helpers.asserts import assert_task_run_success
from tests.helpers.data_creator.core import DBDataCreator


@pytest.mark.asyncio
async def test_ckan_subtask(
operator: AgencyIdentificationTaskOperator,
db_data_creator: DBDataCreator
db_data_creator: DBDataCreator,
test_agency_id: int,
test_agency_id_2: int
):
# Test that ckan subtask correctly sends agency id to
# CKANAPIInterface, sends resultant agency name to
Expand Down Expand Up @@ -53,25 +48,6 @@ async def test_ckan_subtask(
assert await operator.meets_task_prerequisites()
assert operator._subtask == AutoAgencyIDSubtaskType.CKAN

pdap_client_mock = operator.loader._pdap_client
pdap_client_mock.match_agency.return_value = MatchAgencyResponse(
status=MatchAgencyResponseStatus.PARTIAL_MATCH,
matches=[
MatchAgencyInfo(
id=1,
submitted_name="Mock Agency Name",
),
MatchAgencyInfo(
id=2,
submitted_name="Another Mock Agency Name",
)
]
)

# Create agencies
await db_data_creator.create_agency(1)
await db_data_creator.create_agency(2)

# Run the operator
run_info: TaskOperatorRunInfo = await operator.run_task()
assert_task_run_success(run_info)
Expand All @@ -92,9 +68,9 @@ async def test_ckan_subtask(
AgencyIDSubtaskSuggestion
)
assert len(suggestions) == 2
assert {suggestion.confidence for suggestion in suggestions} == {50}
assert {suggestion.agency_id for suggestion in suggestions} == {1, 2}
assert {suggestion.agency_id for suggestion in suggestions} == {
test_agency_id,
test_agency_id_2
}
assert {suggestion.subtask_id for suggestion in suggestions} == {subtask_id}

# Assert methods called as expected
pdap_client_mock.match_agency.assert_called_once_with(name="Test Agency")
Loading