Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 42 additions & 20 deletions api/routes/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@

from api.dependencies import get_async_core
from core.AsyncCore import AsyncCore
from core.DTOs.AllAnnotationPostInfo import AllAnnotationPostInfo
from core.DTOs.GetNextRecordTypeAnnotationResponseInfo import GetNextRecordTypeAnnotationResponseOuterInfo
from core.DTOs.GetNextRelevanceAnnotationResponseInfo import GetNextRelevanceAnnotationResponseOuterInfo
from core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAnnotationResponse, \
URLAgencyAnnotationPostInfo
from core.DTOs.GetNextURLForAllAnnotationResponse import GetNextURLForAllAnnotationResponse
from core.DTOs.RecordTypeAnnotationPostInfo import RecordTypeAnnotationPostInfo
from core.DTOs.RelevanceAnnotationPostInfo import RelevanceAnnotationPostInfo
from security_manager.SecurityManager import get_access_info, AccessInfo
Expand All @@ -18,6 +20,11 @@
responses={404: {"description": "Not found"}},
)

batch_query = Query(
description="The batch id of the next URL to get. "
"If not specified, defaults to first qualifying URL",
default=None
)

@annotate_router.get("/relevance")
async def get_next_url_for_relevance_annotation(
Expand All @@ -40,10 +47,7 @@
url_id: int = Path(description="The URL id to annotate"),
async_core: AsyncCore = Depends(get_async_core),
access_info: AccessInfo = Depends(get_access_info),
batch_id: Optional[int] = Query(
description="The batch id of the next URL to get. "
"If not specified, defaults to first qualifying URL",
default=None),
batch_id: Optional[int] = batch_query
) -> GetNextRelevanceAnnotationResponseOuterInfo:
"""
Post URL annotation and get next URL to annotate
Expand All @@ -62,10 +66,7 @@
async def get_next_url_for_record_type_annotation(
access_info: AccessInfo = Depends(get_access_info),
async_core: AsyncCore = Depends(get_async_core),
batch_id: Optional[int] = Query(
description="The batch id of the next URL to get. "
"If not specified, defaults to first qualifying URL",
default=None),
batch_id: Optional[int] = batch_query
) -> GetNextRecordTypeAnnotationResponseOuterInfo:
return await async_core.get_next_url_for_record_type_annotation(
user_id=access_info.user_id,
Expand All @@ -78,10 +79,7 @@
url_id: int = Path(description="The URL id to annotate"),
async_core: AsyncCore = Depends(get_async_core),
access_info: AccessInfo = Depends(get_access_info),
batch_id: Optional[int] = Query(
description="The batch id of the next URL to get. "
"If not specified, defaults to first qualifying URL",
default=None),
batch_id: Optional[int] = batch_query
) -> GetNextRecordTypeAnnotationResponseOuterInfo:
"""
Post URL annotation and get next URL to annotate
Expand All @@ -100,10 +98,7 @@
async def get_next_url_for_agency_annotation(
access_info: AccessInfo = Depends(get_access_info),
async_core: AsyncCore = Depends(get_async_core),
batch_id: Optional[int] = Query(
description="The batch id of the next URL to get. "
"If not specified, defaults to first qualifying URL",
default=None),
batch_id: Optional[int] = batch_query
) -> GetNextURLForAgencyAnnotationResponse:
return await async_core.get_next_url_agency_for_annotation(
user_id=access_info.user_id,
Expand All @@ -116,10 +111,7 @@
agency_annotation_post_info: URLAgencyAnnotationPostInfo,
async_core: AsyncCore = Depends(get_async_core),
access_info: AccessInfo = Depends(get_access_info),
batch_id: Optional[int] = Query(
description="The batch id of the next URL to get. "
"If not specified, defaults to first qualifying URL",
default=None),
batch_id: Optional[int] = batch_query
) -> GetNextURLForAgencyAnnotationResponse:
"""
Post URL annotation and get next URL to annotate
Expand All @@ -133,3 +125,33 @@
user_id=access_info.user_id,
batch_id=batch_id
)

@annotate_router.get("/all")
async def get_next_url_for_all_annotations(

Check warning on line 130 in api/routes/annotate.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] api/routes/annotate.py#L130 <103>

Missing docstring in public function
Raw output
./api/routes/annotate.py:130:1: D103 Missing docstring in public function
access_info: AccessInfo = Depends(get_access_info),

Check warning on line 131 in api/routes/annotate.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] api/routes/annotate.py#L131 <100>

Unused argument 'access_info'
Raw output
./api/routes/annotate.py:131:9: U100 Unused argument 'access_info'
async_core: AsyncCore = Depends(get_async_core),
batch_id: Optional[int] = batch_query
) -> GetNextURLForAllAnnotationResponse:
return await async_core.get_next_url_for_all_annotations(
batch_id=batch_id
)

@annotate_router.post("/all/{url_id}")
async def annotate_url_for_all_annotations_and_get_next_url(
url_id: int,
all_annotation_post_info: AllAnnotationPostInfo,
async_core: AsyncCore = Depends(get_async_core),
access_info: AccessInfo = Depends(get_access_info),
batch_id: Optional[int] = batch_query
) -> GetNextURLForAllAnnotationResponse:
"""
Post URL annotation and get next URL to annotate
"""
await async_core.submit_url_for_all_annotations(
user_id=access_info.user_id,
url_id=url_id,
post_info=all_annotation_post_info
)
return await async_core.get_next_url_for_all_annotations(
batch_id=batch_id
)

Check warning on line 157 in api/routes/annotate.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] api/routes/annotate.py#L157 <292>

no newline at end of file
Raw output
./api/routes/annotate.py:157:6: W292 no newline at end of file
198 changes: 146 additions & 52 deletions collector_db/AsyncDatabaseClient.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@
from typing import Optional, Type, Any, List

from fastapi import HTTPException
from sqlalchemy import select, exists, func, case, desc, Select, not_, and_, or_, update, Delete, Insert, asc, delete
from sqlalchemy import select, exists, func, case, desc, Select, not_, and_, update, asc, delete
from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker
from sqlalchemy.orm import selectinload, joinedload, QueryableAttribute, aliased
from sqlalchemy.sql.functions import coalesce
from starlette import status

from collector_db.ConfigManager import ConfigManager
Expand All @@ -23,18 +22,20 @@
from collector_db.DTOs.URLMapping import URLMapping
from collector_db.StatementComposer import StatementComposer
from collector_db.constants import PLACEHOLDER_AGENCY_NAME
from collector_db.enums import URLMetadataAttributeType, TaskType
from collector_db.helper_functions import get_postgres_connection_string
from collector_db.enums import TaskType
from collector_db.models import URL, URLErrorInfo, URLHTMLContent, Base, \
RootURL, Task, TaskError, LinkTaskURL, Batch, Agency, AutomatedUrlAgencySuggestion, \
UserUrlAgencySuggestion, AutoRelevantSuggestion, AutoRecordTypeSuggestion, UserRelevantSuggestion, \
UserRecordTypeSuggestion, ReviewingUserURL, URLOptionalDataSourceMetadata, ConfirmedURLAgency, Duplicate, Log
from collector_manager.enums import URLStatus, CollectorType
from core.DTOs.AllAnnotationPostInfo import AllAnnotationPostInfo
from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo
from core.DTOs.GetNextRecordTypeAnnotationResponseInfo import GetNextRecordTypeAnnotationResponseInfo
from core.DTOs.GetNextRelevanceAnnotationResponseInfo import GetNextRelevanceAnnotationResponseInfo
from core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAnnotationResponse, \
GetNextURLForAgencyAgencyInfo, GetNextURLForAgencyAnnotationInnerResponse
from core.DTOs.GetNextURLForAllAnnotationResponse import GetNextURLForAllAnnotationResponse, \
GetNextURLForAllAnnotationInnerResponse
from core.DTOs.GetNextURLForFinalReviewResponse import GetNextURLForFinalReviewResponse, FinalReviewAnnotationInfo, \
FinalReviewOptionalMetadata
from core.DTOs.GetTasksResponse import GetTasksResponse, GetTasksResponseTaskInfo
Expand Down Expand Up @@ -129,7 +130,6 @@
session: AsyncSession,
user_suggestion_model_to_exclude: UserSuggestionModel,
auto_suggestion_relationship: QueryableAttribute,
user_id: int,
batch_id: Optional[int],
check_if_annotated_not_relevant: bool = False
) -> URL:
Expand All @@ -140,14 +140,7 @@
.where(URL.outcome == URLStatus.PENDING.value)
# URL must not have user suggestion
.where(
not_(
exists(
select(user_suggestion_model_to_exclude)
.where(
user_suggestion_model_to_exclude.url_id == URL.id,
)
)
)
StatementComposer.user_suggestion_not_exists(user_suggestion_model_to_exclude)
)
)

Expand Down Expand Up @@ -213,7 +206,6 @@
session,
user_suggestion_model_to_exclude=UserRelevantSuggestion,
auto_suggestion_relationship=URL.auto_relevant_suggestion,
user_id=user_id,
batch_id=batch_id
)
if url is None:
Expand Down Expand Up @@ -254,7 +246,6 @@
session,
user_suggestion_model_to_exclude=UserRecordTypeSuggestion,
auto_suggestion_relationship=URL.auto_record_type_suggestion,
user_id=user_id,
batch_id=batch_id,
check_if_annotated_not_relevant=True
)
Expand Down Expand Up @@ -823,9 +814,7 @@
select(URL.id, URL.url)
# Must not have confirmed agencies
.where(
and_(
URL.outcome == URLStatus.PENDING.value
)
URL.outcome == URLStatus.PENDING.value
)
)

Expand All @@ -838,9 +827,7 @@
.where(
~exists(
select(UserUrlAgencySuggestion).
where(
UserUrlAgencySuggestion.url_id == URL.id
).
where(UserUrlAgencySuggestion.url_id == URL.id).
correlate(URL)
)
)
Expand Down Expand Up @@ -885,37 +872,8 @@
result = results[0]
url_id = result[0]
url = result[1]
# Get relevant autosuggestions and agency info, if an associated agency exists
statement = (
select(
AutomatedUrlAgencySuggestion.agency_id,
AutomatedUrlAgencySuggestion.is_unknown,
Agency.name,
Agency.state,
Agency.county,
Agency.locality
)
.join(Agency, isouter=True)
.where(AutomatedUrlAgencySuggestion.url_id == url_id)
)
raw_autosuggestions = await session.execute(statement)
autosuggestions = raw_autosuggestions.all()
agency_suggestions = []
for autosuggestion in autosuggestions:
agency_id = autosuggestion[0]
is_unknown = autosuggestion[1]
name = autosuggestion[2]
state = autosuggestion[3]
county = autosuggestion[4]
locality = autosuggestion[5]
agency_suggestions.append(GetNextURLForAgencyAgencyInfo(
suggestion_type=SuggestionType.AUTO_SUGGESTION if not is_unknown else SuggestionType.UNKNOWN,
pdap_agency_id=agency_id,
agency_name=name,
state=state,
county=county,
locality=locality
))

agency_suggestions = await self.get_agency_suggestions(session, url_id=url_id)

# Get HTML content info
html_content_infos = await self.get_html_content_info(url_id)
Expand Down Expand Up @@ -1626,5 +1584,141 @@
)
await session.execute(statement)

async def get_agency_suggestions(self, session, url_id: int) -> List[GetNextURLForAgencyAgencyInfo]:

Check warning on line 1587 in collector_db/AsyncDatabaseClient.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] collector_db/AsyncDatabaseClient.py#L1587 <102>

Missing docstring in public method
Raw output
./collector_db/AsyncDatabaseClient.py:1587:1: D102 Missing docstring in public method
# Get relevant autosuggestions and agency info, if an associated agency exists

statement = (
select(
AutomatedUrlAgencySuggestion.agency_id,
AutomatedUrlAgencySuggestion.is_unknown,
Agency.name,
Agency.state,
Agency.county,
Agency.locality
)
.join(Agency, isouter=True)
.where(AutomatedUrlAgencySuggestion.url_id == url_id)
)
raw_autosuggestions = await session.execute(statement)
autosuggestions = raw_autosuggestions.all()
agency_suggestions = []
for autosuggestion in autosuggestions:
agency_id = autosuggestion[0]
is_unknown = autosuggestion[1]
name = autosuggestion[2]
state = autosuggestion[3]
county = autosuggestion[4]
locality = autosuggestion[5]
agency_suggestions.append(GetNextURLForAgencyAgencyInfo(
suggestion_type=SuggestionType.AUTO_SUGGESTION if not is_unknown else SuggestionType.UNKNOWN,
pdap_agency_id=agency_id,
agency_name=name,
state=state,
county=county,
locality=locality
))
return agency_suggestions

@session_manager
async def get_next_url_for_all_annotations(self, session, batch_id: Optional[int] = None) -> GetNextURLForAllAnnotationResponse:

Check warning on line 1623 in collector_db/AsyncDatabaseClient.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] collector_db/AsyncDatabaseClient.py#L1623 <102>

Missing docstring in public method
Raw output
./collector_db/AsyncDatabaseClient.py:1623:1: D102 Missing docstring in public method
query = (
Select(URL)
.where(
and_(
URL.outcome == URLStatus.PENDING.value,
StatementComposer.user_suggestion_not_exists(UserUrlAgencySuggestion),
StatementComposer.user_suggestion_not_exists(UserRecordTypeSuggestion),
StatementComposer.user_suggestion_not_exists(UserRelevantSuggestion),
)
)
)
if batch_id is not None:
query = query.where(URL.batch_id == batch_id)

load_options = [
URL.html_content,
URL.automated_agency_suggestions,
URL.auto_relevant_suggestion,
URL.auto_record_type_suggestion
]
select_in_loads = [selectinload(load_option) for load_option in load_options]

# Add load options
query = query.options(
*select_in_loads
)

query = query.order_by(URL.id.asc()).limit(1)
raw_results = await session.execute(query)
url = raw_results.scalars().one_or_none()
if url is None:
return GetNextURLForAllAnnotationResponse(
next_annotation=None
)

html_response_info = DTOConverter.html_content_list_to_html_response_info(
url.html_content
)

if url.auto_relevant_suggestion is not None:
auto_relevant = url.auto_relevant_suggestion.relevant
else:
auto_relevant = None

if url.auto_record_type_suggestion is not None:
auto_record_type = url.auto_record_type_suggestion.record_type
else:
auto_record_type = None

agency_suggestions = await self.get_agency_suggestions(session, url_id=url.id)

return GetNextURLForAllAnnotationResponse(
next_annotation=GetNextURLForAllAnnotationInnerResponse(
url_id=url.id,
url=url.url,
html_info=html_response_info,
suggested_relevant=auto_relevant,
suggested_record_type=auto_record_type,
agency_suggestions=agency_suggestions
)
)

@session_manager
async def add_all_annotations_to_url(

Check warning on line 1687 in collector_db/AsyncDatabaseClient.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] collector_db/AsyncDatabaseClient.py#L1687 <102>

Missing docstring in public method
Raw output
./collector_db/AsyncDatabaseClient.py:1687:1: D102 Missing docstring in public method
self,
session,
user_id: int,
url_id: int,
post_info: AllAnnotationPostInfo
):

# Add relevant annotation
relevant_suggestion = UserRelevantSuggestion(
url_id=url_id,
user_id=user_id,
relevant=post_info.is_relevant
)
session.add(relevant_suggestion)

# If not relevant, do nothing else
if not post_info.is_relevant:
return

record_type_suggestion = UserRecordTypeSuggestion(
url_id=url_id,
user_id=user_id,
record_type=post_info.record_type.value
)
session.add(record_type_suggestion)

agency_suggestion = UserUrlAgencySuggestion(
url_id=url_id,
user_id=user_id,
agency_id=post_info.agency.suggested_agency,
is_new=post_info.agency.is_new
)
session.add(agency_suggestion)




Loading