diff --git a/api/main.py b/api/main.py
index ae74c914..1b80716e 100644
--- a/api/main.py
+++ b/api/main.py
@@ -11,6 +11,7 @@
 from api.routes.collector import collector_router
 from api.routes.review import review_router
 from api.routes.root import root_router
+from api.routes.search import search_router
 from api.routes.task import task_router
 from api.routes.url import url_router
 from collector_db.AsyncDatabaseClient import AsyncDatabaseClient
@@ -128,7 +129,8 @@ async def redirect_docs():
     annotate_router,
     url_router,
     task_router,
-    review_router
+    review_router,
+    search_router
 ]
 
 for router in routers:
diff --git a/api/routes/search.py b/api/routes/search.py
new file mode 100644
index 00000000..4513bb2f
--- /dev/null
+++ b/api/routes/search.py
@@ -0,0 +1,24 @@
+from fastapi import APIRouter, Query, Depends
+
+from api.dependencies import get_async_core
+from core.AsyncCore import AsyncCore
+from core.DTOs.SearchURLResponse import SearchURLResponse
+from security_manager.SecurityManager import get_access_info, AccessInfo
+
+search_router = APIRouter(prefix="/search", tags=["search"])
+
+
+@search_router.get("/url")
+async def search_url(
+    url: str = Query(description="The URL to search for"),
+    # Unused in the body; declared so the get_access_info dependency runs for this route.
+    access_info: AccessInfo = Depends(get_access_info),
+    async_core: AsyncCore = Depends(get_async_core),
+) -> SearchURLResponse:
+    """
+    Search for a URL in the database.
+
+    Delegates to `AsyncCore.search_for_url` and returns its
+    SearchURLResponse (`found` flag plus `url_id`, which is None on a miss).
+    """
+    return await async_core.search_for_url(url)
diff --git a/collector_db/AsyncDatabaseClient.py b/collector_db/AsyncDatabaseClient.py
index 52ab2c9c..85d74146 100644
--- a/collector_db/AsyncDatabaseClient.py
+++ b/collector_db/AsyncDatabaseClient.py
@@ -43,6 +43,7 @@
     GetURLsResponseInnerInfo
 from core.DTOs.ManualBatchInputDTO import ManualBatchInputDTO
 from core.DTOs.ManualBatchResponseDTO import ManualBatchResponseDTO
+from core.DTOs.SearchURLResponse import SearchURLResponse
 from core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo
 from core.DTOs.task_data_objects.AgencyIdentificationTDO import AgencyIdentificationTDO
 from core.DTOs.task_data_objects.SubmitApprovedURLTDO import SubmitApprovedURLTDO, SubmittedURLInfo
@@ -1778,3 +1779,24 @@ async def upload_manual_batch(
             duplicate_urls=duplicate_urls
         )
 
+    @session_manager
+    async def search_for_url(self, session: AsyncSession, url: str) -> SearchURLResponse:
+        """
+        Look up the URL row whose `url` column exactly equals the given string.
+
+        `one_or_none()` raises `MultipleResultsFound` if more than one row matches.
+        """
+        query = select(URL).where(URL.url == url)
+        raw_results = await session.execute(query)
+        # Separate name so the `url` string parameter is not shadowed by the ORM row.
+        url_row = raw_results.scalars().one_or_none()
+        if url_row is None:
+            return SearchURLResponse(
+                found=False,
+                url_id=None
+            )
+        return SearchURLResponse(
+            found=True,
+            url_id=url_row.id
+        )
+
diff --git a/core/AsyncCore.py b/core/AsyncCore.py
index 59a892ef..f1d69fb2 100644
--- a/core/AsyncCore.py
+++ b/core/AsyncCore.py
@@ -25,6 +25,7 @@
 from core.DTOs.ManualBatchInputDTO import ManualBatchInputDTO
 from core.DTOs.ManualBatchResponseDTO import ManualBatchResponseDTO
 from core.DTOs.MessageResponse import MessageResponse
+from core.DTOs.SearchURLResponse import SearchURLResponse
 from core.TaskManager import TaskManager
 from core.enums import BatchStatus, RecordType
 
@@ -282,3 +283,9 @@ async def upload_manual_batch(
             dto=dto
         )
 
+    async def search_for_url(self, url: str) -> SearchURLResponse:
+        """
+        Search the database for the given URL by delegating to the
+        database client's `search_for_url`.
+        """
+        return await self.adb_client.search_for_url(url)
diff --git a/core/DTOs/SearchURLResponse.py b/core/DTOs/SearchURLResponse.py
new file mode 100644
index 00000000..1a46c0be
--- /dev/null
+++ b/core/DTOs/SearchURLResponse.py
@@ -0,0 +1,11 @@
+from typing import Optional
+
+from pydantic import BaseModel
+
+
+class SearchURLResponse(BaseModel):
+    """Result of searching the database for a single URL."""
+    # True when a matching URL row exists.
+    found: bool
+    # ID of the matching URL row; None when nothing was found.
+    url_id: Optional[int] = None
diff --git a/tests/test_automated/integration/api/helpers/RequestValidator.py b/tests/test_automated/integration/api/helpers/RequestValidator.py
index 07de3c95..c2d246f5 100644
--- a/tests/test_automated/integration/api/helpers/RequestValidator.py
+++ b/tests/test_automated/integration/api/helpers/RequestValidator.py
@@ -30,6 +30,7 @@
 from core.DTOs.MessageResponse import MessageResponse
 from core.DTOs.RecordTypeAnnotationPostInfo import RecordTypeAnnotationPostInfo
 from core.DTOs.RelevanceAnnotationPostInfo import RelevanceAnnotationPostInfo
+from core.DTOs.SearchURLResponse import SearchURLResponse
 from core.enums import BatchStatus
 from util.helper_functions import update_if_not_none
 
@@ -385,4 +386,14 @@ async def submit_manual_batch(
             url="/collector/manual",
             json=dto.model_dump(mode='json'),
         )
-        return ManualBatchResponseDTO(**data)
\ No newline at end of file
+        return ManualBatchResponseDTO(**data)
+
+    async def search_url(self, url: str) -> SearchURLResponse:
+        """
+        Call GET /search/url for the given URL and parse the response.
+        """
+        data = self.get(
+            url="/search/url",
+            params={"url": url}
+        )
+        return SearchURLResponse(**data)
diff --git a/tests/test_automated/integration/api/test_search.py b/tests/test_automated/integration/api/test_search.py
new file mode 100644
index 00000000..917690fc
--- /dev/null
+++ b/tests/test_automated/integration/api/test_search.py
@@ -0,0 +1,24 @@
+import pytest
+
+from core.DTOs.SearchURLResponse import SearchURLResponse
+
+
+@pytest.mark.asyncio
+async def test_search_url(api_test_helper):
+    """The search endpoint finds a stored URL and reports a miss otherwise."""
+    ath = api_test_helper
+
+    # Create a batch with 1 URL
+    creation_info = await ath.db_data_creator.batch_and_urls(url_count=1, with_html_content=False)
+
+    # Search for that URL and locate it
+    hit: SearchURLResponse = await ath.request_validator.search_url(url=creation_info.urls[0])
+
+    assert hit.found
+    assert hit.url_id == creation_info.url_ids[0]
+
+    # Search for a non-existent URL
+    miss: SearchURLResponse = await ath.request_validator.search_url(url="http://doesnotexist.com")
+
+    assert not miss.found
+    assert miss.url_id is None