Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion pytest.ini
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
[pytest]
timeout = 300
asyncio_default_fixture_loop_scope=function
asyncio_default_fixture_loop_scope=function
markers =
manual: mark test as manual-only (excluded from default test runs)
6 changes: 4 additions & 2 deletions src/core/tasks/operators/url_html/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,14 @@
await self.get_raw_html_data_for_urls(tdos)
success_subset, error_subset = await self.separate_success_and_error_subsets(tdos)
non_404_error_subset, is_404_error_subset = await self.separate_error_and_404_subsets(error_subset)
await self.process_html_data(success_subset)
await self.update_database(is_404_error_subset, non_404_error_subset, success_subset)

async def update_database(self, is_404_error_subset, non_404_error_subset, success_subset):

Check warning on line 43 in src/core/tasks/operators/url_html/core.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/operators/url_html/core.py#L43 <102>

Missing docstring in public method
Raw output
./src/core/tasks/operators/url_html/core.py:43:1: D102 Missing docstring in public method
await self.update_errors_in_database(non_404_error_subset)
await self.update_404s_in_database(is_404_error_subset)
await self.process_html_data(success_subset)
await self.update_html_data_in_database(success_subset)


async def get_just_urls(self, tdos: list[UrlHtmlTDO]):
return [task_info.url_info.url for task_info in tdos]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ async def fetch_and_render(self, rr: RequestResources, url: str) -> Optional[URL
if simple_response.content_type != HTML_CONTENT_TYPE:
return simple_response

await self.get_dynamic_html_content(rr, url)
return await self.get_dynamic_html_content(rr, url)

async def get_dynamic_html_content(self, rr, url):
# For HTML responses, attempt to load the page to check for dynamic html content
Expand Down
27 changes: 1 addition & 26 deletions tests/automated/integration/api/conftest.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,18 @@
import asyncio
from dataclasses import dataclass
from typing import Generator, Any, AsyncGenerator
from unittest.mock import AsyncMock

import pytest
import pytest_asyncio
from starlette.testclient import TestClient

from src.api.endpoints.batch.dtos.get.status import GetBatchStatusResponse
from src.api.endpoints.review.routes import requires_final_review_permission
from src.api.main import app
from src.core.core import AsyncCore
from src.core.enums import BatchStatus
from src.security.manager import get_access_info
from src.security.dtos.access_info import AccessInfo
from src.security.enums import Permissions
from tests.automated.integration.api.helpers.RequestValidator import RequestValidator
from tests.helpers.db_data_creator import DBDataCreator


@dataclass
class APITestHelper:
request_validator: RequestValidator
async_core: AsyncCore
db_data_creator: DBDataCreator

def adb_client(self):
return self.db_data_creator.adb_client

async def wait_for_all_batches_to_complete(self):
for i in range(20):
data: GetBatchStatusResponse = self.request_validator.get_batch_statuses(
status=BatchStatus.IN_PROCESS
)
if len(data.results) == 0:
return
print("Waiting...")
await asyncio.sleep(0.1)
raise ValueError("Batches did not complete in expected time")
from tests.helpers.api_test_helper import APITestHelper

MOCK_USER_ID = 1

Expand Down
2 changes: 1 addition & 1 deletion tests/automated/integration/api/test_task.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest

from src.db.enums import TaskType
from tests.automated.integration.api.conftest import APITestHelper
from tests.helpers.api_test_helper import APITestHelper


async def task_setup(ath: APITestHelper) -> int:
Expand Down
6 changes: 4 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Any, Generator

Check warning on line 1 in tests/conftest.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] tests/conftest.py#L1 <100>

Missing docstring in public module
Raw output
./tests/conftest.py:1:1: D100 Missing docstring in public module

import pytest
from alembic.config import Config
from sqlalchemy import create_engine, inspect, MetaData
Expand Down Expand Up @@ -97,15 +99,15 @@


@pytest.fixture
def db_client_test(wipe_database) -> DatabaseClient:
def db_client_test(wipe_database) -> Generator[DatabaseClient, Any, None]:

Check warning on line 102 in tests/conftest.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] tests/conftest.py#L102 <103>

Missing docstring in public function
Raw output
./tests/conftest.py:102:1: D103 Missing docstring in public function

Check warning on line 102 in tests/conftest.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] tests/conftest.py#L102 <100>

Unused argument 'wipe_database'
Raw output
./tests/conftest.py:102:20: U100 Unused argument 'wipe_database'
# Drop pre-existing table
conn = get_postgres_connection_string()
db_client = DatabaseClient(db_url=conn)
yield db_client
db_client.engine.dispose()

@pytest.fixture
def adb_client_test(wipe_database) -> AsyncDatabaseClient:
def adb_client_test(wipe_database) -> Generator[AsyncDatabaseClient, Any, None]:

Check warning on line 110 in tests/conftest.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] tests/conftest.py#L110 <103>

Missing docstring in public function
Raw output
./tests/conftest.py:110:1: D103 Missing docstring in public function

Check warning on line 110 in tests/conftest.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] tests/conftest.py#L110 <100>

Unused argument 'wipe_database'
Raw output
./tests/conftest.py:110:21: U100 Unused argument 'wipe_database'
conn = get_postgres_connection_string(is_async=True)
adb_client = AsyncDatabaseClient(db_url=conn)
yield adb_client
Expand Down
29 changes: 29 additions & 0 deletions tests/helpers/api_test_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import asyncio

Check warning on line 1 in tests/helpers/api_test_helper.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] tests/helpers/api_test_helper.py#L1 <100>

Missing docstring in public module
Raw output
./tests/helpers/api_test_helper.py:1:1: D100 Missing docstring in public module
from dataclasses import dataclass

from src.api.endpoints.batch.dtos.get.status import GetBatchStatusResponse
from src.core.core import AsyncCore
from src.core.enums import BatchStatus
from tests.automated.integration.api.helpers.RequestValidator import RequestValidator
from tests.helpers.db_data_creator import DBDataCreator


@dataclass
class APITestHelper:

Check warning on line 12 in tests/helpers/api_test_helper.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] tests/helpers/api_test_helper.py#L12 <101>

Missing docstring in public class
Raw output
./tests/helpers/api_test_helper.py:12:1: D101 Missing docstring in public class
request_validator: RequestValidator
async_core: AsyncCore
db_data_creator: DBDataCreator

def adb_client(self):

Check warning on line 17 in tests/helpers/api_test_helper.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] tests/helpers/api_test_helper.py#L17 <102>

Missing docstring in public method
Raw output
./tests/helpers/api_test_helper.py:17:1: D102 Missing docstring in public method
return self.db_data_creator.adb_client

async def wait_for_all_batches_to_complete(self):

Check warning on line 20 in tests/helpers/api_test_helper.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] tests/helpers/api_test_helper.py#L20 <102>

Missing docstring in public method
Raw output
./tests/helpers/api_test_helper.py:20:1: D102 Missing docstring in public method
for i in range(20):
data: GetBatchStatusResponse = self.request_validator.get_batch_statuses(
status=BatchStatus.IN_PROCESS
)
if len(data.results) == 0:
return
print("Waiting...")
await asyncio.sleep(0.1)
raise ValueError("Batches did not complete in expected time")
Empty file.
47 changes: 47 additions & 0 deletions tests/manual/core/tasks/test_url_html_task_operator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from unittest.mock import patch

Check warning on line 1 in tests/manual/core/tasks/test_url_html_task_operator.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] tests/manual/core/tasks/test_url_html_task_operator.py#L1 <100>

Missing docstring in public module
Raw output
./tests/manual/core/tasks/test_url_html_task_operator.py:1:1: D100 Missing docstring in public module

Check warning on line 1 in tests/manual/core/tasks/test_url_html_task_operator.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] tests/manual/core/tasks/test_url_html_task_operator.py#L1 <401>

'unittest.mock.patch' imported but unused
Raw output
./tests/manual/core/tasks/test_url_html_task_operator.py:1:1: F401 'unittest.mock.patch' imported but unused

import pytest

from src.api.endpoints.collector.dtos.manual_batch.post import ManualBatchInputDTO, ManualBatchInnerInputDTO
from src.core.tasks.operators.url_html.core import URLHTMLTaskOperator
from src.core.tasks.operators.url_html.scraper.parser.core import HTMLResponseParser
from src.core.tasks.operators.url_html.scraper.request_interface.core import URLRequestInterface
from src.core.tasks.operators.url_html.scraper.root_url_cache.core import RootURLCache


@pytest.mark.asyncio
@pytest.mark.manual
async def test_url_html_task_operator(

Check warning on line 14 in tests/manual/core/tasks/test_url_html_task_operator.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] tests/manual/core/tasks/test_url_html_task_operator.py#L14 <103>

Missing docstring in public function
Raw output
./tests/manual/core/tasks/test_url_html_task_operator.py:14:1: D103 Missing docstring in public function
adb_client_test,
):
urls_to_insert = [
"https://www.albanyca.org/departments/fire-department/programs-classes-events",
"https://www.albanyca.gov/Departments/Police-Department/Crime-Mapping",
"https://www.facebook.com/AlbanyPoliceCa/",
"https://www.governmentjobs.com/careers/albanyca/jobs/3395149/police-officer?pagetype=jobOpportunitiesJobs",
"https://www.albanyca.org/",
"https://www.albanyca.gov/Departments/Police-Department",
"https://www.joinalbanypd.us/",
"https://www.albanyca.gov/Departments/Police-Department/Contact-Albany-Police",
"https://www.albanyca.org/departments/police-department/policies-procedures-training-sb978",
"https://www.yelp.com/biz/albany-police-department-albany-3",
]
parser = HTMLResponseParser(
root_url_cache=RootURLCache(
adb_client=adb_client_test
)
)
manual_batch_dto = ManualBatchInputDTO(
name="Test Batch",
entries=[
ManualBatchInnerInputDTO(url=url) for url in urls_to_insert
]
)
await adb_client_test.upload_manual_batch(dto=manual_batch_dto, user_id=1)
operator = URLHTMLTaskOperator(
adb_client=adb_client_test,
url_request_interface=URLRequestInterface(),
html_parser=parser
)
run_info = await operator.run_task(1)

Check warning on line 46 in tests/manual/core/tasks/test_url_html_task_operator.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] tests/manual/core/tasks/test_url_html_task_operator.py#L46 <841>

local variable 'run_info' is assigned to but never used
Raw output
./tests/manual/core/tasks/test_url_html_task_operator.py:46:5: F841 local variable 'run_info' is assigned to but never used
pass

Check warning on line 47 in tests/manual/core/tasks/test_url_html_task_operator.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] tests/manual/core/tasks/test_url_html_task_operator.py#L47 <292>

no newline at end of file
Raw output
./tests/manual/core/tasks/test_url_html_task_operator.py:47:9: W292 no newline at end of file
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import pytest

from src.core.tasks.operators.url_html import URLHTMLTaskOperator
from src.core.tasks.operators.url_html.core import URLHTMLTaskOperator
from src.core.tasks.operators.url_html.scraper.parser.core import HTMLResponseParser
from src.core.tasks.operators.url_html.scraper.request_interface.core import URLRequestInterface
from src.core.tasks.operators.url_html.scraper.root_url_cache.core import RootURLCache
from src.db.client.async_ import AsyncDatabaseClient
from src.db.dtos.url_info import URLInfo
from tests.helpers.db_data_creator import DBDataCreator
from src.core.tasks.operators.url_html.scraper import HTMLResponseParser
from src.core.tasks.operators.url_html.scraper.request_interface import URLRequestInterface

URLS = [
"https://pdap.io",
Expand Down Expand Up @@ -71,7 +74,6 @@ async def test_url_html_cycle(
url_infos.append(URLInfo(url=url))
await adb_client.insert_urls(url_infos=url_infos, batch_id=batch_id)


operator = URLHTMLTaskOperator(
adb_client=adb_client,
url_request_interface=URLRequestInterface(),
Expand Down