Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
4 changes: 2 additions & 2 deletions alembic/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
from sqlalchemy import engine_from_config
from sqlalchemy import pool

from src.db.helper_functions import get_postgres_connection_string
from src.db.models import Base
from src.db.helpers import get_postgres_connection_string
from src.db.models.templates import Base

# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
Expand Down
2 changes: 1 addition & 1 deletion apply_migrations.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from alembic import command
from alembic.config import Config

from src.db.helper_functions import get_postgres_connection_string
from src.db.helpers import get_postgres_connection_string

def apply_migrations():
print("Applying migrations...")
Expand Down
8 changes: 1 addition & 7 deletions src/api/dependencies.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,4 @@
from src.core.AsyncCore import AsyncCore
from src.core.SourceCollectorCore import SourceCollectorCore


def get_core() -> SourceCollectorCore:
from src.api.main import app
return app.state.core
from src.core.core import AsyncCore

Check warning on line 1 in src/api/dependencies.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/dependencies.py#L1 <100>

Missing docstring in public module
Raw output
./src/api/dependencies.py:1:1: D100 Missing docstring in public module


def get_async_core() -> AsyncCore:
Expand Down
File renamed without changes.
8 changes: 8 additions & 0 deletions src/api/endpoints/annotate/dtos/agency/post.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from typing import Optional

Check warning on line 1 in src/api/endpoints/annotate/dtos/agency/post.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/annotate/dtos/agency/post.py#L1 <100>

Missing docstring in public module
Raw output
./src/api/endpoints/annotate/dtos/agency/post.py:1:1: D100 Missing docstring in public module

from pydantic import BaseModel


class URLAgencyAnnotationPostInfo(BaseModel):

Check warning on line 6 in src/api/endpoints/annotate/dtos/agency/post.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/annotate/dtos/agency/post.py#L6 <101>

Missing docstring in public class
Raw output
./src/api/endpoints/annotate/dtos/agency/post.py:6:1: D101 Missing docstring in public class
is_new: bool = False
suggested_agency: Optional[int] = None
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from pydantic import BaseModel

from src.core.enums import SuggestionType
from src.html_tag_collector.DataClassTags import ResponseHTMLInfo
from src.core.tasks.operators.url_html.scraper.parser.dtos.response_html import ResponseHTMLInfo

class GetNextURLForAgencyAgencyInfo(BaseModel):
suggestion_type: SuggestionType
Expand All @@ -24,6 +24,3 @@ class GetNextURLForAgencyAnnotationInnerResponse(BaseModel):
class GetNextURLForAgencyAnnotationResponse(BaseModel):
next_annotation: Optional[GetNextURLForAgencyAnnotationInnerResponse]

class URLAgencyAnnotationPostInfo(BaseModel):
is_new: bool = False
suggested_agency: Optional[int] = None
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from pydantic import BaseModel, model_validator

from src.core.DTOs.GetNextURLForAgencyAnnotationResponse import URLAgencyAnnotationPostInfo
from src.api.endpoints.annotate.dtos.agency.post import URLAgencyAnnotationPostInfo
from src.core.enums import RecordType, SuggestedStatus
from src.core.exceptions import FailedValidationException

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

from pydantic import Field, BaseModel

from src.core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAgencyInfo
from src.api.endpoints.annotate.dtos.agency.response import GetNextURLForAgencyAgencyInfo
from src.core.enums import RecordType
from src.html_tag_collector.DataClassTags import ResponseHTMLInfo
from src.core.tasks.operators.url_html.scraper.parser.dtos.response_html import ResponseHTMLInfo


class GetNextURLForAllAnnotationInnerResponse(BaseModel):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

from pydantic import Field, BaseModel

from src.db.DTOs.URLMapping import URLMapping
from src.db.dtos.url_mapping import URLMapping
from src.core.enums import RecordType
from src.html_tag_collector.DataClassTags import ResponseHTMLInfo
from src.core.tasks.operators.url_html.scraper.parser.dtos.response_html import ResponseHTMLInfo


class GetNextRecordTypeAnnotationResponseInfo(BaseModel):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

from pydantic import BaseModel, Field

from src.db.DTOs.URLMapping import URLMapping
from src.html_tag_collector.DataClassTags import ResponseHTMLInfo
from src.db.dtos.url_mapping import URLMapping
from src.core.tasks.operators.url_html.scraper.parser.dtos.response_html import ResponseHTMLInfo


class GetNextRelevanceAnnotationResponseInfo(BaseModel):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,17 @@
from fastapi import APIRouter, Depends, Path, Query

from src.api.dependencies import get_async_core
from src.core.AsyncCore import AsyncCore
from src.core.DTOs.AllAnnotationPostInfo import AllAnnotationPostInfo
from src.core.DTOs.GetNextRecordTypeAnnotationResponseInfo import GetNextRecordTypeAnnotationResponseOuterInfo
from src.core.DTOs.GetNextRelevanceAnnotationResponseInfo import GetNextRelevanceAnnotationResponseOuterInfo
from src.core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAnnotationResponse, \
URLAgencyAnnotationPostInfo
from src.core.DTOs.GetNextURLForAllAnnotationResponse import GetNextURLForAllAnnotationResponse
from src.core.DTOs.RecordTypeAnnotationPostInfo import RecordTypeAnnotationPostInfo
from src.core.DTOs.RelevanceAnnotationPostInfo import RelevanceAnnotationPostInfo
from src.security_manager.SecurityManager import get_access_info, AccessInfo
from src.api.endpoints.annotate.dtos.agency.post import URLAgencyAnnotationPostInfo
from src.api.endpoints.annotate.dtos.agency.response import GetNextURLForAgencyAnnotationResponse
from src.api.endpoints.annotate.dtos.all.post import AllAnnotationPostInfo
from src.api.endpoints.annotate.dtos.all.response import GetNextURLForAllAnnotationResponse
from src.api.endpoints.annotate.dtos.record_type.post import RecordTypeAnnotationPostInfo
from src.api.endpoints.annotate.dtos.record_type.response import GetNextRecordTypeAnnotationResponseOuterInfo
from src.api.endpoints.annotate.dtos.relevance.post import RelevanceAnnotationPostInfo
from src.api.endpoints.annotate.dtos.relevance.response import GetNextRelevanceAnnotationResponseOuterInfo
from src.core.core import AsyncCore
from src.security.manager import get_access_info
from src.security.dtos.access_info import AccessInfo

annotate_router = APIRouter(
prefix="/annotate",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from pydantic import BaseModel

from src.db.DTOs.DuplicateInfo import DuplicateInfo
from src.db.dtos.duplicate_info import DuplicateInfo


class GetDuplicatesByBatchResponse(BaseModel):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from pydantic import BaseModel

from src.db.DTOs.LogInfo import LogOutputInfo
from src.db.dtos.log_info import LogOutputInfo


class GetBatchLogsResponse(BaseModel):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from pydantic import BaseModel

from src.db.DTOs.BatchInfo import BatchInfo
from src.db.dtos.batch_info import BatchInfo


class GetBatchStatusResponse(BaseModel):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from pydantic import BaseModel

from src.db.DTOs.URLInfo import URLInfo
from src.db.dtos.url_info import URLInfo


class GetURLsByBatchResponse(BaseModel):
Expand Down
19 changes: 10 additions & 9 deletions src/api/routes/batch.py → src/api/endpoints/batch/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,17 @@
from fastapi.params import Query, Depends

from src.api.dependencies import get_async_core
from src.db.DTOs.BatchInfo import BatchInfo
from src.collector_manager.enums import CollectorType
from src.core.AsyncCore import AsyncCore
from src.core.DTOs.GetBatchLogsResponse import GetBatchLogsResponse
from src.core.DTOs.GetBatchStatusResponse import GetBatchStatusResponse
from src.core.DTOs.GetDuplicatesByBatchResponse import GetDuplicatesByBatchResponse
from src.core.DTOs.GetURLsByBatchResponse import GetURLsByBatchResponse
from src.core.DTOs.MessageResponse import MessageResponse
from src.api.endpoints.batch.dtos.get.duplicates import GetDuplicatesByBatchResponse
from src.api.endpoints.batch.dtos.get.logs import GetBatchLogsResponse
from src.api.endpoints.batch.dtos.get.status import GetBatchStatusResponse
from src.api.endpoints.batch.dtos.get.urls import GetURLsByBatchResponse
from src.api.endpoints.batch.dtos.post.abort import MessageResponse
from src.db.dtos.batch_info import BatchInfo
from src.collectors.enums import CollectorType
from src.core.core import AsyncCore
from src.core.enums import BatchStatus
from src.security_manager.SecurityManager import AccessInfo, get_access_info
from src.security.manager import get_access_info
from src.security.dtos.access_info import AccessInfo

batch_router = APIRouter(
prefix="/batch",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from pydantic import BaseModel, Field


class CollectorStartInfo(BaseModel):
batch_id: int = Field(
description="The batch id of the collector"
)
message: str = Field(
description="The status message"
)
)
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,20 @@
from fastapi.params import Depends

from src.api.dependencies import get_async_core
from src.collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO
from src.collector_manager.enums import CollectorType
from src.core.AsyncCore import AsyncCore
from src.core.DTOs.CollectorStartInfo import CollectorStartInfo
from src.core.DTOs.ManualBatchInputDTO import ManualBatchInputDTO
from src.core.DTOs.ManualBatchResponseDTO import ManualBatchResponseDTO
from src.security_manager.SecurityManager import AccessInfo, get_access_info
from src.source_collectors.auto_googler.DTOs import AutoGooglerInputDTO
from src.source_collectors.ckan.DTOs import CKANInputDTO
from src.source_collectors.common_crawler.DTOs import CommonCrawlerInputDTO
from src.source_collectors.muckrock.DTOs import MuckrockCountySearchCollectorInputDTO, \
MuckrockAllFOIARequestsCollectorInputDTO, MuckrockSimpleSearchCollectorInputDTO
from src.api.endpoints.collector.dtos.collector_start import CollectorStartInfo
from src.api.endpoints.collector.dtos.manual_batch.post import ManualBatchInputDTO
from src.api.endpoints.collector.dtos.manual_batch.response import ManualBatchResponseDTO
from src.collectors.source_collectors.auto_googler.dtos.input import AutoGooglerInputDTO
from src.collectors.source_collectors.common_crawler.input import CommonCrawlerInputDTO
from src.collectors.source_collectors.example.dtos.input import ExampleInputDTO
from src.collectors.enums import CollectorType
from src.core.core import AsyncCore
from src.security.manager import get_access_info
from src.security.dtos.access_info import AccessInfo
from src.collectors.source_collectors.ckan.dtos.input import CKANInputDTO
from src.collectors.source_collectors.muckrock.collectors.all_foia.dto import MuckrockAllFOIARequestsCollectorInputDTO
from src.collectors.source_collectors.muckrock.collectors.county.dto import MuckrockCountySearchCollectorInputDTO
from src.collectors.source_collectors.muckrock.collectors.simple.dto import MuckrockSimpleSearchCollectorInputDTO

collector_router = APIRouter(
prefix="/collector",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from pydantic import BaseModel

from src.collector_manager.enums import CollectorType
from src.collectors.enums import CollectorType


class GetMetricsBatchesAggregatedInnerResponseDTO(BaseModel):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from pydantic import BaseModel

from src.collector_manager.enums import CollectorType
from src.collectors.enums import CollectorType
from src.core.enums import BatchStatus


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@
from fastapi.params import Query, Depends

from src.api.dependencies import get_async_core
from src.core.AsyncCore import AsyncCore
from src.core.DTOs.GetMetricsBacklogResponse import GetMetricsBacklogResponseDTO
from src.core.DTOs.GetMetricsBatchesAggregatedResponseDTO import GetMetricsBatchesAggregatedResponseDTO
from src.core.DTOs.GetMetricsBatchesBreakdownResponseDTO import GetMetricsBatchesBreakdownResponseDTO
from src.core.DTOs.GetMetricsURLsAggregatedResponseDTO import GetMetricsURLsAggregatedResponseDTO
from src.core.DTOs.GetMetricsURLsBreakdownPendingResponseDTO import GetMetricsURLsBreakdownPendingResponseDTO
from src.core.DTOs.GetMetricsURLsBreakdownSubmittedResponseDTO import GetMetricsURLsBreakdownSubmittedResponseDTO
from src.security_manager.SecurityManager import AccessInfo, get_access_info
from src.api.endpoints.metrics.dtos.get.backlog import GetMetricsBacklogResponseDTO
from src.api.endpoints.metrics.dtos.get.batches.aggregated import GetMetricsBatchesAggregatedResponseDTO
from src.api.endpoints.metrics.dtos.get.batches.breakdown import GetMetricsBatchesBreakdownResponseDTO
from src.api.endpoints.metrics.dtos.get.urls.aggregated import GetMetricsURLsAggregatedResponseDTO
from src.api.endpoints.metrics.dtos.get.urls.breakdown.pending import GetMetricsURLsBreakdownPendingResponseDTO
from src.api.endpoints.metrics.dtos.get.urls.breakdown.submitted import GetMetricsURLsBreakdownSubmittedResponseDTO
from src.core.core import AsyncCore
from src.security.manager import get_access_info
from src.security.dtos.access_info import AccessInfo

metrics_router = APIRouter(
prefix="/metrics",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,22 +1,10 @@
from enum import Enum
from typing import Optional

from pydantic import BaseModel, Field
from pydantic import Field

from src.api.endpoints.review.dtos.base import FinalReviewBaseInfo
from src.core.enums import RecordType

class FinalReviewBaseInfo(BaseModel):
url_id: int = Field(
title="The id of the URL."
)

class RejectionReason(Enum):
NOT_RELEVANT = "NOT_RELEVANT"
BROKEN_PAGE_404 = "BROKEN_PAGE"
INDIVIDUAL_RECORD = "INDIVIDUAL_RECORD"

class FinalReviewRejectionInfo(FinalReviewBaseInfo):
rejection_reason: RejectionReason = RejectionReason.NOT_RELEVANT

class FinalReviewApprovalInfo(FinalReviewBaseInfo):
record_type: Optional[RecordType] = Field(
Expand Down Expand Up @@ -54,4 +42,3 @@ class FinalReviewApprovalInfo(FinalReviewBaseInfo):
"If none, defers to an existing supplying entity only if that exists.",
default=None
)

7 changes: 7 additions & 0 deletions src/api/endpoints/review/dtos/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from pydantic import BaseModel, Field

Check warning on line 1 in src/api/endpoints/review/dtos/base.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/review/dtos/base.py#L1 <100>

Missing docstring in public module
Raw output
./src/api/endpoints/review/dtos/base.py:1:1: D100 Missing docstring in public module


class FinalReviewBaseInfo(BaseModel):

Check warning on line 4 in src/api/endpoints/review/dtos/base.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/review/dtos/base.py#L4 <101>

Missing docstring in public class
Raw output
./src/api/endpoints/review/dtos/base.py:4:1: D101 Missing docstring in public class
url_id: int = Field(
title="The id of the URL."
)
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

from pydantic import BaseModel, Field

from src.core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAgencyInfo
from src.api.endpoints.annotate.dtos.agency.response import GetNextURLForAgencyAgencyInfo
from src.core.enums import RecordType, SuggestedStatus
from src.html_tag_collector.DataClassTags import ResponseHTMLInfo
from src.core.tasks.operators.url_html.scraper.parser.dtos.response_html import ResponseHTMLInfo

class FinalReviewAnnotationRelevantInfo(BaseModel):
auto: Optional[bool] = Field(title="Whether the auto-labeler has marked the URL as relevant")
Expand Down
6 changes: 6 additions & 0 deletions src/api/endpoints/review/dtos/reject.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from src.api.endpoints.review.dtos.base import FinalReviewBaseInfo

Check warning on line 1 in src/api/endpoints/review/dtos/reject.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/review/dtos/reject.py#L1 <100>

Missing docstring in public module
Raw output
./src/api/endpoints/review/dtos/reject.py:1:1: D100 Missing docstring in public module
from src.api.endpoints.review.enums import RejectionReason


class FinalReviewRejectionInfo(FinalReviewBaseInfo):

Check warning on line 5 in src/api/endpoints/review/dtos/reject.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/review/dtos/reject.py#L5 <101>

Missing docstring in public class
Raw output
./src/api/endpoints/review/dtos/reject.py:5:1: D101 Missing docstring in public class
rejection_reason: RejectionReason = RejectionReason.NOT_RELEVANT
7 changes: 7 additions & 0 deletions src/api/endpoints/review/enums.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from enum import Enum

Check warning on line 1 in src/api/endpoints/review/enums.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/review/enums.py#L1 <100>

Missing docstring in public module
Raw output
./src/api/endpoints/review/enums.py:1:1: D100 Missing docstring in public module


class RejectionReason(Enum):

Check warning on line 4 in src/api/endpoints/review/enums.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/review/enums.py#L4 <101>

Missing docstring in public class
Raw output
./src/api/endpoints/review/enums.py:4:1: D101 Missing docstring in public class
NOT_RELEVANT = "NOT_RELEVANT"
BROKEN_PAGE_404 = "BROKEN_PAGE"
INDIVIDUAL_RECORD = "INDIVIDUAL_RECORD"
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@
from fastapi import APIRouter, Depends, Query

from src.api.dependencies import get_async_core
from src.core.AsyncCore import AsyncCore
from src.core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo, FinalReviewRejectionInfo
from src.core.DTOs.GetNextURLForFinalReviewResponse import GetNextURLForFinalReviewOuterResponse
from src.security_manager.SecurityManager import AccessInfo, require_permission, Permissions
from src.api.endpoints.review.dtos.approve import FinalReviewApprovalInfo
from src.api.endpoints.review.dtos.get import GetNextURLForFinalReviewOuterResponse
from src.api.endpoints.review.dtos.reject import FinalReviewRejectionInfo
from src.core.core import AsyncCore
from src.security.manager import require_permission
from src.security.dtos.access_info import AccessInfo
from src.security.enums import Permissions

review_router = APIRouter(
prefix="/review",
Expand Down
3 changes: 2 additions & 1 deletion src/api/routes/root.py → src/api/endpoints/root.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from fastapi import APIRouter, Query, Depends

from src.security_manager.SecurityManager import AccessInfo, get_access_info
from src.security.manager import get_access_info
from src.security.dtos.access_info import AccessInfo

root_router = APIRouter(prefix="", tags=["root"])

Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from fastapi import APIRouter, Query, Depends

from src.api.dependencies import get_async_core
from src.core.AsyncCore import AsyncCore
from src.core.DTOs.SearchURLResponse import SearchURLResponse
from src.security_manager.SecurityManager import get_access_info, AccessInfo
from src.api.endpoints.search.dtos.response import SearchURLResponse
from src.core.core import AsyncCore
from src.security.manager import get_access_info
from src.security.dtos.access_info import AccessInfo

search_router = APIRouter(prefix="/search", tags=["search"])

Expand Down
Empty file.
Empty file.
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@

from pydantic import BaseModel

from src.db.DTOs.URLErrorInfos import URLErrorPydanticInfo
from src.db.DTOs.URLInfo import URLInfo
from src.db.dtos.url_error_info import URLErrorPydanticInfo
from src.db.dtos.url_info import URLInfo
from src.db.enums import TaskType
from src.core.enums import BatchStatus

Expand Down
Loading