Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
13 changes: 1 addition & 12 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,14 @@ RUN uv sync --locked --no-dev
RUN playwright install-deps chromium
RUN playwright install chromium


# Copy project files
COPY api ./api
COPY db ./collector_db
COPY collector_manager ./collector_manager
COPY core ./core
COPY html_tag_collector ./html_tag_collector
COPY source_collectors ./source_collectors
COPY util ./util
COPY src ./src
COPY alembic.ini ./alembic.ini
COPY alembic ./alembic
COPY apply_migrations.py ./apply_migrations.py
COPY security_manager ./security_manager
COPY pdap_api_client ./pdap_api_client
COPY execute.sh ./execute.sh
COPY .project-root ./.project-root

COPY llm_api_logic ./llm_api_logic

# Expose the application port
EXPOSE 80

Expand Down
4 changes: 2 additions & 2 deletions alembic/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
from sqlalchemy import engine_from_config
from sqlalchemy import pool

from db.helper_functions import get_postgres_connection_string
from db.models import Base
from src.db.helper_functions import get_postgres_connection_string
from src.db.models import Base

# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from alembic import op
import sqlalchemy as sa

from db.enums import PGEnum
from src.db.enums import PGEnum

# revision identifiers, used by Alembic.
revision: str = '072b32a45b1c'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from alembic import op
import sqlalchemy as sa
from db.enums import PGEnum
from src.db.enums import PGEnum
# revision identifiers, used by Alembic.
revision: str = '19bf57df581a'
down_revision: Union[str, None] = '072b32a45b1c'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
import sqlalchemy as sa
from sqlalchemy import UniqueConstraint

from util.alembic_helpers import switch_enum_type
from src.util.alembic_helpers import switch_enum_type

# revision identifiers, used by Alembic.
revision: str = '33421c0590bb'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from alembic import op
import sqlalchemy as sa

from util.alembic_helpers import switch_enum_type
from src.util.alembic_helpers import switch_enum_type

# revision identifiers, used by Alembic.
revision: str = '6eb8084e2f48'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from typing import Sequence, Union


from util.alembic_helpers import switch_enum_type
from src.util.alembic_helpers import switch_enum_type

# revision identifiers, used by Alembic.
revision: str = '4c70177eba78'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from typing import Sequence, Union


from util.alembic_helpers import switch_enum_type
from src.util.alembic_helpers import switch_enum_type

# revision identifiers, used by Alembic.
revision: str = 'b363794fa4e9'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,7 @@
"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa

from util.alembic_helpers import switch_enum_type, alter_enum_value
from src.util.alembic_helpers import alter_enum_value

# revision identifiers, used by Alembic.
revision: str = 'e285e6e7cf71'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,8 @@
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa

from util.alembic_helpers import switch_enum_type
from src.util.alembic_helpers import switch_enum_type

# revision identifiers, used by Alembic.
revision: str = '028565b77b9e'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from alembic import op
import sqlalchemy as sa

from util.alembic_helpers import switch_enum_type
from src.util.alembic_helpers import switch_enum_type

# revision identifiers, used by Alembic.
revision: str = '864107b703ae'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from alembic import op
import sqlalchemy as sa

from util.alembic_helpers import switch_enum_type
from src.util.alembic_helpers import switch_enum_type

# revision identifiers, used by Alembic.
revision: str = 'b5f079b6b8cb'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from alembic import op
import sqlalchemy as sa

from util.alembic_helpers import switch_enum_type
from src.util.alembic_helpers import switch_enum_type

# revision identifiers, used by Alembic.
revision: str = '00cc949e0347'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import sqlalchemy as sa
from alembic import op

from db.enums import PGEnum
from src.db.enums import PGEnum

# revision identifiers, used by Alembic.
revision: str = '9afd8a5633c9'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from alembic import op
import sqlalchemy as sa

from db.enums import PGEnum
from src.db.enums import PGEnum

# revision identifiers, used by Alembic.
revision: str = 'd7eb670edaf0'
Expand Down
12 changes: 0 additions & 12 deletions api/dependencies.py

This file was deleted.

2 changes: 1 addition & 1 deletion apply_migrations.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from alembic import command
from alembic.config import Config

from db.helper_functions import get_postgres_connection_string
from src.db.helper_functions import get_postgres_connection_string

def apply_migrations():
print("Applying migrations...")
Expand Down
2 changes: 1 addition & 1 deletion local_database/DockerInfos.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from local_database.DTOs import DockerInfo, DockerfileInfo, HealthCheckInfo, VolumeInfo
from util.helper_functions import get_from_env, project_path
from src.util import get_from_env, project_path


def get_database_docker_info() -> DockerInfo:
Expand Down

This file was deleted.

File renamed without changes.
File renamed without changes.
File renamed without changes.
12 changes: 12 additions & 0 deletions src/api/dependencies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from src.core.AsyncCore import AsyncCore

Check warning on line 1 in src/api/dependencies.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/dependencies.py#L1 <100>

Missing docstring in public module
Raw output
./src/api/dependencies.py:1:1: D100 Missing docstring in public module
from src.core.SourceCollectorCore import SourceCollectorCore


def get_core() -> SourceCollectorCore:

Check warning on line 5 in src/api/dependencies.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/dependencies.py#L5 <103>

Missing docstring in public function
Raw output
./src/api/dependencies.py:5:1: D103 Missing docstring in public function
from src.api.main import app
return app.state.core


def get_async_core() -> AsyncCore:

Check warning on line 10 in src/api/dependencies.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/dependencies.py#L10 <103>

Missing docstring in public function
Raw output
./src/api/dependencies.py:10:1: D103 Missing docstring in public function
from src.api.main import app
return app.state.async_core

Check warning on line 12 in src/api/dependencies.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/dependencies.py#L12 <292>

no newline at end of file
Raw output
./src/api/dependencies.py:12:32: W292 no newline at end of file
44 changes: 22 additions & 22 deletions api/main.py → src/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,29 +5,29 @@
from fastapi import FastAPI
from starlette.responses import RedirectResponse

from api.routes.annotate import annotate_router
from api.routes.batch import batch_router
from api.routes.collector import collector_router
from api.routes.metrics import metrics_router
from api.routes.review import review_router
from api.routes.root import root_router
from api.routes.search import search_router
from api.routes.task import task_router
from api.routes.url import url_router
from db.AsyncDatabaseClient import AsyncDatabaseClient
from db.DatabaseClient import DatabaseClient
from collector_manager.AsyncCollectorManager import AsyncCollectorManager
from core.AsyncCore import AsyncCore
from core.AsyncCoreLogger import AsyncCoreLogger
from core.EnvVarManager import EnvVarManager
from core.ScheduledTaskManager import AsyncScheduledTaskManager
from core.SourceCollectorCore import SourceCollectorCore
from core.TaskManager import TaskManager
from html_tag_collector.ResponseParser import HTMLResponseParser
from html_tag_collector.RootURLCache import RootURLCache
from html_tag_collector.URLRequestInterface import URLRequestInterface
from src.api.routes.annotate import annotate_router
from src.api.routes.batch import batch_router
from src.api.routes.collector import collector_router
from src.api.routes.metrics import metrics_router
from src.api.routes.review import review_router
from src.api.routes.root import root_router
from src.api.routes.search import search_router
from src.api.routes.task import task_router
from src.api.routes.url import url_router
from src.db.AsyncDatabaseClient import AsyncDatabaseClient
from src.db.DatabaseClient import DatabaseClient
from src.collector_manager.AsyncCollectorManager import AsyncCollectorManager
from src.core.AsyncCore import AsyncCore
from src.core.AsyncCoreLogger import AsyncCoreLogger
from src.core.EnvVarManager import EnvVarManager
from src.core.ScheduledTaskManager import AsyncScheduledTaskManager
from src.core.SourceCollectorCore import SourceCollectorCore
from src.core.TaskManager import TaskManager
from src.html_tag_collector.ResponseParser import HTMLResponseParser
from src.html_tag_collector.RootURLCache import RootURLCache
from src.html_tag_collector.URLRequestInterface import URLRequestInterface
from pdap_access_manager import AccessManager
from pdap_api_client.PDAPClient import PDAPClient
from src.pdap_api_client.PDAPClient import PDAPClient
from discord_poster import DiscordPoster


Expand Down
File renamed without changes.
20 changes: 10 additions & 10 deletions api/routes/annotate.py → src/api/routes/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@

from fastapi import APIRouter, Depends, Path, Query

from api.dependencies import get_async_core
from core.AsyncCore import AsyncCore
from core.DTOs.AllAnnotationPostInfo import AllAnnotationPostInfo
from core.DTOs.GetNextRecordTypeAnnotationResponseInfo import GetNextRecordTypeAnnotationResponseOuterInfo
from core.DTOs.GetNextRelevanceAnnotationResponseInfo import GetNextRelevanceAnnotationResponseOuterInfo
from core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAnnotationResponse, \
from src.api.dependencies import get_async_core
from src.core.AsyncCore import AsyncCore
from src.core.DTOs.AllAnnotationPostInfo import AllAnnotationPostInfo
from src.core.DTOs.GetNextRecordTypeAnnotationResponseInfo import GetNextRecordTypeAnnotationResponseOuterInfo
from src.core.DTOs.GetNextRelevanceAnnotationResponseInfo import GetNextRelevanceAnnotationResponseOuterInfo
from src.core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAnnotationResponse, \
URLAgencyAnnotationPostInfo
from core.DTOs.GetNextURLForAllAnnotationResponse import GetNextURLForAllAnnotationResponse
from core.DTOs.RecordTypeAnnotationPostInfo import RecordTypeAnnotationPostInfo
from core.DTOs.RelevanceAnnotationPostInfo import RelevanceAnnotationPostInfo
from security_manager.SecurityManager import get_access_info, AccessInfo
from src.core.DTOs.GetNextURLForAllAnnotationResponse import GetNextURLForAllAnnotationResponse
from src.core.DTOs.RecordTypeAnnotationPostInfo import RecordTypeAnnotationPostInfo
from src.core.DTOs.RelevanceAnnotationPostInfo import RelevanceAnnotationPostInfo
from src.security_manager.SecurityManager import get_access_info, AccessInfo

annotate_router = APIRouter(
prefix="/annotate",
Expand Down
23 changes: 11 additions & 12 deletions api/routes/batch.py → src/api/routes/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,17 @@
from fastapi import Path, APIRouter
from fastapi.params import Query, Depends

from api.dependencies import get_core, get_async_core
from db.DTOs.BatchInfo import BatchInfo
from collector_manager.enums import CollectorType
from core.AsyncCore import AsyncCore
from core.DTOs.GetBatchLogsResponse import GetBatchLogsResponse
from core.DTOs.GetBatchStatusResponse import GetBatchStatusResponse
from core.DTOs.GetDuplicatesByBatchResponse import GetDuplicatesByBatchResponse
from core.DTOs.GetURLsByBatchResponse import GetURLsByBatchResponse
from core.DTOs.MessageResponse import MessageResponse
from core.SourceCollectorCore import SourceCollectorCore
from core.enums import BatchStatus
from security_manager.SecurityManager import AccessInfo, get_access_info
from src.api.dependencies import get_async_core
from src.db.DTOs.BatchInfo import BatchInfo
from src.collector_manager.enums import CollectorType
from src.core.AsyncCore import AsyncCore
from src.core.DTOs.GetBatchLogsResponse import GetBatchLogsResponse
from src.core.DTOs.GetBatchStatusResponse import GetBatchStatusResponse
from src.core.DTOs.GetDuplicatesByBatchResponse import GetDuplicatesByBatchResponse
from src.core.DTOs.GetURLsByBatchResponse import GetURLsByBatchResponse
from src.core.DTOs.MessageResponse import MessageResponse
from src.core.enums import BatchStatus
from src.security_manager.SecurityManager import AccessInfo, get_access_info

batch_router = APIRouter(
prefix="/batch",
Expand Down
24 changes: 12 additions & 12 deletions api/routes/collector.py → src/api/routes/collector.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
from fastapi import APIRouter
from fastapi.params import Depends

from api.dependencies import get_async_core
from collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO
from collector_manager.enums import CollectorType
from core.AsyncCore import AsyncCore
from core.DTOs.CollectorStartInfo import CollectorStartInfo
from core.DTOs.ManualBatchInputDTO import ManualBatchInputDTO
from core.DTOs.ManualBatchResponseDTO import ManualBatchResponseDTO
from security_manager.SecurityManager import AccessInfo, get_access_info
from source_collectors.auto_googler.DTOs import AutoGooglerInputDTO
from source_collectors.ckan.DTOs import CKANInputDTO
from source_collectors.common_crawler.DTOs import CommonCrawlerInputDTO
from source_collectors.muckrock.DTOs import MuckrockCountySearchCollectorInputDTO, \
from src.api.dependencies import get_async_core
from src.collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO
from src.collector_manager.enums import CollectorType
from src.core.AsyncCore import AsyncCore
from src.core.DTOs.CollectorStartInfo import CollectorStartInfo
from src.core.DTOs.ManualBatchInputDTO import ManualBatchInputDTO
from src.core.DTOs.ManualBatchResponseDTO import ManualBatchResponseDTO
from src.security_manager.SecurityManager import AccessInfo, get_access_info
from src.source_collectors.auto_googler.DTOs import AutoGooglerInputDTO
from src.source_collectors.ckan.DTOs import CKANInputDTO
from src.source_collectors.common_crawler.DTOs import CommonCrawlerInputDTO
from src.source_collectors.muckrock.DTOs import MuckrockCountySearchCollectorInputDTO, \
MuckrockAllFOIARequestsCollectorInputDTO, MuckrockSimpleSearchCollectorInputDTO

collector_router = APIRouter(
Expand Down
18 changes: 9 additions & 9 deletions api/routes/metrics.py → src/api/routes/metrics.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
from fastapi import APIRouter
from fastapi.params import Query, Depends

from api.dependencies import get_async_core
from core.AsyncCore import AsyncCore
from core.DTOs.GetMetricsBacklogResponse import GetMetricsBacklogResponseDTO
from core.DTOs.GetMetricsBatchesAggregatedResponseDTO import GetMetricsBatchesAggregatedResponseDTO
from core.DTOs.GetMetricsBatchesBreakdownResponseDTO import GetMetricsBatchesBreakdownResponseDTO
from core.DTOs.GetMetricsURLsAggregatedResponseDTO import GetMetricsURLsAggregatedResponseDTO
from core.DTOs.GetMetricsURLsBreakdownPendingResponseDTO import GetMetricsURLsBreakdownPendingResponseDTO
from core.DTOs.GetMetricsURLsBreakdownSubmittedResponseDTO import GetMetricsURLsBreakdownSubmittedResponseDTO
from security_manager.SecurityManager import AccessInfo, get_access_info
from src.api.dependencies import get_async_core
from src.core.AsyncCore import AsyncCore
from src.core.DTOs.GetMetricsBacklogResponse import GetMetricsBacklogResponseDTO
from src.core.DTOs.GetMetricsBatchesAggregatedResponseDTO import GetMetricsBatchesAggregatedResponseDTO
from src.core.DTOs.GetMetricsBatchesBreakdownResponseDTO import GetMetricsBatchesBreakdownResponseDTO
from src.core.DTOs.GetMetricsURLsAggregatedResponseDTO import GetMetricsURLsAggregatedResponseDTO
from src.core.DTOs.GetMetricsURLsBreakdownPendingResponseDTO import GetMetricsURLsBreakdownPendingResponseDTO
from src.core.DTOs.GetMetricsURLsBreakdownSubmittedResponseDTO import GetMetricsURLsBreakdownSubmittedResponseDTO
from src.security_manager.SecurityManager import AccessInfo, get_access_info

metrics_router = APIRouter(
prefix="/metrics",
Expand Down
11 changes: 5 additions & 6 deletions api/routes/review.py → src/api/routes/review.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,11 @@

from fastapi import APIRouter, Depends, Query

from api.dependencies import get_async_core
from core.AsyncCore import AsyncCore
from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo, FinalReviewBaseInfo, FinalReviewRejectionInfo
from core.DTOs.GetNextURLForFinalReviewResponse import GetNextURLForFinalReviewResponse, \
GetNextURLForFinalReviewOuterResponse
from security_manager.SecurityManager import AccessInfo, get_access_info, require_permission, Permissions
from src.api.dependencies import get_async_core
from src.core.AsyncCore import AsyncCore
from src.core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo, FinalReviewRejectionInfo
from src.core.DTOs.GetNextURLForFinalReviewResponse import GetNextURLForFinalReviewOuterResponse
from src.security_manager.SecurityManager import AccessInfo, require_permission, Permissions

review_router = APIRouter(
prefix="/review",
Expand Down
Loading