Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""Remove unused batches columns

Revision ID: f708c6a8ae5d
Revises: 445d8858b23a
Create Date: 2025-10-04 16:40:11.064794

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa

Check warning on line 11 in alembic/versions/2025_10_04_1640-f708c6a8ae5d_remove_unused_batches_columns.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] alembic/versions/2025_10_04_1640-f708c6a8ae5d_remove_unused_batches_columns.py#L11 <401>

'sqlalchemy as sa' imported but unused
Raw output
./alembic/versions/2025_10_04_1640-f708c6a8ae5d_remove_unused_batches_columns.py:11:1: F401 'sqlalchemy as sa' imported but unused


# revision identifiers, used by Alembic.
revision: str = 'f708c6a8ae5d'
down_revision: Union[str, None] = '445d8858b23a'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None

TABLE_NAME = "batches"

def upgrade() -> None:
    """Drop the unused rate columns from the ``batches`` table."""
    unused_columns = (
        "strategy_success_rate",
        "metadata_success_rate",
        "agency_match_rate",
        "record_type_match_rate",
        "record_category_match_rate",
    )
    # Columns are dropped one at a time, in the order listed above.
    for column_name in unused_columns:
        op.drop_column(TABLE_NAME, column_name)


def downgrade() -> None:
    """Re-create the rate columns that ``upgrade`` drops from ``batches``.

    Each column is restored as a nullable ``Float``, mirroring how the
    ``Batch`` model declared them (``Column(Float)``) before this revision.
    The values that were stored in the dropped columns cannot be recovered;
    restored columns are NULL for every existing row.
    """
    # NOTE(review): the column type is taken from the ORM model; confirm it
    # matches the migration that originally created these columns.
    restored_columns = (
        "strategy_success_rate",
        "metadata_success_rate",
        "agency_match_rate",
        "record_type_match_rate",
        "record_category_match_rate",
    )
    for column_name in restored_columns:
        op.add_column(
            TABLE_NAME,
            sa.Column(column_name, sa.Float(), nullable=True),
        )
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ dev = [
"pytest-asyncio~=0.25.2",
"pytest-mock==3.12.0",
"pytest-timeout~=2.3.1",
"vulture>=2.14",
]


Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,3 @@ async def classify_url(self, content_infos: list[URLHTMLContentInfo]) -> str:
response_format=self.response_format
)
return self.post_process_response(response)

result_str = response.choices[0].message.content

result_dict = json.loads(result_str)
return result_dict["record_type"]
5 changes: 0 additions & 5 deletions src/db/client/async_.py
Original file line number Diff line number Diff line change
Expand Up @@ -722,11 +722,6 @@ async def insert_batch(
status=batch_info.status.value,
parameters=batch_info.parameters,
compute_time=batch_info.compute_time,
strategy_success_rate=0,
metadata_success_rate=0,
agency_match_rate=0,
record_type_match_rate=0,
record_category_match_rate=0,
)
if batch_info.date_generated is not None:
batch.date_generated = batch_info.date_generated
Expand Down
5 changes: 0 additions & 5 deletions src/db/client/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,6 @@ def insert_batch(self, session: Session, batch_info: BatchInfo) -> int:
status=batch_info.status.value,
parameters=batch_info.parameters,
compute_time=batch_info.compute_time,
strategy_success_rate=0,
metadata_success_rate=0,
agency_match_rate=0,
record_type_match_rate=0,
record_category_match_rate=0,
)
if batch_info.date_generated is not None:
batch.date_generated = batch_info.date_generated
Expand Down
11 changes: 1 addition & 10 deletions src/db/models/impl/batch/sqlalchemy.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,7 @@ class Batch(WithIDBase):
nullable=False
)
date_generated = Column(TIMESTAMP, nullable=False, server_default=CURRENT_TIME_SERVER_DEFAULT)
# How often URLs ended up approved in the database
strategy_success_rate = Column(Float)
# Percentage of metadata identified by models
metadata_success_rate = Column(Float)
# Rate of matching to agencies
agency_match_rate = Column(Float)
# Rate of matching to record types
record_type_match_rate = Column(Float)
# Rate of matching to record categories
record_category_match_rate = Column(Float)

# Time taken to generate the batch
# TODO: Add means to update after execution
compute_time = Column(Float)
Expand Down
10 changes: 0 additions & 10 deletions src/db/statement_composer.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,13 +116,3 @@ def user_suggestion_not_exists(
@staticmethod
def count_distinct(field, label):
    """Return ``func.count(func.distinct(field))`` labelled as ``label``."""
    distinct_field = func.distinct(field)
    distinct_count = func.count(distinct_field)
    return distinct_count.label(label)

@staticmethod
def add_limit_and_page_offset(query: Select, page: int):
    """Apply page-based offset and limit to ``query``.

    ``page`` is treated as 1-based: page 1 yields offset 0, and each
    subsequent page skips a further ``STANDARD_ROW_LIMIT`` rows. The result
    is limited to ``STANDARD_ROW_LIMIT`` rows.
    """
    skipped_rows = (page - 1) * STANDARD_ROW_LIMIT
    paged_query = query.offset(skipped_rows)
    return paged_query.limit(STANDARD_ROW_LIMIT)
11 changes: 11 additions & 0 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.