diff --git a/Dockerfile b/Dockerfile index 85931528..e96272b0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,6 +14,8 @@ RUN uv sync --locked --no-dev # Must call from the root directory because uv does not add playwright to path RUN playwright install-deps chromium RUN playwright install chromium +# Download Spacy Model +RUN python -m spacy download en_core_web_sm # Copy project files COPY src ./src diff --git a/ENV.md b/ENV.md index 4085fcd6..c0df0c2d 100644 --- a/ENV.md +++ b/ENV.md @@ -53,7 +53,10 @@ The following flags are available: | `RUN_URL_TASKS_TASK_FLAG` | Runs URL tasks. | | `IA_PROBE_TASK_FLAG` | Extracts and links Internet Archives metadata to URLs. | | `IA_SAVE_TASK_FLAG` | Saves URLs to Internet Archives. | - +| `AGENCY_ID_HOMEPAGE_MATCH_FLAG` | Enables the homepage match subtask for agency identification. | +| `AGENCY_ID_NLP_LOCATION_MATCH_FLAG` | Enables the NLP location match subtask for agency identification. | +| `AGENCY_ID_CKAN_FLAG` | Enables the CKAN subtask for agency identification. | +| `AGENCY_ID_MUCKROCK_FLAG` | Enables the MuckRock subtask for agency identification. 
| ## Foreign Data Wrapper (FDW) diff --git a/alembic/versions/2025_08_09_2031-8cd5aa7670ff_remove_functional_duplicates.py b/alembic/versions/2025_08_09_2031-8cd5aa7670ff_remove_functional_duplicates.py index 846329ca..201d2448 100644 --- a/alembic/versions/2025_08_09_2031-8cd5aa7670ff_remove_functional_duplicates.py +++ b/alembic/versions/2025_08_09_2031-8cd5aa7670ff_remove_functional_duplicates.py @@ -52,7 +52,7 @@ def downgrade() -> None: _remove_cascade_foreign_key(URL_ERROR_INFO_TABLE_NAME, foreign_key_name=URL_ERROR_INFO_FOREIGN_KEY_NAME) _remove_cascade_foreign_key(COMPRESSED_HTML_TABLE_NAME, foreign_key_name=COMPRESSED_HTML_FOREIGN_KEY_NAME) _remove_cascade_foreign_key(URL_HTML_CONTENT_TABLE_NAME, foreign_key_name=URL_HTML_CONTENT_FOREIGN_KEY_NAME) - _remove_cascade_foreign_key(AUTOMATED_URL_AGENCY_SUGGESTION_TABLE_NAME, foreign_key_name=AUTOMATED_URL_AGENCY_SUGGESTION_FOREIGN_KEY_NAME) + # _remove_cascade_foreign_key(AUTOMATED_URL_AGENCY_SUGGESTION_TABLE_NAME, foreign_key_name=AUTOMATED_URL_AGENCY_SUGGESTION_FOREIGN_KEY_NAME) def _delete_duplicate_urls() -> None: op.execute('delete from urls where id in (2341,2343,2344,2347,2348,2349,2354,2359,2361,2501,2504,2505,2506,2507)') diff --git a/alembic/versions/2025_08_19_0803-b741b65a1431_augment_auto_agency_suggestions.py b/alembic/versions/2025_08_19_0803-b741b65a1431_augment_auto_agency_suggestions.py new file mode 100644 index 00000000..de3069e2 --- /dev/null +++ b/alembic/versions/2025_08_19_0803-b741b65a1431_augment_auto_agency_suggestions.py @@ -0,0 +1,254 @@ +"""Augment auto_agency_suggestions + +Revision ID: b741b65a1431 +Revises: 8a70ee509a74 +Create Date: 2025-08-19 08:03:12.106575 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + +from src.util.alembic_helpers import created_at_column, updated_at_column, id_column, url_id_column, switch_enum_type + +# revision identifiers, used by Alembic. 
+revision: str = 'b741b65a1431' +down_revision: Union[str, None] = '8a70ee509a74' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + +OLD_AUTO_URL_AGENCY_SUGGESTIONS_TABLE_NAME = "automated_url_agency_suggestions" +NEW_AUTO_URL_AGENCY_SUGGESTIONS_TABLE_NAME = "url_auto_agency_suggestions" + +OLD_LINK_URLS_AGENCY_TABLE_NAME = "link_urls_agencies" +NEW_LINK_URLS_AGENCY_TABLE_NAME = "link_urls_agency" + +AGENCY_AUTO_SUGGESTION_METHOD_ENUM = sa.Enum( + "homepage_match", + "nlp_location_match", + "muckrock_match", + "ckan_match", + name="agency_auto_suggestion_method", +) + +FLAG_URL_VALIDATED_TABLE_NAME = "flag_url_validated" + +VALIDATED_URL_TYPE_ENUM = sa.Enum( + "data source", + "meta url", + "not relevant", + "individual record", + name="validated_url_type" +) + + + + +def upgrade() -> None: + op.rename_table(OLD_AUTO_URL_AGENCY_SUGGESTIONS_TABLE_NAME, NEW_AUTO_URL_AGENCY_SUGGESTIONS_TABLE_NAME) + op.rename_table(OLD_LINK_URLS_AGENCY_TABLE_NAME, NEW_LINK_URLS_AGENCY_TABLE_NAME) + _alter_auto_agency_suggestions_table() + _create_flag_url_validated_table() + _add_urls_to_flag_url_validated_table() + _remove_validated_and_submitted_url_statuses() + _reset_agencies_sync_state() + + +def downgrade() -> None: + op.rename_table(NEW_LINK_URLS_AGENCY_TABLE_NAME, OLD_LINK_URLS_AGENCY_TABLE_NAME) + _revert_auto_agency_suggestions_table() + op.rename_table(NEW_AUTO_URL_AGENCY_SUGGESTIONS_TABLE_NAME, OLD_AUTO_URL_AGENCY_SUGGESTIONS_TABLE_NAME) + _revert_url_statuses() + _update_validated_and_submitted_url_statuses() + op.drop_table(FLAG_URL_VALIDATED_TABLE_NAME) + _drop_validated_url_type_enum() + +def _reset_agencies_sync_state(): + op.execute( + """ + UPDATE agencies_sync_state + set + last_full_sync_at = null, + current_cutoff_date = null, + current_page = null + """ + ) + +def _remove_validated_and_submitted_url_statuses(): + switch_enum_type( + table_name="urls", + column_name="status", + enum_name="url_status", + 
new_enum_values=[ + 'ok', + 'duplicate', + 'error', + '404 not found', + ], + check_constraints_to_drop=['url_name_not_null_when_validated'], + conversion_mappings={ + 'validated': 'ok', + 'submitted': 'ok', + 'pending': 'ok', + 'not relevant': 'ok', + 'individual record': 'ok' + } + ) + +def _add_urls_to_flag_url_validated_table(): + op.execute(""" + INSERT INTO flag_url_validated (url_id, type) + SELECT + urls.id, + CASE urls.status::text + WHEN 'validated' THEN 'data source' + WHEN 'submitted' THEN 'data source' + ELSE urls.status::text + END::validated_url_type + FROM urls + WHERE urls.status in ('validated', 'submitted', 'individual record', 'not relevant')""") + +def _revert_url_statuses(): + switch_enum_type( + table_name="urls", + column_name="status", + enum_name="url_status", + new_enum_values=[ + 'pending', + 'validated', + 'submitted', + 'duplicate', + 'not relevant', + 'error', + '404 not found', + 'individual record' + ], + conversion_mappings={ + 'ok': 'pending', + } + ) + op.create_check_constraint( + "url_name_not_null_when_validated", + "urls", + "(name IS NOT NULL) OR (status <> 'validated'::url_status)" + ) + +def _update_validated_and_submitted_url_statuses(): + op.execute(""" + UPDATE urls + SET status = 'not relevant' + FROM flag_url_validated + WHERE urls.id = flag_url_validated.url_id + AND flag_url_validated.type = 'not relevant' + """) + + op.execute(""" + UPDATE urls + SET status = 'individual record' + FROM flag_url_validated + WHERE urls.id = flag_url_validated.url_id + AND flag_url_validated.type = 'individual record' + """) + + op.execute(""" + UPDATE urls + SET status = 'validated' + FROM flag_url_validated + left join url_data_source on flag_url_validated.url_id = url_data_source.url_id + WHERE urls.id = flag_url_validated.url_id + AND flag_url_validated.type = 'data source' + AND url_data_source.url_id is NULL + """) + + op.execute(""" + UPDATE urls + SET status = 'submitted' + FROM flag_url_validated + left join url_data_source on 
flag_url_validated.url_id = url_data_source.url_id + WHERE urls.id = flag_url_validated.url_id + AND flag_url_validated.type = 'data source' + AND url_data_source.url_id is not NULL + """) + + +def _create_flag_url_validated_table(): + op.create_table( + FLAG_URL_VALIDATED_TABLE_NAME, + id_column(), + url_id_column(), + sa.Column( + 'type', + VALIDATED_URL_TYPE_ENUM, + nullable=False, + ), + created_at_column(), + updated_at_column(), + sa.UniqueConstraint('url_id', name='uq_flag_url_validated_url_id') + ) + +def _drop_validated_url_type_enum(): + VALIDATED_URL_TYPE_ENUM.drop(op.get_bind()) + +def _alter_auto_agency_suggestions_table(): + AGENCY_AUTO_SUGGESTION_METHOD_ENUM.create(op.get_bind()) + # Created At + op.add_column( + NEW_AUTO_URL_AGENCY_SUGGESTIONS_TABLE_NAME, + created_at_column() + ) + # Updated At + op.add_column( + NEW_AUTO_URL_AGENCY_SUGGESTIONS_TABLE_NAME, + updated_at_column() + ) + # Method + op.add_column( + NEW_AUTO_URL_AGENCY_SUGGESTIONS_TABLE_NAME, + sa.Column( + 'method', + AGENCY_AUTO_SUGGESTION_METHOD_ENUM, + nullable=True + ) + ) + # Confidence + op.add_column( + NEW_AUTO_URL_AGENCY_SUGGESTIONS_TABLE_NAME, + sa.Column( + 'confidence', + sa.Float(), + server_default=sa.text('0.0'), + nullable=False + ) + ) + # Check constraint that confidence is between 0 and 1 + op.create_check_constraint( + "auto_url_agency_suggestions_check_confidence_between_0_and_1", + NEW_AUTO_URL_AGENCY_SUGGESTIONS_TABLE_NAME, + "confidence BETWEEN 0 AND 1" + ) + + +def _revert_auto_agency_suggestions_table(): + # Created At + op.drop_column( + NEW_AUTO_URL_AGENCY_SUGGESTIONS_TABLE_NAME, + 'created_at' + ) + # Updated At + op.drop_column( + NEW_AUTO_URL_AGENCY_SUGGESTIONS_TABLE_NAME, + 'updated_at' + ) + # Method + op.drop_column( + NEW_AUTO_URL_AGENCY_SUGGESTIONS_TABLE_NAME, + 'method' + ) + # Confidence + op.drop_column( + NEW_AUTO_URL_AGENCY_SUGGESTIONS_TABLE_NAME, + 'confidence' + ) + AGENCY_AUTO_SUGGESTION_METHOD_ENUM.drop(op.get_bind()) + diff --git 
a/alembic/versions/2025_08_31_1930-70baaee0dd79_overhaul_agency_identification.py b/alembic/versions/2025_08_31_1930-70baaee0dd79_overhaul_agency_identification.py new file mode 100644 index 00000000..39703fde --- /dev/null +++ b/alembic/versions/2025_08_31_1930-70baaee0dd79_overhaul_agency_identification.py @@ -0,0 +1,267 @@ +"""Overhaul agency identification + +Revision ID: 70baaee0dd79 +Revises: b741b65a1431 +Create Date: 2025-08-31 19:30:20.690369 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + +from src.util.alembic_helpers import id_column, url_id_column, created_at_column, agency_id_column, updated_at_column, \ + task_id_column + +# revision identifiers, used by Alembic. +revision: str = '70baaee0dd79' +down_revision: Union[str, None] = 'b741b65a1431' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + +URL_HAS_AGENCY_SUGGESTIONS_VIEW_NAME: str = "url_has_agency_auto_suggestions_view" +URL_UNKNOWN_AGENCIES_VIEW_NAME: str = "url_unknown_agencies_view" + +URL_AUTO_AGENCY_SUBTASK_TABLE_NAME: str = "url_auto_agency_id_subtasks" +LINK_AGENCY_ID_SUBTASK_AGENCIES_TABLE_NAME: str = "agency_id_subtask_suggestions" + +META_URL_VIEW_NAME: str = "meta_url_view" +UNVALIDATED_URL_VIEW_NAME: str = "unvalidated_url_view" + +URL_AUTO_AGENCY_SUGGESTIONS_TABLE_NAME: str = "url_auto_agency_suggestions" + +AGENCY_AUTO_SUGGESTION_METHOD_ENUM = sa.dialects.postgresql.ENUM( + name="agency_auto_suggestion_method", + create_type=False +) + +SUBTASK_DETAIL_CODE_ENUM = sa.Enum( + 'no details', + 'retrieval error', + 'homepage-single agency', + 'homepage-multi agency', + name="agency_id_subtask_detail_code", +) + + + + + +def upgrade() -> None: + _create_url_auto_agency_subtask_table() + _create_url_unknown_agencies_view() + _create_meta_url_view() + _create_link_agency_id_subtask_agencies_table() + _drop_url_annotation_flags_view() + _create_new_url_annotation_flags_view() + 
_drop_url_auto_agency_suggestions_table() + _create_unvalidated_urls_view() + + +def downgrade() -> None: + _drop_url_unknown_agencies_view() + _create_url_auto_agency_suggestions_table() + _drop_url_annotation_flags_view() + _create_old_url_annotation_flags_view() + _drop_link_agency_id_subtask_agencies_table() + _drop_url_auto_agency_subtask_table() + _drop_meta_url_view() + SUBTASK_DETAIL_CODE_ENUM.drop(op.get_bind()) + _drop_unvalidated_urls_view() + +def _create_unvalidated_urls_view(): + op.execute(f""" + CREATE OR REPLACE VIEW {UNVALIDATED_URL_VIEW_NAME} as + select + u.id as url_id + from + urls u + left join flag_url_validated fuv + on fuv.url_id = u.id + where + fuv.type is null + """) + +def _drop_unvalidated_urls_view(): + op.execute(f"DROP VIEW IF EXISTS {UNVALIDATED_URL_VIEW_NAME}") + + +def _drop_url_annotation_flags_view(): + op.execute(f"DROP VIEW IF EXISTS url_annotation_flags") + + +def _drop_meta_url_view(): + op.execute(f"DROP VIEW IF EXISTS {META_URL_VIEW_NAME}") + + +def _create_meta_url_view(): + op.execute(f""" + CREATE OR REPLACE VIEW {META_URL_VIEW_NAME} AS + SELECT + urls.id as url_id + FROM urls + INNER JOIN flag_url_validated fuv on fuv.url_id = urls.id + where fuv.type = 'meta url' + """) + +def _drop_url_auto_agency_suggestions_table(): + op.drop_table(URL_AUTO_AGENCY_SUGGESTIONS_TABLE_NAME) + + +def _create_new_url_annotation_flags_view(): + + op.execute( + f""" + CREATE OR REPLACE VIEW url_annotation_flags AS + ( + SELECT u.id as url_id, + EXISTS (SELECT 1 FROM public.auto_record_type_suggestions a WHERE a.url_id = u.id) AS has_auto_record_type_suggestion, + EXISTS (SELECT 1 FROM public.auto_relevant_suggestions a WHERE a.url_id = u.id) AS has_auto_relevant_suggestion, + EXISTS (SELECT 1 FROM public.{URL_AUTO_AGENCY_SUBTASK_TABLE_NAME} a WHERE a.url_id = u.id) AS has_auto_agency_suggestion, + EXISTS (SELECT 1 FROM public.user_record_type_suggestions a WHERE a.url_id = u.id) AS has_user_record_type_suggestion, + EXISTS (SELECT 1 
FROM public.user_relevant_suggestions a WHERE a.url_id = u.id) AS has_user_relevant_suggestion, + EXISTS (SELECT 1 FROM public.user_url_agency_suggestions a WHERE a.url_id = u.id) AS has_user_agency_suggestion, + EXISTS (SELECT 1 FROM public.link_urls_agency a WHERE a.url_id = u.id) AS has_confirmed_agency, + EXISTS (SELECT 1 FROM public.reviewing_user_url a WHERE a.url_id = u.id) AS was_reviewed + FROM urls u + ) + """ + ) + + +def _create_url_unknown_agencies_view(): + op.execute( + f""" + CREATE OR REPLACE VIEW {URL_UNKNOWN_AGENCIES_VIEW_NAME} AS + SELECT + u.id + FROM urls u + LEFT JOIN {URL_AUTO_AGENCY_SUBTASK_TABLE_NAME} uas ON u.id = uas.url_id + GROUP BY u.id + HAVING bool_or(uas.agencies_found) = false + """ + ) + + +def _create_url_auto_agency_subtask_table(): + op.create_table( + URL_AUTO_AGENCY_SUBTASK_TABLE_NAME, + id_column(), + task_id_column(), + url_id_column(), + sa.Column( + "type", + AGENCY_AUTO_SUGGESTION_METHOD_ENUM, + nullable=False + ), + sa.Column( + "agencies_found", + sa.Boolean(), + nullable=False + ), + sa.Column( + "detail", + SUBTASK_DETAIL_CODE_ENUM, + server_default=sa.text("'no details'"), + nullable=False + ), + created_at_column() + ) + + +def _create_link_agency_id_subtask_agencies_table(): + op.create_table( + LINK_AGENCY_ID_SUBTASK_AGENCIES_TABLE_NAME, + id_column(), + sa.Column( + "subtask_id", + sa.Integer(), + sa.ForeignKey( + f'{URL_AUTO_AGENCY_SUBTASK_TABLE_NAME}.id', + ondelete='CASCADE' + ), + nullable=False, + comment='A foreign key to the `url_auto_agency_subtask` table.' 
+ ), + sa.Column( + "confidence", + sa.Integer, + sa.CheckConstraint( + "confidence BETWEEN 0 and 100" + ), + nullable=False, + ), + agency_id_column(), + created_at_column() + ) + + +def _drop_link_agency_id_subtask_agencies_table(): + op.drop_table(LINK_AGENCY_ID_SUBTASK_AGENCIES_TABLE_NAME) + + +def _drop_url_auto_agency_subtask_table(): + op.drop_table(URL_AUTO_AGENCY_SUBTASK_TABLE_NAME) + + +def _create_url_auto_agency_suggestions_table(): + op.create_table( + URL_AUTO_AGENCY_SUGGESTIONS_TABLE_NAME, + id_column(), + agency_id_column(), + url_id_column(), + sa.Column( + "is_unknown", + sa.Boolean(), + nullable=False + ), + created_at_column(), + updated_at_column(), + sa.Column( + 'method', + AGENCY_AUTO_SUGGESTION_METHOD_ENUM, + nullable=True + ), + sa.Column( + 'confidence', + sa.Float(), + server_default=sa.text('0.0'), + nullable=False + ), + sa.UniqueConstraint("agency_id", "url_id") + ) + + +def _drop_url_unknown_agencies_view(): + op.execute(f"DROP VIEW IF EXISTS {URL_UNKNOWN_AGENCIES_VIEW_NAME}") + +def _drop_url_annotation_flags_view(): + op.execute("DROP VIEW url_annotation_flags;") + + +def _create_old_url_annotation_flags_view(): + op.execute( + f""" + CREATE OR REPLACE VIEW url_annotation_flags AS + ( + SELECT u.id, + CASE WHEN arts.url_id IS NOT NULL THEN TRUE ELSE FALSE END AS has_auto_record_type_suggestion, + CASE WHEN ars.url_id IS NOT NULL THEN TRUE ELSE FALSE END AS has_auto_relevant_suggestion, + CASE WHEN auas.url_id IS NOT NULL THEN TRUE ELSE FALSE END AS has_auto_agency_suggestion, + CASE WHEN urts.url_id IS NOT NULL THEN TRUE ELSE FALSE END AS has_user_record_type_suggestion, + CASE WHEN urs.url_id IS NOT NULL THEN TRUE ELSE FALSE END AS has_user_relevant_suggestion, + CASE WHEN uuas.url_id IS NOT NULL THEN TRUE ELSE FALSE END AS has_user_agency_suggestion, + CASE WHEN cua.url_id IS NOT NULL THEN TRUE ELSE FALSE END AS has_confirmed_agency, + CASE WHEN ruu.url_id IS NOT NULL THEN TRUE ELSE FALSE END AS was_reviewed + FROM urls u + LEFT 
JOIN public.auto_record_type_suggestions arts ON u.id = arts.url_id + LEFT JOIN public.auto_relevant_suggestions ars ON u.id = ars.url_id + LEFT JOIN public.{URL_AUTO_AGENCY_SUGGESTIONS_TABLE_NAME} auas ON u.id = auas.url_id + LEFT JOIN public.user_record_type_suggestions urts ON u.id = urts.url_id + LEFT JOIN public.user_relevant_suggestions urs ON u.id = urs.url_id + LEFT JOIN public.user_url_agency_suggestions uuas ON u.id = uuas.url_id + LEFT JOIN public.reviewing_user_url ruu ON u.id = ruu.url_id + LEFT JOIN public.link_urls_agency cua on u.id = cua.url_id + ) + """ + ) diff --git a/alembic/versions/d7eb670edaf0_revise_agency_identification_logic.py b/alembic/versions/d7eb670edaf0_revise_agency_identification_logic.py index cd68a4b5..6ba6f7c9 100644 --- a/alembic/versions/d7eb670edaf0_revise_agency_identification_logic.py +++ b/alembic/versions/d7eb670edaf0_revise_agency_identification_logic.py @@ -118,7 +118,7 @@ def upgrade(): def downgrade(): # Drop constraints first op.drop_constraint("uq_confirmed_url_agency", "confirmed_url_agency", type_="unique") - op.drop_constraint("uq_automated_url_agency_suggestions", "automated_url_agency_suggestions", type_="unique") + # op.drop_constraint("uq_automated_url_agency_suggestions", "automated_url_agency_suggestions", type_="unique") op.drop_constraint("uq_user_url_agency_suggestions", "user_url_agency_suggestions", type_="unique") # Drop tables diff --git a/pyproject.toml b/pyproject.toml index 3eb1446d..afe4a89a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ dependencies = [ "marshmallow~=3.23.2", "openai~=1.60.1", "pdap-access-manager==0.3.6", + "pip>=25.2", "playwright~=1.49.1", "psycopg2-binary~=2.9.6", "psycopg[binary]~=3.1.20", @@ -31,6 +32,8 @@ dependencies = [ "pyjwt~=2.10.1", "python-dotenv~=1.0.1", "requests~=2.32.3", + "side-effects>=1.6.dev0", + "spacy>=3.8.7", "sqlalchemy~=2.0.36", "starlette~=0.45.3", "tqdm>=4.64.1", diff --git 
a/src/api/endpoints/annotate/_shared/queries/get_annotation_batch_info.py b/src/api/endpoints/annotate/_shared/queries/get_annotation_batch_info.py index 9b3ffdeb..5a56cf32 100644 --- a/src/api/endpoints/annotate/_shared/queries/get_annotation_batch_info.py +++ b/src/api/endpoints/annotate/_shared/queries/get_annotation_batch_info.py @@ -5,7 +5,7 @@ from src.api.endpoints.annotate.dtos.shared.batch import AnnotationBatchInfo from src.collectors.enums import URLStatus -from src.db.models.impl.link.batch_url import LinkBatchURL +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.sqlalchemy import URL from src.db.queries.base.builder import QueryBuilderBase from src.db.statement_composer import StatementComposer @@ -42,7 +42,7 @@ async def run( ) common_where_clause = [ - URL.status == URLStatus.PENDING.value, + URL.status == URLStatus.OK.value, LinkBatchURL.batch_id == self.batch_id, ] diff --git a/src/api/endpoints/annotate/_shared/queries/get_next_url_for_user_annotation.py b/src/api/endpoints/annotate/_shared/queries/get_next_url_for_user_annotation.py index a6a5b69d..6eed4b07 100644 --- a/src/api/endpoints/annotate/_shared/queries/get_next_url_for_user_annotation.py +++ b/src/api/endpoints/annotate/_shared/queries/get_next_url_for_user_annotation.py @@ -5,7 +5,8 @@ from src.collectors.enums import URLStatus from src.core.enums import SuggestedStatus from src.db.client.types import UserSuggestionModel -from src.db.models.impl.link.batch_url import LinkBatchURL +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.suggestion.relevant.user import UserRelevantSuggestion from src.db.queries.base.builder import QueryBuilderBase @@ -32,6 +33,10 @@ async def run(self, session: AsyncSession): select( URL, ) + .outerjoin( + FlagURLValidated, + 
FlagURLValidated.url_id == URL.id + ) ) if self.batch_id is not None: @@ -43,7 +48,7 @@ async def run(self, session: AsyncSession): query = ( query - .where(URL.status == URLStatus.PENDING.value) + .where(FlagURLValidated.url_id.is_(None)) # URL must not have user suggestion .where( StatementComposer.user_suggestion_not_exists(self.user_suggestion_model_to_exclude) diff --git a/src/api/endpoints/annotate/agency/get/queries/agency_suggestion.py b/src/api/endpoints/annotate/agency/get/queries/agency_suggestion.py deleted file mode 100644 index 1f202263..00000000 --- a/src/api/endpoints/annotate/agency/get/queries/agency_suggestion.py +++ /dev/null @@ -1,55 +0,0 @@ -from sqlalchemy import select -from sqlalchemy.ext.asyncio import AsyncSession - -from src.api.endpoints.annotate.agency.get.dto import GetNextURLForAgencyAgencyInfo -from src.core.enums import SuggestionType -from src.db.models.impl.agency.sqlalchemy import Agency -from src.db.models.impl.url.suggestion.agency.auto import AutomatedUrlAgencySuggestion -from src.db.queries.base.builder import QueryBuilderBase - - -class GetAgencySuggestionsQueryBuilder(QueryBuilderBase): - - def __init__( - self, - url_id: int - ): - super().__init__() - self.url_id = url_id - - async def run(self, session: AsyncSession) -> list[GetNextURLForAgencyAgencyInfo]: - # Get relevant autosuggestions and agency info, if an associated agency exists - - statement = ( - select( - AutomatedUrlAgencySuggestion.agency_id, - AutomatedUrlAgencySuggestion.is_unknown, - Agency.name, - Agency.state, - Agency.county, - Agency.locality - ) - .join(Agency, isouter=True) - .where(AutomatedUrlAgencySuggestion.url_id == self.url_id) - ) - raw_autosuggestions = await session.execute(statement) - autosuggestions = raw_autosuggestions.all() - agency_suggestions = [] - for autosuggestion in autosuggestions: - agency_id = autosuggestion[0] - is_unknown = autosuggestion[1] - name = autosuggestion[2] - state = autosuggestion[3] - county = 
autosuggestion[4] - locality = autosuggestion[5] - agency_suggestions.append( - GetNextURLForAgencyAgencyInfo( - suggestion_type=SuggestionType.AUTO_SUGGESTION if not is_unknown else SuggestionType.UNKNOWN, - pdap_agency_id=agency_id, - agency_name=name, - state=state, - county=county, - locality=locality - ) - ) - return agency_suggestions \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/queries/__init__.py b/src/api/endpoints/annotate/agency/get/queries/agency_suggestion_/__init__.py similarity index 100% rename from src/core/tasks/url/operators/agency_identification/queries/__init__.py rename to src/api/endpoints/annotate/agency/get/queries/agency_suggestion_/__init__.py diff --git a/src/api/endpoints/annotate/agency/get/queries/agency_suggestion_/core.py b/src/api/endpoints/annotate/agency/get/queries/agency_suggestion_/core.py new file mode 100644 index 00000000..a9a33e84 --- /dev/null +++ b/src/api/endpoints/annotate/agency/get/queries/agency_suggestion_/core.py @@ -0,0 +1,73 @@ +from typing import Sequence + +from sqlalchemy import select, RowMapping +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.annotate.agency.get.dto import GetNextURLForAgencyAgencyInfo +from src.api.endpoints.annotate.agency.get.queries.agency_suggestion_.suggestions_with_highest_confidence import \ + SuggestionsWithHighestConfidenceCTE +from src.core.enums import SuggestionType +from src.db.models.impl.agency.sqlalchemy import Agency +from src.db.queries.base.builder import QueryBuilderBase + +from src.db.helpers.session import session_helper as sh + +class GetAgencySuggestionsQueryBuilder(QueryBuilderBase): + + def __init__( + self, + url_id: int + ): + super().__init__() + self.url_id = url_id + + async def run(self, session: AsyncSession) -> list[GetNextURLForAgencyAgencyInfo]: + # Get relevant autosuggestions and agency info, if an associated agency exists + + cte = SuggestionsWithHighestConfidenceCTE() + + query 
= ( + select( + cte.agency_id, + cte.confidence, + Agency.name, + Agency.state, + Agency.county, + Agency.locality + ) + .outerjoin( + Agency, + Agency.agency_id == cte.agency_id + ) + .where( + cte.url_id == self.url_id + ) + ) + + raw_autosuggestions: Sequence[RowMapping] = await sh.mappings(session, query=query) + if len(raw_autosuggestions) == 0: + # Unknown agency + return [ + GetNextURLForAgencyAgencyInfo( + suggestion_type=SuggestionType.UNKNOWN, + ) + ] + + agency_suggestions: list[GetNextURLForAgencyAgencyInfo] = [] + for autosuggestion in raw_autosuggestions: + agency_id: int = autosuggestion["agency_id"] + name: str = autosuggestion["name"] + state: str | None = autosuggestion["state"] + county: str | None = autosuggestion["county"] + locality: str | None = autosuggestion["locality"] + agency_suggestions.append( + GetNextURLForAgencyAgencyInfo( + suggestion_type=SuggestionType.AUTO_SUGGESTION, + pdap_agency_id=agency_id, + agency_name=name, + state=state, + county=county, + locality=locality + ) + ) + return agency_suggestions \ No newline at end of file diff --git a/src/api/endpoints/annotate/agency/get/queries/agency_suggestion_/suggestions_with_highest_confidence.py b/src/api/endpoints/annotate/agency/get/queries/agency_suggestion_/suggestions_with_highest_confidence.py new file mode 100644 index 00000000..6d389b11 --- /dev/null +++ b/src/api/endpoints/annotate/agency/get/queries/agency_suggestion_/suggestions_with_highest_confidence.py @@ -0,0 +1,62 @@ +from sqlalchemy import CTE, select, func, Column + +from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask +from src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion + +SUGGESTIONS_WITH_HIGHEST_CONFIDENCE_CTE: CTE = ( + select( + URLAutoAgencyIDSubtask.url_id, + AgencyIDSubtaskSuggestion.agency_id, + func.max(AgencyIDSubtaskSuggestion.confidence) + ) + .select_from(URLAutoAgencyIDSubtask) + .join( + 
AgencyIDSubtaskSuggestion, + URLAutoAgencyIDSubtask.id == AgencyIDSubtaskSuggestion.subtask_id + ) + .group_by( + URLAutoAgencyIDSubtask.url_id, + AgencyIDSubtaskSuggestion.agency_id + ) + .cte("suggestions_with_highest_confidence") +) + +class SuggestionsWithHighestConfidenceCTE: + + def __init__(self): + self._cte = ( + select( + URLAutoAgencyIDSubtask.url_id, + AgencyIDSubtaskSuggestion.agency_id, + func.max(AgencyIDSubtaskSuggestion.confidence).label("confidence") + ) + .select_from(URLAutoAgencyIDSubtask) + .join( + AgencyIDSubtaskSuggestion, + URLAutoAgencyIDSubtask.id == AgencyIDSubtaskSuggestion.subtask_id + ) + .where( + AgencyIDSubtaskSuggestion.agency_id.isnot(None) + ) + .group_by( + URLAutoAgencyIDSubtask.url_id, + AgencyIDSubtaskSuggestion.agency_id + ) + .cte("suggestions_with_highest_confidence") + ) + + @property + def cte(self) -> CTE: + return self._cte + + @property + def url_id(self) -> Column[int]: + return self._cte.columns.url_id + + @property + def agency_id(self) -> Column[int]: + return self._cte.columns.agency_id + + @property + def confidence(self) -> Column[float]: + return self._cte.columns.confidence \ No newline at end of file diff --git a/src/api/endpoints/annotate/agency/get/queries/next_for_annotation.py b/src/api/endpoints/annotate/agency/get/queries/next_for_annotation.py index 70ae112a..e8fdc6b2 100644 --- a/src/api/endpoints/annotate/agency/get/queries/next_for_annotation.py +++ b/src/api/endpoints/annotate/agency/get/queries/next_for_annotation.py @@ -4,17 +4,17 @@ from src.api.endpoints.annotate._shared.queries.get_annotation_batch_info import GetAnnotationBatchInfoQueryBuilder from src.api.endpoints.annotate.agency.get.dto import GetNextURLForAgencyAnnotationResponse, \ GetNextURLForAgencyAnnotationInnerResponse -from src.api.endpoints.annotate.agency.get.queries.agency_suggestion import GetAgencySuggestionsQueryBuilder +from src.api.endpoints.annotate.agency.get.queries.agency_suggestion_.core import 
GetAgencySuggestionsQueryBuilder from src.collectors.enums import URLStatus from src.core.enums import SuggestedStatus from src.core.tasks.url.operators.html.scraper.parser.util import convert_to_response_html_info from src.db.dtos.url.mapping import URLMapping -from src.db.models.impl.link.batch_url import LinkBatchURL +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.auto import AutomatedUrlAgencySuggestion from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion from src.db.models.impl.url.suggestion.relevant.user import UserRelevantSuggestion +from src.db.models.views.url_annotations_flags import URLAnnotationFlagsView from src.db.queries.base.builder import QueryBuilderBase from src.db.queries.implementations.core.get.html_content_info import GetHTMLContentInfoQueryBuilder @@ -48,30 +48,20 @@ async def run( # Must not have confirmed agencies query = query.where( - URL.status == URLStatus.PENDING.value + URL.status == URLStatus.OK.value ) - - # Must not have been annotated by a user query = ( - query.join(UserUrlAgencySuggestion, isouter=True) - .where( - ~exists( - select(UserUrlAgencySuggestion). - where(UserUrlAgencySuggestion.url_id == URL.id). - correlate(URL) - ) + query.join( + URLAnnotationFlagsView, + URLAnnotationFlagsView.url_id == URL.id ) - # Must have extant autosuggestions - .join(AutomatedUrlAgencySuggestion, isouter=True) + # Must not have been annotated by a user .where( - exists( - select(AutomatedUrlAgencySuggestion). - where(AutomatedUrlAgencySuggestion.url_id == URL.id). 
- correlate(URL) - ) + URLAnnotationFlagsView.has_user_agency_suggestion.is_(False), + # Must have extant autosuggestions + URLAnnotationFlagsView.has_auto_agency_suggestion.is_(True) ) - # Must not have confirmed agencies .join(LinkURLAgency, isouter=True) .where( ~exists( diff --git a/src/api/endpoints/annotate/all/get/query.py b/src/api/endpoints/annotate/all/get/query.py index a2afafd9..05855578 100644 --- a/src/api/endpoints/annotate/all/get/query.py +++ b/src/api/endpoints/annotate/all/get/query.py @@ -3,14 +3,14 @@ from sqlalchemy.orm import selectinload from src.api.endpoints.annotate._shared.queries.get_annotation_batch_info import GetAnnotationBatchInfoQueryBuilder -from src.api.endpoints.annotate.agency.get.queries.agency_suggestion import GetAgencySuggestionsQueryBuilder +from src.api.endpoints.annotate.agency.get.queries.agency_suggestion_.core import GetAgencySuggestionsQueryBuilder from src.api.endpoints.annotate.all.get.dto import GetNextURLForAllAnnotationResponse, \ GetNextURLForAllAnnotationInnerResponse from src.api.endpoints.annotate.relevance.get.dto import RelevanceAnnotationResponseInfo from src.collectors.enums import URLStatus from src.db.dto_converter import DTOConverter from src.db.dtos.url.mapping import URLMapping -from src.db.models.impl.link.batch_url import LinkBatchURL +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion @@ -39,7 +39,7 @@ async def run( query .where( and_( - URL.status == URLStatus.PENDING.value, + URL.status == URLStatus.OK.value, StatementComposer.user_suggestion_not_exists(UserUrlAgencySuggestion), StatementComposer.user_suggestion_not_exists(UserRecordTypeSuggestion), StatementComposer.user_suggestion_not_exists(UserRelevantSuggestion), @@ -50,7 +50,7 @@ async def run( 
load_options = [ URL.html_content, - URL.automated_agency_suggestions, + URL.auto_agency_subtasks, URL.auto_relevant_suggestion, URL.auto_record_type_suggestion ] diff --git a/src/api/endpoints/batch/duplicates/query.py b/src/api/endpoints/batch/duplicates/query.py index 2d8edff9..b09b6e5d 100644 --- a/src/api/endpoints/batch/duplicates/query.py +++ b/src/api/endpoints/batch/duplicates/query.py @@ -5,7 +5,7 @@ from src.db.models.impl.duplicate.pydantic.info import DuplicateInfo from src.db.models.impl.batch.sqlalchemy import Batch from src.db.models.impl.duplicate.sqlalchemy import Duplicate -from src.db.models.impl.link.batch_url import LinkBatchURL +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.sqlalchemy import URL from src.db.queries.base.builder import QueryBuilderBase diff --git a/src/api/endpoints/batch/urls/query.py b/src/api/endpoints/batch/urls/query.py index 6a88448f..391a265f 100644 --- a/src/api/endpoints/batch/urls/query.py +++ b/src/api/endpoints/batch/urls/query.py @@ -1,7 +1,7 @@ from sqlalchemy import Select from sqlalchemy.ext.asyncio import AsyncSession -from src.db.models.impl.link.batch_url import LinkBatchURL +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.pydantic.info import URLInfo from src.db.models.impl.url.core.sqlalchemy import URL from src.db.queries.base.builder import QueryBuilderBase diff --git a/src/api/endpoints/collector/manual/query.py b/src/api/endpoints/collector/manual/query.py index 12b17ad3..73e3edb8 100644 --- a/src/api/endpoints/collector/manual/query.py +++ b/src/api/endpoints/collector/manual/query.py @@ -6,7 +6,7 @@ from src.collectors.enums import CollectorType, URLStatus from src.core.enums import BatchStatus from src.db.models.impl.batch.sqlalchemy import Batch -from src.db.models.impl.link.batch_url import LinkBatchURL +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from 
src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.optional_data_source_metadata import URLOptionalDataSourceMetadata @@ -47,7 +47,7 @@ async def run(self, session: AsyncSession) -> ManualBatchResponseDTO: name=entry.name, description=entry.description, collector_metadata=entry.collector_metadata, - status=URLStatus.PENDING.value, + status=URLStatus.OK.value, record_type=entry.record_type.value if entry.record_type is not None else None, source=URLSource.MANUAL ) diff --git a/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/manager/__init__.py b/src/api/endpoints/metrics/backlog/__init__.py similarity index 100% rename from tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/manager/__init__.py rename to src/api/endpoints/metrics/backlog/__init__.py diff --git a/src/api/endpoints/metrics/backlog/query.py b/src/api/endpoints/metrics/backlog/query.py new file mode 100644 index 00000000..788ef424 --- /dev/null +++ b/src/api/endpoints/metrics/backlog/query.py @@ -0,0 +1,53 @@ +from sqlalchemy import func, select +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.metrics.dtos.get.backlog import GetMetricsBacklogResponseDTO, GetMetricsBacklogResponseInnerDTO +from src.db.models.impl.backlog_snapshot import BacklogSnapshot +from src.db.queries.base.builder import QueryBuilderBase + + +class GetBacklogMetricsQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> GetMetricsBacklogResponseDTO: + month = func.date_trunc('month', BacklogSnapshot.created_at) + + # 1. 
Create a subquery that assigns row_number() partitioned by month + monthly_snapshot_subq = ( + select( + BacklogSnapshot.id, + BacklogSnapshot.created_at, + BacklogSnapshot.count_pending_total, + month.label("month_start"), + func.row_number() + .over( + partition_by=month, + order_by=BacklogSnapshot.created_at.desc() + ) + .label("row_number") + ) + .subquery() + ) + + # 2. Filter for the top (most recent) row in each month + stmt = ( + select( + monthly_snapshot_subq.c.month_start, + monthly_snapshot_subq.c.created_at, + monthly_snapshot_subq.c.count_pending_total + ) + .where(monthly_snapshot_subq.c.row_number == 1) + .order_by(monthly_snapshot_subq.c.month_start) + ) + + raw_result = await session.execute(stmt) + results = raw_result.all() + final_results = [] + for result in results: + final_results.append( + GetMetricsBacklogResponseInnerDTO( + month=result.month_start.strftime("%B %Y"), + count_pending_total=result.count_pending_total, + ) + ) + + return GetMetricsBacklogResponseDTO(entries=final_results) \ No newline at end of file diff --git a/src/api/endpoints/metrics/batches/aggregated/query.py b/src/api/endpoints/metrics/batches/aggregated/query.py deleted file mode 100644 index e7de65fb..00000000 --- a/src/api/endpoints/metrics/batches/aggregated/query.py +++ /dev/null @@ -1,117 +0,0 @@ -from sqlalchemy import case, select -from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.sql.functions import coalesce - -from src.api.endpoints.metrics.batches.aggregated.dto import GetMetricsBatchesAggregatedResponseDTO, \ - GetMetricsBatchesAggregatedInnerResponseDTO -from src.collectors.enums import URLStatus, CollectorType -from src.core.enums import BatchStatus -from src.db.models.impl.batch.sqlalchemy import Batch -from src.db.models.impl.link.batch_url import LinkBatchURL -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.queries.base.builder import QueryBuilderBase -from src.db.statement_composer import StatementComposer - - 
-class GetBatchesAggregatedMetricsQueryBuilder(QueryBuilderBase): - - async def run( - self, - session: AsyncSession - ) -> GetMetricsBatchesAggregatedResponseDTO: - sc = StatementComposer - - # First, get all batches broken down by collector type and status - def batch_column(status: BatchStatus, label): - return sc.count_distinct( - case( - ( - Batch.status == status.value, - Batch.id - ) - ), - label=label - ) - - batch_count_subquery = select( - batch_column(BatchStatus.READY_TO_LABEL, label="done_count"), - batch_column(BatchStatus.ERROR, label="error_count"), - Batch.strategy, - ).group_by(Batch.strategy).subquery("batch_count") - - def url_column(status: URLStatus, label): - return sc.count_distinct( - case( - ( - URL.status == status.value, - URL.id - ) - ), - label=label - ) - - # Next, count urls - url_count_subquery = select( - Batch.strategy, - url_column(URLStatus.PENDING, label="pending_count"), - url_column(URLStatus.ERROR, label="error_count"), - url_column(URLStatus.VALIDATED, label="validated_count"), - url_column(URLStatus.SUBMITTED, label="submitted_count"), - url_column(URLStatus.NOT_RELEVANT, label="rejected_count"), - - ).join( - LinkBatchURL, - LinkBatchURL.url_id == URL.id - ).outerjoin( - Batch, Batch.id == LinkBatchURL.batch_id - ).group_by( - Batch.strategy - ).subquery("url_count") - - # Combine - query = select( - Batch.strategy, - batch_count_subquery.c.done_count.label("batch_done_count"), - batch_count_subquery.c.error_count.label("batch_error_count"), - coalesce(url_count_subquery.c.pending_count, 0).label("pending_count"), - coalesce(url_count_subquery.c.error_count, 0).label("error_count"), - coalesce(url_count_subquery.c.submitted_count, 0).label("submitted_count"), - coalesce(url_count_subquery.c.rejected_count, 0).label("rejected_count"), - coalesce(url_count_subquery.c.validated_count, 0).label("validated_count") - ).join( - batch_count_subquery, - Batch.strategy == batch_count_subquery.c.strategy - ).outerjoin( - 
url_count_subquery, - Batch.strategy == url_count_subquery.c.strategy - ) - raw_results = await session.execute(query) - results = raw_results.all() - d: dict[CollectorType, GetMetricsBatchesAggregatedInnerResponseDTO] = {} - for result in results: - d[CollectorType(result.strategy)] = GetMetricsBatchesAggregatedInnerResponseDTO( - count_successful_batches=result.batch_done_count, - count_failed_batches=result.batch_error_count, - count_urls=result.pending_count + result.submitted_count + - result.rejected_count + result.error_count + - result.validated_count, - count_urls_pending=result.pending_count, - count_urls_validated=result.validated_count, - count_urls_submitted=result.submitted_count, - count_urls_rejected=result.rejected_count, - count_urls_errors=result.error_count - ) - - total_batch_query = await session.execute( - select( - sc.count_distinct(Batch.id, label="count") - ) - ) - total_batch_count = total_batch_query.scalars().one_or_none() - if total_batch_count is None: - total_batch_count = 0 - - return GetMetricsBatchesAggregatedResponseDTO( - total_batches=total_batch_count, - by_strategy=d - ) \ No newline at end of file diff --git a/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/manager/queries/__init__.py b/src/api/endpoints/metrics/batches/aggregated/query/__init__.py similarity index 100% rename from tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/manager/queries/__init__.py rename to src/api/endpoints/metrics/batches/aggregated/query/__init__.py diff --git a/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/models/__init__.py b/src/api/endpoints/metrics/batches/aggregated/query/all_urls/__init__.py similarity index 100% rename from tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/models/__init__.py rename to src/api/endpoints/metrics/batches/aggregated/query/all_urls/__init__.py diff --git 
a/src/api/endpoints/metrics/batches/aggregated/query/all_urls/query.py b/src/api/endpoints/metrics/batches/aggregated/query/all_urls/query.py new file mode 100644 index 00000000..7eed215a --- /dev/null +++ b/src/api/endpoints/metrics/batches/aggregated/query/all_urls/query.py @@ -0,0 +1,28 @@ +from typing import Sequence + +from sqlalchemy import func, select, RowMapping +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.metrics.batches.aggregated.query.models.strategy_count import CountByBatchStrategyResponse +from src.db.helpers.session import session_helper as sh +from src.db.models.impl.batch.sqlalchemy import Batch +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.queries.base.builder import QueryBuilderBase + + +class CountAllURLsByBatchStrategyQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> list[CountByBatchStrategyResponse]: + + query = ( + select( + Batch.strategy, + func.count(LinkBatchURL.url_id).label("count") + ) + .join(LinkBatchURL) + .group_by(Batch.strategy) + ) + + mappings: Sequence[RowMapping] = await sh.mappings(session, query=query) + results = [CountByBatchStrategyResponse(**mapping) for mapping in mappings] + return results \ No newline at end of file diff --git a/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/models/url/__init__.py b/src/api/endpoints/metrics/batches/aggregated/query/batch_status_/__init__.py similarity index 100% rename from tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/models/url/__init__.py rename to src/api/endpoints/metrics/batches/aggregated/query/batch_status_/__init__.py diff --git a/src/api/endpoints/metrics/batches/aggregated/query/batch_status_/query.py b/src/api/endpoints/metrics/batches/aggregated/query/batch_status_/query.py new file mode 100644 index 00000000..f8587b68 --- /dev/null +++ b/src/api/endpoints/metrics/batches/aggregated/query/batch_status_/query.py @@ 
-0,0 +1,37 @@ +from typing import Sequence + +from sqlalchemy import CTE, select, func, RowMapping +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.metrics.batches.aggregated.query.batch_status_.response import \ + BatchStatusCountByBatchStrategyResponseDTO +from src.collectors.enums import CollectorType +from src.core.enums import BatchStatus +from src.db.models.impl.batch.sqlalchemy import Batch +from src.db.queries.base.builder import QueryBuilderBase + +from src.db.helpers.session import session_helper as sh + +class BatchStatusByBatchStrategyQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> list[BatchStatusCountByBatchStrategyResponseDTO]: + query = ( + select( + Batch.strategy, + Batch.status, + func.count(Batch.id).label("count") + ) + .group_by(Batch.strategy, Batch.status) + ) + mappings: Sequence[RowMapping] = await sh.mappings(session, query=query) + + results: list[BatchStatusCountByBatchStrategyResponseDTO] = [] + for mapping in mappings: + results.append( + BatchStatusCountByBatchStrategyResponseDTO( + strategy=CollectorType(mapping["strategy"]), + status=BatchStatus(mapping["status"]), + count=mapping["count"] + ) + ) + return results \ No newline at end of file diff --git a/src/api/endpoints/metrics/batches/aggregated/query/batch_status_/response.py b/src/api/endpoints/metrics/batches/aggregated/query/batch_status_/response.py new file mode 100644 index 00000000..79c1b2dd --- /dev/null +++ b/src/api/endpoints/metrics/batches/aggregated/query/batch_status_/response.py @@ -0,0 +1,10 @@ +from pydantic import BaseModel + +from src.collectors.enums import CollectorType +from src.core.enums import BatchStatus + + +class BatchStatusCountByBatchStrategyResponseDTO(BaseModel): + strategy: CollectorType + status: BatchStatus + count: int \ No newline at end of file diff --git a/src/api/endpoints/metrics/batches/aggregated/query/core.py b/src/api/endpoints/metrics/batches/aggregated/query/core.py new 
file mode 100644 index 00000000..2642f002 --- /dev/null +++ b/src/api/endpoints/metrics/batches/aggregated/query/core.py @@ -0,0 +1,79 @@ +from sqlalchemy import case, select +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.sql.functions import coalesce, func + +from src.api.endpoints.metrics.batches.aggregated.dto import GetMetricsBatchesAggregatedResponseDTO, \ + GetMetricsBatchesAggregatedInnerResponseDTO +from src.api.endpoints.metrics.batches.aggregated.query.all_urls.query import CountAllURLsByBatchStrategyQueryBuilder +from src.api.endpoints.metrics.batches.aggregated.query.batch_status_.query import \ + BatchStatusByBatchStrategyQueryBuilder +from src.api.endpoints.metrics.batches.aggregated.query.requester_.requester import \ + GetBatchesAggregatedMetricsQueryRequester +from src.api.endpoints.metrics.batches.aggregated.query.submitted_.query import \ + CountSubmittedByBatchStrategyQueryBuilder +from src.api.endpoints.metrics.batches.aggregated.query.url_error.query import URLErrorByBatchStrategyQueryBuilder +from src.api.endpoints.metrics.batches.aggregated.query.validated_.query import \ + ValidatedURLCountByBatchStrategyQueryBuilder +from src.collectors.enums import URLStatus, CollectorType +from src.core.enums import BatchStatus +from src.db.models.impl.batch.sqlalchemy import Batch +from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.data_source.sqlalchemy import URLDataSource +from src.db.queries.base.builder import QueryBuilderBase +from src.db.statement_composer import StatementComposer + + +class GetBatchesAggregatedMetricsQueryBuilder(QueryBuilderBase): + + async def run( + self, + session: AsyncSession + ) -> GetMetricsBatchesAggregatedResponseDTO: + + requester = 
GetBatchesAggregatedMetricsQueryRequester(session=session) + + url_error_count_dict: dict[CollectorType, int] = await requester.url_error_by_collector_strategy() + url_pending_count_dict: dict[CollectorType, int] = await requester.pending_url_count_by_collector_strategy() + url_submitted_count_dict: dict[CollectorType, int] = await requester.submitted_url_count_by_collector_strategy() + url_validated_count_dict: dict[CollectorType, int] = await requester.validated_url_count_by_collector_strategy() + url_rejected_count_dict: dict[CollectorType, int] = await requester.rejected_url_count_by_collector_strategy() + url_total_count_dict: dict[CollectorType, int] = await requester.url_count_by_collector_strategy() + batch_status_count_dict: dict[ + CollectorType, + dict[BatchStatus, int] + ] = await requester.batch_status_by_collector_strategy() + + + + + + d: dict[CollectorType, GetMetricsBatchesAggregatedInnerResponseDTO] = {} + for collector_type in CollectorType: + inner_response = GetMetricsBatchesAggregatedInnerResponseDTO( + count_successful_batches=batch_status_count_dict[collector_type][BatchStatus.READY_TO_LABEL], + count_failed_batches=batch_status_count_dict[collector_type][BatchStatus.ERROR], + count_urls=url_total_count_dict[collector_type], + count_urls_pending=url_pending_count_dict[collector_type], + count_urls_validated=url_validated_count_dict[collector_type], + count_urls_submitted=url_submitted_count_dict[collector_type], + count_urls_rejected=url_rejected_count_dict[collector_type], + count_urls_errors=url_error_count_dict[collector_type], + ) + d[collector_type] = inner_response + + total_batch_query = await session.execute( + select( + func.count(Batch.id, label="count") + ) + ) + total_batch_count = total_batch_query.scalars().one_or_none() + if total_batch_count is None: + total_batch_count = 0 + + return GetMetricsBatchesAggregatedResponseDTO( + total_batches=total_batch_count, + by_strategy=d + ) \ No newline at end of file diff --git 
a/tests/automated/integration/tasks/url/impl/agency_identification/happy_path/__init__.py b/src/api/endpoints/metrics/batches/aggregated/query/models/__init__.py similarity index 100% rename from tests/automated/integration/tasks/url/impl/agency_identification/happy_path/__init__.py rename to src/api/endpoints/metrics/batches/aggregated/query/models/__init__.py diff --git a/src/api/endpoints/metrics/batches/aggregated/query/models/strategy_count.py b/src/api/endpoints/metrics/batches/aggregated/query/models/strategy_count.py new file mode 100644 index 00000000..9ceb7781 --- /dev/null +++ b/src/api/endpoints/metrics/batches/aggregated/query/models/strategy_count.py @@ -0,0 +1,8 @@ +from pydantic import BaseModel + +from src.collectors.enums import CollectorType + + +class CountByBatchStrategyResponse(BaseModel): + strategy: CollectorType + count: int \ No newline at end of file diff --git a/src/api/endpoints/metrics/batches/aggregated/query/pending/__init__.py b/src/api/endpoints/metrics/batches/aggregated/query/pending/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/metrics/batches/aggregated/query/pending/query.py b/src/api/endpoints/metrics/batches/aggregated/query/pending/query.py new file mode 100644 index 00000000..224d3bad --- /dev/null +++ b/src/api/endpoints/metrics/batches/aggregated/query/pending/query.py @@ -0,0 +1,37 @@ +from typing import Sequence + +from sqlalchemy import select, func, RowMapping +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.metrics.batches.aggregated.query.models.strategy_count import CountByBatchStrategyResponse +from src.db.models.impl.batch.sqlalchemy import Batch +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.queries.base.builder import QueryBuilderBase +from src.db.helpers.session import session_helper as sh + +class 
PendingURLCountByBatchStrategyQueryBuilder(QueryBuilderBase): + async def run( + self, session: AsyncSession + ) -> list[CountByBatchStrategyResponse]: + + query = ( + select( + Batch.strategy, + func.count(LinkBatchURL.url_id).label("count") + ) + .join( + LinkBatchURL, + LinkBatchURL.batch_id == Batch.id + ) + .outerjoin( + FlagURLValidated, + FlagURLValidated.url_id == LinkBatchURL.url_id + ) + .where(FlagURLValidated.url_id.is_(None)) + .group_by(Batch.strategy) + ) + + mappings: Sequence[RowMapping] = await sh.mappings(session, query=query) + results = [CountByBatchStrategyResponse(**mapping) for mapping in mappings] + return results diff --git a/src/api/endpoints/metrics/batches/aggregated/query/rejected/__init__.py b/src/api/endpoints/metrics/batches/aggregated/query/rejected/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/metrics/batches/aggregated/query/rejected/query.py b/src/api/endpoints/metrics/batches/aggregated/query/rejected/query.py new file mode 100644 index 00000000..6c1d9e0f --- /dev/null +++ b/src/api/endpoints/metrics/batches/aggregated/query/rejected/query.py @@ -0,0 +1,39 @@ +from typing import Sequence + +from sqlalchemy import select, func, RowMapping +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.metrics.batches.aggregated.query.models.strategy_count import CountByBatchStrategyResponse +from src.db.models.impl.batch.sqlalchemy import Batch +from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.queries.base.builder import QueryBuilderBase +from src.db.helpers.session import session_helper as sh + +class RejectedURLCountByBatchStrategyQueryBuilder(QueryBuilderBase): + + async def run( + self, session: AsyncSession + ) -> list[CountByBatchStrategyResponse]: + + query = ( + select( + 
Batch.strategy, + func.count(FlagURLValidated.url_id).label("count") + ) + .join( + LinkBatchURL, + LinkBatchURL.batch_id == Batch.id + ) + .join( + FlagURLValidated, + FlagURLValidated.url_id == LinkBatchURL.url_id + ) + .where(FlagURLValidated.type == URLValidatedType.NOT_RELEVANT) + .group_by(Batch.strategy) + ) + + mappings: Sequence[RowMapping] = await sh.mappings(session, query=query) + results = [CountByBatchStrategyResponse(**mapping) for mapping in mappings] + return results diff --git a/src/api/endpoints/metrics/batches/aggregated/query/requester_/__init__.py b/src/api/endpoints/metrics/batches/aggregated/query/requester_/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/metrics/batches/aggregated/query/requester_/convert.py b/src/api/endpoints/metrics/batches/aggregated/query/requester_/convert.py new file mode 100644 index 00000000..4a129dfb --- /dev/null +++ b/src/api/endpoints/metrics/batches/aggregated/query/requester_/convert.py @@ -0,0 +1,11 @@ +from src.api.endpoints.metrics.batches.aggregated.query.models.strategy_count import CountByBatchStrategyResponse +from src.collectors.enums import CollectorType + + +def convert_strategy_counts_to_strategy_count_dict( + responses: list[CountByBatchStrategyResponse] +) -> dict[CollectorType, int]: + result: dict[CollectorType, int] = {collector_type: 0 for collector_type in CollectorType} + for response in responses: + result[response.strategy] = response.count + return result \ No newline at end of file diff --git a/src/api/endpoints/metrics/batches/aggregated/query/requester_/requester.py b/src/api/endpoints/metrics/batches/aggregated/query/requester_/requester.py new file mode 100644 index 00000000..ac4c6dfa --- /dev/null +++ b/src/api/endpoints/metrics/batches/aggregated/query/requester_/requester.py @@ -0,0 +1,75 @@ + +from src.api.endpoints.metrics.batches.aggregated.query.all_urls.query import CountAllURLsByBatchStrategyQueryBuilder +from 
src.api.endpoints.metrics.batches.aggregated.query.batch_status_.query import \ + BatchStatusByBatchStrategyQueryBuilder +from src.api.endpoints.metrics.batches.aggregated.query.batch_status_.response import \ + BatchStatusCountByBatchStrategyResponseDTO +from src.api.endpoints.metrics.batches.aggregated.query.models.strategy_count import CountByBatchStrategyResponse +from src.api.endpoints.metrics.batches.aggregated.query.pending.query import PendingURLCountByBatchStrategyQueryBuilder +from src.api.endpoints.metrics.batches.aggregated.query.rejected.query import \ + RejectedURLCountByBatchStrategyQueryBuilder +from src.api.endpoints.metrics.batches.aggregated.query.requester_.convert import \ + convert_strategy_counts_to_strategy_count_dict +from src.api.endpoints.metrics.batches.aggregated.query.submitted_.query import \ + CountSubmittedByBatchStrategyQueryBuilder +from src.api.endpoints.metrics.batches.aggregated.query.url_error.query import URLErrorByBatchStrategyQueryBuilder +from src.api.endpoints.metrics.batches.aggregated.query.validated_.query import \ + ValidatedURLCountByBatchStrategyQueryBuilder +from src.collectors.enums import CollectorType +from src.core.enums import BatchStatus +from src.db.queries.base.builder import QueryBuilderBase +from src.db.templates.requester import RequesterBase + + +class GetBatchesAggregatedMetricsQueryRequester(RequesterBase): + + async def _run_strategy_count_query_builder( + self, query_builder: type[QueryBuilderBase]) -> dict[CollectorType, int]: + responses: list[CountByBatchStrategyResponse] = \ + await query_builder().run(self.session) + + return convert_strategy_counts_to_strategy_count_dict(responses) + + async def url_error_by_collector_strategy(self) -> dict[CollectorType, int]: + return await self._run_strategy_count_query_builder(URLErrorByBatchStrategyQueryBuilder) + + async def url_count_by_collector_strategy(self) -> dict[CollectorType, int]: + return await 
self._run_strategy_count_query_builder(CountAllURLsByBatchStrategyQueryBuilder) + + async def submitted_url_count_by_collector_strategy(self) -> dict[CollectorType, int]: + return await self._run_strategy_count_query_builder(CountSubmittedByBatchStrategyQueryBuilder) + + async def validated_url_count_by_collector_strategy(self) -> dict[CollectorType, int]: + return await self._run_strategy_count_query_builder(ValidatedURLCountByBatchStrategyQueryBuilder) + + async def rejected_url_count_by_collector_strategy(self) -> dict[CollectorType, int]: + return await self._run_strategy_count_query_builder(RejectedURLCountByBatchStrategyQueryBuilder) + + async def pending_url_count_by_collector_strategy(self) -> dict[CollectorType, int]: + return await self._run_strategy_count_query_builder(PendingURLCountByBatchStrategyQueryBuilder) + + async def batch_status_by_collector_strategy(self) -> dict[ + CollectorType, + dict[BatchStatus, int] + ]: + + responses: list[BatchStatusCountByBatchStrategyResponseDTO] = \ + await BatchStatusByBatchStrategyQueryBuilder().run(self.session) + + result: dict[CollectorType, dict[BatchStatus, int]] = { + collector_type: { + BatchStatus.ERROR: 0, + BatchStatus.READY_TO_LABEL: 0, + } + for collector_type in CollectorType + } + for response in responses: + if response.status not in ( + BatchStatus.ERROR, + BatchStatus.READY_TO_LABEL + ): + continue + result[response.strategy][response.status] = response.count + + return result + diff --git a/src/api/endpoints/metrics/batches/aggregated/query/submitted_/__init__.py b/src/api/endpoints/metrics/batches/aggregated/query/submitted_/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/metrics/batches/aggregated/query/submitted_/query.py b/src/api/endpoints/metrics/batches/aggregated/query/submitted_/query.py new file mode 100644 index 00000000..ee8f8065 --- /dev/null +++ b/src/api/endpoints/metrics/batches/aggregated/query/submitted_/query.py @@ -0,0 +1,45 @@ +from 
typing import Sequence + +from sqlalchemy import select, func, RowMapping +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.metrics.batches.aggregated.query.models.strategy_count import CountByBatchStrategyResponse +from src.collectors.enums import CollectorType +from src.db.helpers.session import session_helper as sh +from src.db.models.impl.batch.sqlalchemy import Batch +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.models.impl.url.data_source.sqlalchemy import URLDataSource +from src.db.queries.base.builder import QueryBuilderBase + + +class CountSubmittedByBatchStrategyQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> list[ + CountByBatchStrategyResponse + ]: + query = ( + select( + Batch.strategy, + func.count(URLDataSource.id).label("count") + ) + .join( + LinkBatchURL, + LinkBatchURL.batch_id == Batch.id + ) + .join( + URLDataSource, + URLDataSource.url_id == LinkBatchURL.url_id + ) + .group_by(Batch.strategy) + ) + + mappings: Sequence[RowMapping] = await sh.mappings(session, query=query) + results: list[CountByBatchStrategyResponse] = [] + for mapping in mappings: + results.append( + CountByBatchStrategyResponse( + strategy=CollectorType(mapping["strategy"]), + count=mapping["count"] + ) + ) + return results diff --git a/src/api/endpoints/metrics/batches/aggregated/query/url_error/__init__.py b/src/api/endpoints/metrics/batches/aggregated/query/url_error/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/metrics/batches/aggregated/query/url_error/query.py b/src/api/endpoints/metrics/batches/aggregated/query/url_error/query.py new file mode 100644 index 00000000..9bcc3a57 --- /dev/null +++ b/src/api/endpoints/metrics/batches/aggregated/query/url_error/query.py @@ -0,0 +1,34 @@ +from typing import Sequence + +from sqlalchemy import select, func, RowMapping +from sqlalchemy.ext.asyncio import AsyncSession + +from 
src.api.endpoints.metrics.batches.aggregated.query.models.strategy_count import CountByBatchStrategyResponse +from src.collectors.enums import URLStatus +from src.db.helpers.session import session_helper as sh +from src.db.models.impl.batch.sqlalchemy import Batch +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.queries.base.builder import QueryBuilderBase + + +class URLErrorByBatchStrategyQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> list[CountByBatchStrategyResponse]: + query = ( + select( + Batch.strategy, + func.count(URL.id).label("count") + ) + .select_from(Batch) + .join(LinkBatchURL) + .join(URL) + .where(URL.status == URLStatus.ERROR) + .group_by(Batch.strategy, URL.status) + ) + + mappings: Sequence[RowMapping] = await sh.mappings(session, query=query) + results = [CountByBatchStrategyResponse(**mapping) for mapping in mappings] + return results + + diff --git a/src/api/endpoints/metrics/batches/aggregated/query/validated_/__init__.py b/src/api/endpoints/metrics/batches/aggregated/query/validated_/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/metrics/batches/aggregated/query/validated_/query.py b/src/api/endpoints/metrics/batches/aggregated/query/validated_/query.py new file mode 100644 index 00000000..155cbcb0 --- /dev/null +++ b/src/api/endpoints/metrics/batches/aggregated/query/validated_/query.py @@ -0,0 +1,38 @@ +from typing import Sequence + +from sqlalchemy import select, func, RowMapping +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.metrics.batches.aggregated.query.models.strategy_count import CountByBatchStrategyResponse +from src.db.helpers.session import session_helper as sh +from src.db.models.impl.batch.sqlalchemy import Batch +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from 
src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.queries.base.builder import QueryBuilderBase + + +class ValidatedURLCountByBatchStrategyQueryBuilder(QueryBuilderBase): + + async def run( + self, session: AsyncSession + ) -> list[CountByBatchStrategyResponse]: + + query = ( + select( + Batch.strategy, + func.count(FlagURLValidated.url_id).label("count") + ) + .join( + LinkBatchURL, + LinkBatchURL.batch_id == Batch.id + ) + .join( + FlagURLValidated, + FlagURLValidated.url_id == LinkBatchURL.url_id + ) + .group_by(Batch.strategy) + ) + + mappings: Sequence[RowMapping] = await sh.mappings(session, query=query) + results = [CountByBatchStrategyResponse(**mapping) for mapping in mappings] + return results diff --git a/src/api/endpoints/metrics/batches/breakdown/error/__init__.py b/src/api/endpoints/metrics/batches/breakdown/error/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/metrics/batches/breakdown/error/cte_.py b/src/api/endpoints/metrics/batches/breakdown/error/cte_.py new file mode 100644 index 00000000..ed2ff44f --- /dev/null +++ b/src/api/endpoints/metrics/batches/breakdown/error/cte_.py @@ -0,0 +1,25 @@ +from sqlalchemy import select, func, CTE, Column + +from src.collectors.enums import URLStatus +from src.db.models.impl.batch.sqlalchemy import Batch +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.api.endpoints.metrics.batches.breakdown.templates.cte_ import BatchesBreakdownURLCTE +from src.db.models.impl.url.core.sqlalchemy import URL + +URL_ERROR_CTE = BatchesBreakdownURLCTE( + select( + Batch.id, + func.count(LinkBatchURL.url_id).label("count_error") + ) + .join( + LinkBatchURL, + LinkBatchURL.batch_id == Batch.id + ) + .join( + URL, + URL.id == LinkBatchURL.url_id + ) + .where(URL.status == URLStatus.ERROR) + .group_by(Batch.id) + .cte("error") +) diff --git a/src/api/endpoints/metrics/batches/breakdown/not_relevant/__init__.py 
b/src/api/endpoints/metrics/batches/breakdown/not_relevant/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/metrics/batches/breakdown/not_relevant/cte_.py b/src/api/endpoints/metrics/batches/breakdown/not_relevant/cte_.py new file mode 100644 index 00000000..14403e86 --- /dev/null +++ b/src/api/endpoints/metrics/batches/breakdown/not_relevant/cte_.py @@ -0,0 +1,27 @@ +from sqlalchemy import select, func + +from src.api.endpoints.metrics.batches.breakdown.templates.cte_ import BatchesBreakdownURLCTE +from src.db.models.impl.batch.sqlalchemy import Batch +from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL + +NOT_RELEVANT_CTE = BatchesBreakdownURLCTE( + select( + Batch.id, + func.count(FlagURLValidated.url_id).label("count_rejected") + ) + .join( + LinkBatchURL, + LinkBatchURL.batch_id == Batch.id + ) + .join( + FlagURLValidated, + FlagURLValidated.url_id == LinkBatchURL.url_id + ) + .where( + FlagURLValidated.type == URLValidatedType.NOT_RELEVANT + ) + .group_by(Batch.id) + .cte("not_relevant") +) \ No newline at end of file diff --git a/src/api/endpoints/metrics/batches/breakdown/pending/__init__.py b/src/api/endpoints/metrics/batches/breakdown/pending/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/metrics/batches/breakdown/pending/cte_.py b/src/api/endpoints/metrics/batches/breakdown/pending/cte_.py new file mode 100644 index 00000000..bf09f345 --- /dev/null +++ b/src/api/endpoints/metrics/batches/breakdown/pending/cte_.py @@ -0,0 +1,26 @@ +from sqlalchemy import select, func + +from src.api.endpoints.metrics.batches.breakdown.templates.cte_ import BatchesBreakdownURLCTE +from src.db.models.impl.batch.sqlalchemy import Batch +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from 
src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL + +PENDING_CTE = BatchesBreakdownURLCTE( + select( + Batch.id, + func.count(LinkBatchURL.url_id).label("count_pending") + ) + .join( + LinkBatchURL, + LinkBatchURL.batch_id == Batch.id + ) + .outerjoin( + FlagURLValidated, + FlagURLValidated.url_id == LinkBatchURL.url_id + ) + .where( + FlagURLValidated.url_id.is_(None) + ) + .group_by(Batch.id) + .cte("pending") +) \ No newline at end of file diff --git a/src/api/endpoints/metrics/batches/breakdown/query.py b/src/api/endpoints/metrics/batches/breakdown/query.py index 6fe0eb71..5847e309 100644 --- a/src/api/endpoints/metrics/batches/breakdown/query.py +++ b/src/api/endpoints/metrics/batches/breakdown/query.py @@ -1,13 +1,20 @@ -from sqlalchemy import select, case +from sqlalchemy import select, case, Column from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.sql.functions import coalesce from src.api.endpoints.metrics.batches.breakdown.dto import GetMetricsBatchesBreakdownResponseDTO, \ GetMetricsBatchesBreakdownInnerResponseDTO +from src.api.endpoints.metrics.batches.breakdown.error.cte_ import URL_ERROR_CTE +from src.api.endpoints.metrics.batches.breakdown.not_relevant.cte_ import NOT_RELEVANT_CTE +from src.api.endpoints.metrics.batches.breakdown.pending.cte_ import PENDING_CTE +from src.api.endpoints.metrics.batches.breakdown.submitted.cte_ import SUBMITTED_CTE +from src.api.endpoints.metrics.batches.breakdown.templates.cte_ import BatchesBreakdownURLCTE +from src.api.endpoints.metrics.batches.breakdown.total.cte_ import TOTAL_CTE +from src.api.endpoints.metrics.batches.breakdown.validated.cte_ import VALIDATED_CTE from src.collectors.enums import URLStatus, CollectorType from src.core.enums import BatchStatus from src.db.models.impl.batch.sqlalchemy import Batch -from src.db.models.impl.link.batch_url import LinkBatchURL +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.sqlalchemy 
import URL from src.db.queries.base.builder import QueryBuilderBase from src.db.statement_composer import StatementComposer @@ -32,28 +39,32 @@ async def run(self, session: AsyncSession) -> GetMetricsBatchesBreakdownResponse Batch.date_generated.label("created_at"), ) - def url_column(status: URLStatus, label): - return sc.count_distinct( - case( - ( - URL.status == status.value, - URL.id - ) - ), - label=label - ) + all_ctes: list[BatchesBreakdownURLCTE] = [ + URL_ERROR_CTE, + NOT_RELEVANT_CTE, + PENDING_CTE, + SUBMITTED_CTE, + TOTAL_CTE, + VALIDATED_CTE + ] + + count_columns: list[Column] = [ + cte.count for cte in all_ctes + ] + count_query = select( - LinkBatchURL.batch_id, - sc.count_distinct(URL.id, label="count_total"), - url_column(URLStatus.PENDING, label="count_pending"), - url_column(URLStatus.SUBMITTED, label="count_submitted"), - url_column(URLStatus.NOT_RELEVANT, label="count_rejected"), - url_column(URLStatus.ERROR, label="count_error"), - url_column(URLStatus.VALIDATED, label="count_validated"), - ).join(URL, LinkBatchURL.url_id == URL.id).group_by( - LinkBatchURL.batch_id - ).subquery("url_count") + Batch.id.label("batch_id"), + *count_columns + ) + for cte in all_ctes: + count_query = count_query.outerjoin( + cte.query, + Batch.id == cte.batch_id + ) + + count_query = count_query.cte("url_count") + query = (select( main_query.c.strategy, diff --git a/src/api/endpoints/metrics/batches/breakdown/submitted/__init__.py b/src/api/endpoints/metrics/batches/breakdown/submitted/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/metrics/batches/breakdown/submitted/cte_.py b/src/api/endpoints/metrics/batches/breakdown/submitted/cte_.py new file mode 100644 index 00000000..face1891 --- /dev/null +++ b/src/api/endpoints/metrics/batches/breakdown/submitted/cte_.py @@ -0,0 +1,23 @@ +from sqlalchemy import select, func + +from src.api.endpoints.metrics.batches.breakdown.templates.cte_ import BatchesBreakdownURLCTE +from 
src.db.models.impl.batch.sqlalchemy import Batch +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.models.impl.url.data_source.sqlalchemy import URLDataSource + +SUBMITTED_CTE = BatchesBreakdownURLCTE( + select( + Batch.id, + func.count(URLDataSource.id).label("count_submitted") + ) + .join( + LinkBatchURL, + LinkBatchURL.batch_id == Batch.id + ) + .join( + URLDataSource, + URLDataSource.url_id == LinkBatchURL.url_id + ) + .group_by(Batch.id) + .cte("submitted") +) \ No newline at end of file diff --git a/src/api/endpoints/metrics/batches/breakdown/templates/__init__.py b/src/api/endpoints/metrics/batches/breakdown/templates/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/metrics/batches/breakdown/templates/cte_.py b/src/api/endpoints/metrics/batches/breakdown/templates/cte_.py new file mode 100644 index 00000000..3fdd7521 --- /dev/null +++ b/src/api/endpoints/metrics/batches/breakdown/templates/cte_.py @@ -0,0 +1,20 @@ +from sqlalchemy import Column +from sqlalchemy import CTE + + +class BatchesBreakdownURLCTE: + + def __init__(self, query: CTE): + self._query = query + + @property + def query(self) -> CTE: + return self._query + + @property + def batch_id(self) -> Column: + return self._query.columns[0] + + @property + def count(self) -> Column: + return self._query.columns[1] \ No newline at end of file diff --git a/src/api/endpoints/metrics/batches/breakdown/total/__init__.py b/src/api/endpoints/metrics/batches/breakdown/total/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/metrics/batches/breakdown/total/cte_.py b/src/api/endpoints/metrics/batches/breakdown/total/cte_.py new file mode 100644 index 00000000..33cf0c84 --- /dev/null +++ b/src/api/endpoints/metrics/batches/breakdown/total/cte_.py @@ -0,0 +1,15 @@ +from sqlalchemy import select, func + +from src.api.endpoints.metrics.batches.breakdown.templates.cte_ import BatchesBreakdownURLCTE +from
src.db.models.impl.batch.sqlalchemy import Batch +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL + +TOTAL_CTE = BatchesBreakdownURLCTE( + select( + Batch.id, + func.count(LinkBatchURL.url_id).label("count_total") + ) + .join(LinkBatchURL) + .group_by(Batch.id) + .cte("total") +) \ No newline at end of file diff --git a/src/api/endpoints/metrics/batches/breakdown/validated/__init__.py b/src/api/endpoints/metrics/batches/breakdown/validated/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/metrics/batches/breakdown/validated/cte_.py b/src/api/endpoints/metrics/batches/breakdown/validated/cte_.py new file mode 100644 index 00000000..b6ff5ef1 --- /dev/null +++ b/src/api/endpoints/metrics/batches/breakdown/validated/cte_.py @@ -0,0 +1,23 @@ +from sqlalchemy import select, func + +from src.api.endpoints.metrics.batches.breakdown.templates.cte_ import BatchesBreakdownURLCTE +from src.db.models.impl.batch.sqlalchemy import Batch +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL + +VALIDATED_CTE = BatchesBreakdownURLCTE( + select( + Batch.id, + func.count(FlagURLValidated.url_id).label("count_validated") + ) + .join( + LinkBatchURL, + LinkBatchURL.batch_id == Batch.id + ) + .join( + FlagURLValidated, + FlagURLValidated.url_id == LinkBatchURL.url_id + ) + .group_by(Batch.id) + .cte("validated") +) \ No newline at end of file diff --git a/src/api/endpoints/metrics/urls/__init__.py b/src/api/endpoints/metrics/urls/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/metrics/urls/aggregated/__init__.py b/src/api/endpoints/metrics/urls/aggregated/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/metrics/urls/aggregated/query/__init__.py b/src/api/endpoints/metrics/urls/aggregated/query/__init__.py new file mode 100644 index 00000000..e69de29b 
diff --git a/src/api/endpoints/metrics/urls/aggregated/query/core.py b/src/api/endpoints/metrics/urls/aggregated/query/core.py new file mode 100644 index 00000000..57bc4211 --- /dev/null +++ b/src/api/endpoints/metrics/urls/aggregated/query/core.py @@ -0,0 +1,54 @@ +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.metrics.dtos.get.urls.aggregated.core import GetMetricsURLsAggregatedResponseDTO +from src.api.endpoints.metrics.urls.aggregated.query.subqueries.all import ALL_SUBQUERY +from src.api.endpoints.metrics.urls.aggregated.query.subqueries.error import ERROR_SUBQUERY +from src.api.endpoints.metrics.urls.aggregated.query.subqueries.pending import PENDING_SUBQUERY +from src.api.endpoints.metrics.urls.aggregated.query.subqueries.rejected import REJECTED_SUBQUERY +from src.api.endpoints.metrics.urls.aggregated.query.subqueries.submitted import SUBMITTED_SUBQUERY +from src.api.endpoints.metrics.urls.aggregated.query.subqueries.validated import VALIDATED_SUBQUERY +from src.collectors.enums import URLStatus +from src.db.helpers.session import session_helper as sh +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.queries.base.builder import QueryBuilderBase + + +class GetURLsAggregatedMetricsQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> GetMetricsURLsAggregatedResponseDTO: + + # Oldest *pending* URL: status OK and not yet validated (mirrors PENDING_SUBQUERY). + oldest_pending_url_query = select( + URL.id, + URL.created_at + ).outerjoin( + FlagURLValidated, + URL.id == FlagURLValidated.url_id + ).where( + URL.status == URLStatus.OK.value, + FlagURLValidated.url_id.is_(None) + ).order_by( + URL.created_at.asc() + ).limit(1) + + oldest_pending_url = await session.execute(oldest_pending_url_query) + oldest_pending_url = oldest_pending_url.one_or_none() + if oldest_pending_url is None: + oldest_pending_url_id = None + oldest_pending_created_at = None + else: + oldest_pending_url_id = oldest_pending_url.id + oldest_pending_created_at = oldest_pending_url.created_at + + return GetMetricsURLsAggregatedResponseDTO( + count_urls_total=await sh.scalar(session,
query=ALL_SUBQUERY), + count_urls_pending=await sh.scalar(session, query=PENDING_SUBQUERY), + count_urls_submitted=await sh.scalar(session, query=SUBMITTED_SUBQUERY), + count_urls_validated=await sh.scalar(session, query=VALIDATED_SUBQUERY), + count_urls_rejected=await sh.scalar(session, query=REJECTED_SUBQUERY), + count_urls_errors=await sh.scalar(session, query=ERROR_SUBQUERY), + oldest_pending_url_id=oldest_pending_url_id, + oldest_pending_url_created_at=oldest_pending_created_at, + ) diff --git a/src/api/endpoints/metrics/urls/aggregated/query/subqueries/__init__.py b/src/api/endpoints/metrics/urls/aggregated/query/subqueries/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/metrics/urls/aggregated/query/subqueries/all.py b/src/api/endpoints/metrics/urls/aggregated/query/subqueries/all.py new file mode 100644 index 00000000..a2d09217 --- /dev/null +++ b/src/api/endpoints/metrics/urls/aggregated/query/subqueries/all.py @@ -0,0 +1,9 @@ +from sqlalchemy import select, func + +from src.db.models.impl.url.core.sqlalchemy import URL + +ALL_SUBQUERY = ( + select( + func.count(URL.id).label("count") + ) +) \ No newline at end of file diff --git a/src/api/endpoints/metrics/urls/aggregated/query/subqueries/error.py b/src/api/endpoints/metrics/urls/aggregated/query/subqueries/error.py new file mode 100644 index 00000000..407b0e4b --- /dev/null +++ b/src/api/endpoints/metrics/urls/aggregated/query/subqueries/error.py @@ -0,0 +1,11 @@ +from sqlalchemy import select, func + +from src.collectors.enums import URLStatus +from src.db.models.impl.url.core.sqlalchemy import URL + +ERROR_SUBQUERY = ( + select( + func.count(URL.id).label("count") + ) + .where(URL.status == URLStatus.ERROR) +) \ No newline at end of file diff --git a/src/api/endpoints/metrics/urls/aggregated/query/subqueries/pending.py b/src/api/endpoints/metrics/urls/aggregated/query/subqueries/pending.py new file mode 100644 index 00000000..31d8e2b6 --- /dev/null +++ 
b/src/api/endpoints/metrics/urls/aggregated/query/subqueries/pending.py @@ -0,0 +1,19 @@ +from sqlalchemy import select, func + +from src.collectors.enums import URLStatus +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.models.impl.url.core.sqlalchemy import URL + +PENDING_SUBQUERY = ( + select( + func.count(URL.id).label("count") + ) + .outerjoin( + FlagURLValidated, + URL.id == FlagURLValidated.url_id, + ) + .where( + URL.status == URLStatus.OK, + FlagURLValidated.url_id.is_(None), + ) +) \ No newline at end of file diff --git a/src/api/endpoints/metrics/urls/aggregated/query/subqueries/rejected.py b/src/api/endpoints/metrics/urls/aggregated/query/subqueries/rejected.py new file mode 100644 index 00000000..983554ab --- /dev/null +++ b/src/api/endpoints/metrics/urls/aggregated/query/subqueries/rejected.py @@ -0,0 +1,18 @@ +from sqlalchemy import select, func + +from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.models.impl.url.core.sqlalchemy import URL + +REJECTED_SUBQUERY = ( + select( + func.count(URL.id).label("count") + ) + .join( + FlagURLValidated, + URL.id == FlagURLValidated.url_id, + ) + .where( + FlagURLValidated.type == URLValidatedType.NOT_RELEVANT, + ) +) \ No newline at end of file diff --git a/src/api/endpoints/metrics/urls/aggregated/query/subqueries/submitted.py b/src/api/endpoints/metrics/urls/aggregated/query/subqueries/submitted.py new file mode 100644 index 00000000..34be5e26 --- /dev/null +++ b/src/api/endpoints/metrics/urls/aggregated/query/subqueries/submitted.py @@ -0,0 +1,14 @@ +from sqlalchemy import func, select + +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.data_source.sqlalchemy import URLDataSource + +SUBMITTED_SUBQUERY = ( + select( + func.count(URL.id).label("count") + ) + .join( + URLDataSource, + URL.id == URLDataSource.url_id, + ) +) 
\ No newline at end of file diff --git a/src/api/endpoints/metrics/urls/aggregated/query/subqueries/validated.py b/src/api/endpoints/metrics/urls/aggregated/query/subqueries/validated.py new file mode 100644 index 00000000..fb771db6 --- /dev/null +++ b/src/api/endpoints/metrics/urls/aggregated/query/subqueries/validated.py @@ -0,0 +1,14 @@ +from sqlalchemy import select, func + +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.models.impl.url.core.sqlalchemy import URL + +VALIDATED_SUBQUERY = ( + select( + func.count(URL.id).label("count") + ) + .join( + FlagURLValidated, + URL.id == FlagURLValidated.url_id, + ) +) \ No newline at end of file diff --git a/src/api/endpoints/metrics/urls/breakdown/__init__.py b/src/api/endpoints/metrics/urls/breakdown/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/metrics/urls/breakdown/query/__init__.py b/src/api/endpoints/metrics/urls/breakdown/query/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/metrics/urls/breakdown/query/core.py b/src/api/endpoints/metrics/urls/breakdown/query/core.py new file mode 100644 index 00000000..3fc52c3f --- /dev/null +++ b/src/api/endpoints/metrics/urls/breakdown/query/core.py @@ -0,0 +1,91 @@ +from typing import Any + +from sqlalchemy import select, case, literal, func +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.metrics.dtos.get.urls.breakdown.pending import GetMetricsURLsBreakdownPendingResponseInnerDTO, \ + GetMetricsURLsBreakdownPendingResponseDTO +from src.collectors.enums import URLStatus +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion +from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion +from 
src.db.models.impl.url.suggestion.relevant.user import UserRelevantSuggestion +from src.db.queries.base.builder import QueryBuilderBase + + +class GetURLsBreakdownPendingMetricsQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> GetMetricsURLsBreakdownPendingResponseDTO: + + flags = ( + select( + URL.id.label("url_id"), + case((UserRecordTypeSuggestion.url_id != None, literal(True)), else_=literal(False)).label( + "has_user_record_type_annotation" + ), + case((UserRelevantSuggestion.url_id != None, literal(True)), else_=literal(False)).label( + "has_user_relevant_annotation" + ), + case((UserUrlAgencySuggestion.url_id != None, literal(True)), else_=literal(False)).label( + "has_user_agency_annotation" + ), + ) + .outerjoin(UserRecordTypeSuggestion, URL.id == UserRecordTypeSuggestion.url_id) + .outerjoin(UserRelevantSuggestion, URL.id == UserRelevantSuggestion.url_id) + .outerjoin(UserUrlAgencySuggestion, URL.id == UserUrlAgencySuggestion.url_id) + ).cte("flags") + + month = func.date_trunc('month', URL.created_at) + + # Build the query + query = ( + select( + month.label('month'), + func.count(URL.id).label('count_total'), + func.count( + case( + (flags.c.has_user_record_type_annotation == True, 1) + ) + ).label('user_record_type_count'), + func.count( + case( + (flags.c.has_user_relevant_annotation == True, 1) + ) + ).label('user_relevant_count'), + func.count( + case( + (flags.c.has_user_agency_annotation == True, 1) + ) + ).label('user_agency_count'), + ) + .outerjoin(flags, flags.c.url_id == URL.id) + .outerjoin( + FlagURLValidated, + FlagURLValidated.url_id == URL.id + ) + .where( + FlagURLValidated.url_id.is_(None), + URL.status == URLStatus.OK + ) + .group_by(month) + .order_by(month.asc()) + ) + + # Execute the query and return the results + results = await session.execute(query) + all_results = results.all() + final_results: list[GetMetricsURLsBreakdownPendingResponseInnerDTO] = [] + + for result in all_results: + dto = 
GetMetricsURLsBreakdownPendingResponseInnerDTO( + month=result.month.strftime("%B %Y"), + count_pending_total=result.count_total, + count_pending_relevant_user=result.user_relevant_count, + count_pending_record_type_user=result.user_record_type_count, + count_pending_agency_user=result.user_agency_count, + ) + final_results.append(dto) + return GetMetricsURLsBreakdownPendingResponseDTO( + entries=final_results, + ) \ No newline at end of file diff --git a/src/api/endpoints/review/approve/query_/core.py b/src/api/endpoints/review/approve/query_/core.py index af810a2b..86c0212c 100644 --- a/src/api/endpoints/review/approve/query_/core.py +++ b/src/api/endpoints/review/approve/query_/core.py @@ -9,6 +9,8 @@ from src.collectors.enums import URLStatus from src.db.constants import PLACEHOLDER_AGENCY_NAME from src.db.models.impl.agency.sqlalchemy import Agency +from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.optional_data_source_metadata import URLOptionalDataSourceMetadata @@ -30,76 +32,38 @@ def __init__( async def run(self, session: AsyncSession) -> None: # Get URL + url = await self._get_url(session) - query = ( - Select(URL) - .where(URL.id == self.approval_info.url_id) - .options( - joinedload(URL.optional_data_source_metadata), - joinedload(URL.confirmed_agencies), - ) - ) - - url = await session.execute(query) - url = url.scalars().first() - - update_if_not_none( - url, - "record_type", - self.approval_info.record_type.value - if self.approval_info.record_type is not None else None, - required=True - ) + await self._optionally_update_record_type(url) # Get existing agency ids existing_agencies = url.confirmed_agencies or [] existing_agency_ids = [agency.agency_id for agency in existing_agencies] new_agency_ids 
= self.approval_info.agency_ids or [] - if len(existing_agency_ids) == 0 and len(new_agency_ids) == 0: - raise HTTPException( - status_code=HTTP_400_BAD_REQUEST, - detail="Must specify agency_id if URL does not already have a confirmed agency" - ) + await self._check_for_unspecified_agency_ids(existing_agency_ids, new_agency_ids) - # Get any existing agency ids that are not in the new agency ids - # If new agency ids are specified, overwrite existing - if len(new_agency_ids) != 0: - for existing_agency in existing_agencies: - if existing_agency.id not in new_agency_ids: - # If the existing agency id is not in the new agency ids, delete it - await session.delete(existing_agency) + await self._overwrite_existing_agencies(existing_agencies, new_agency_ids, session) # Add any new agency ids that are not in the existing agency ids - for new_agency_id in new_agency_ids: - if new_agency_id not in existing_agency_ids: - # Check if the new agency exists in the database - query = ( - select(Agency) - .where(Agency.agency_id == new_agency_id) - ) - existing_agency = await session.execute(query) - existing_agency = existing_agency.scalars().first() - if existing_agency is None: - # If not, create it - agency = Agency( - agency_id=new_agency_id, - name=PLACEHOLDER_AGENCY_NAME, - ) - session.add(agency) - - # If the new agency id is not in the existing agency ids, add it - confirmed_url_agency = LinkURLAgency( - url_id=self.approval_info.url_id, - agency_id=new_agency_id - ) - session.add(confirmed_url_agency) + await self._add_new_agencies(existing_agency_ids, new_agency_ids, session) - # If it does, do nothing + await self._add_validated_flag(session, url=url) - url.status = URLStatus.VALIDATED.value + await self._optionally_update_required_metadata(url) + await self._optionally_update_optional_metdata(url) + await self._add_approving_user(session) + async def _optionally_update_required_metadata(self, url: URL) -> None: update_if_not_none(url, "name", self.approval_info.name, 
required=True) update_if_not_none(url, "description", self.approval_info.description, required=False) + async def _add_approving_user(self, session: AsyncSession) -> None: + approving_user_url = ReviewingUserURL( + user_id=self.user_id, + url_id=self.approval_info.url_id + ) + session.add(approving_user_url) + + async def _optionally_update_optional_metdata(self, url: URL) -> None: optional_metadata = url.optional_data_source_metadata if optional_metadata is None: url.optional_data_source_metadata = URLOptionalDataSourceMetadata( @@ -124,10 +88,85 @@ async def run(self, session: AsyncSession) -> None: self.approval_info.supplying_entity ) - # Add approving user - approving_user_url = ReviewingUserURL( - user_id=self.user_id, - url_id=self.approval_info.url_id + async def _optionally_update_record_type(self, url: URL) -> None: + update_if_not_none( + url, + "record_type", + self.approval_info.record_type.value + if self.approval_info.record_type is not None else None, + required=True ) - session.add(approving_user_url) \ No newline at end of file + async def _get_url(self, session: AsyncSession) -> URL: + query = ( + Select(URL) + .where(URL.id == self.approval_info.url_id) + .options( + joinedload(URL.optional_data_source_metadata), + joinedload(URL.confirmed_agencies), + ) + ) + url = await session.execute(query) + url = url.scalars().first() + return url + + async def _check_for_unspecified_agency_ids( + self, + existing_agency_ids: list[int], + new_agency_ids: list[int] + ) -> None: + """ + raises: + HTTPException: If no agency ids are specified and no existing agency ids are found + """ + if len(existing_agency_ids) == 0 and len(new_agency_ids) == 0: + raise HTTPException( + status_code=HTTP_400_BAD_REQUEST, + detail="Must specify agency_id if URL does not already have a confirmed agency" + ) + + async def _overwrite_existing_agencies(self, existing_agencies, new_agency_ids, session): + # Get any existing agency ids that are not in the new agency ids + # If new 
agency ids are specified, overwrite existing + if len(new_agency_ids) != 0: + for existing_agency in existing_agencies: + if existing_agency.id not in new_agency_ids: + # If the existing agency id is not in the new agency ids, delete it + await session.delete(existing_agency) + + async def _add_new_agencies(self, existing_agency_ids, new_agency_ids, session): + for new_agency_id in new_agency_ids: + if new_agency_id in existing_agency_ids: + continue + # Check if the new agency exists in the database + query = ( + select(Agency) + .where(Agency.agency_id == new_agency_id) + ) + existing_agency = await session.execute(query) + existing_agency = existing_agency.scalars().first() + if existing_agency is None: + # If not, create it + agency = Agency( + agency_id=new_agency_id, + name=PLACEHOLDER_AGENCY_NAME, + ) + session.add(agency) + + # If the new agency id is not in the existing agency ids, add it + confirmed_url_agency = LinkURLAgency( + url_id=self.approval_info.url_id, + agency_id=new_agency_id + ) + session.add(confirmed_url_agency) + + async def _add_validated_flag( + self, + session: AsyncSession, + url: URL + ) -> None: + flag = FlagURLValidated( + url_id=url.id, + type=URLValidatedType.DATA_SOURCE + ) + session.add(flag) diff --git a/src/api/endpoints/review/next/convert.py b/src/api/endpoints/review/next/convert.py new file mode 100644 index 00000000..ca087895 --- /dev/null +++ b/src/api/endpoints/review/next/convert.py @@ -0,0 +1,108 @@ +from src.api.endpoints.annotate.agency.get.dto import GetNextURLForAgencyAgencyInfo +from src.api.endpoints.review.next.dto import FinalReviewAnnotationAgencyInfo, FinalReviewAnnotationAgencyAutoInfo +from src.core.enums import SuggestionType +from src.db.models.impl.agency.sqlalchemy import Agency +from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency +from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask +from 
src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion +from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion + + +def convert_agency_info_to_final_review_annotation_agency_info( + subtasks: list[URLAutoAgencyIDSubtask], + confirmed_agencies: list[LinkURLAgency], + user_agency_suggestion: UserUrlAgencySuggestion +) -> FinalReviewAnnotationAgencyInfo: + + confirmed_agency_info: list[GetNextURLForAgencyAgencyInfo] = ( + _convert_confirmed_agencies_to_final_review_annotation_agency_info( + confirmed_agencies + ) + ) + + agency_auto_info: FinalReviewAnnotationAgencyAutoInfo = ( + _convert_url_auto_agency_suggestions_to_final_review_annotation_agency_auto_info( + subtasks + ) + ) + + agency_user_info: GetNextURLForAgencyAgencyInfo | None = ( + _convert_user_url_agency_suggestion_to_final_review_annotation_agency_user_info( + user_agency_suggestion + ) + ) + + return FinalReviewAnnotationAgencyInfo( + confirmed=confirmed_agency_info, + user=agency_user_info, + auto=agency_auto_info + ) + +def _convert_confirmed_agencies_to_final_review_annotation_agency_info( + confirmed_agencies: list[LinkURLAgency] +) -> list[GetNextURLForAgencyAgencyInfo]: + results: list[GetNextURLForAgencyAgencyInfo] = [] + for confirmed_agency in confirmed_agencies: + agency = confirmed_agency.agency + agency_info = _convert_agency_to_get_next_url_for_agency_agency_info( + suggestion_type=SuggestionType.CONFIRMED, + agency=agency + ) + results.append(agency_info) + return results + +def _convert_user_url_agency_suggestion_to_final_review_annotation_agency_user_info( + user_url_agency_suggestion: UserUrlAgencySuggestion +) -> GetNextURLForAgencyAgencyInfo | None: + suggestion = user_url_agency_suggestion + if suggestion is None: + return None + if suggestion.is_new: + return GetNextURLForAgencyAgencyInfo( + suggestion_type=SuggestionType.NEW_AGENCY, + ) + return _convert_agency_to_get_next_url_for_agency_agency_info( + 
suggestion_type=SuggestionType.USER_SUGGESTION, + agency=suggestion.agency + ) + +def _convert_agency_to_get_next_url_for_agency_agency_info( + suggestion_type: SuggestionType, + agency: Agency | None +) -> GetNextURLForAgencyAgencyInfo: + if agency is None: + if suggestion_type == SuggestionType.UNKNOWN: + return GetNextURLForAgencyAgencyInfo( + suggestion_type=suggestion_type, + ) + raise ValueError("agency cannot be None for suggestion type other than unknown") + + return GetNextURLForAgencyAgencyInfo( + suggestion_type=suggestion_type, + pdap_agency_id=agency.agency_id, + agency_name=agency.name, + state=agency.state, + county=agency.county, + locality=agency.locality + ) + +def _convert_url_auto_agency_suggestions_to_final_review_annotation_agency_auto_info( + subtasks: list[URLAutoAgencyIDSubtask] +) -> FinalReviewAnnotationAgencyAutoInfo: + results: list[GetNextURLForAgencyAgencyInfo] = [] + count_agencies_not_found: int = 0 + for subtask in subtasks: + if not subtask.agencies_found: + count_agencies_not_found += 1 + continue + suggestions: list[AgencyIDSubtaskSuggestion] = subtask.suggestions + for suggestion in suggestions: + info: GetNextURLForAgencyAgencyInfo = _convert_agency_to_get_next_url_for_agency_agency_info( + suggestion_type=SuggestionType.AUTO_SUGGESTION, + agency=suggestion.agency + ) + results.append(info) + return FinalReviewAnnotationAgencyAutoInfo( + unknown=count_agencies_not_found == len(subtasks), + suggestions=results + ) diff --git a/src/api/endpoints/review/next/query.py b/src/api/endpoints/review/next/core.py similarity index 60% rename from src/api/endpoints/review/next/query.py rename to src/api/endpoints/review/next/core.py index 7cb4670b..1736a970 100644 --- a/src/api/endpoints/review/next/query.py +++ b/src/api/endpoints/review/next/core.py @@ -1,26 +1,28 @@ -from typing import Optional, Type - -from sqlalchemy import FromClause, select, and_, Select, desc, asc, func +from sqlalchemy import FromClause, select, Select, desc, 
asc, func, CTE from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.orm import joinedload +from src.api.endpoints.review.next.convert import convert_agency_info_to_final_review_annotation_agency_info from src.api.endpoints.review.next.dto import FinalReviewOptionalMetadata, FinalReviewBatchInfo, \ GetNextURLForFinalReviewOuterResponse, GetNextURLForFinalReviewResponse, FinalReviewAnnotationInfo +from src.api.endpoints.review.next.extract import extract_html_content_infos, extract_optional_metadata +from src.api.endpoints.review.next.queries.count_reviewed import COUNT_REVIEWED_CTE +from src.api.endpoints.review.next.queries.eligible_urls import build_eligible_urls_cte +from src.api.endpoints.review.next.templates.count_cte import CountCTE from src.collectors.enums import URLStatus from src.core.tasks.url.operators.html.scraper.parser.util import convert_to_response_html_info from src.db.constants import USER_ANNOTATION_MODELS from src.db.dto_converter import DTOConverter from src.db.dtos.url.html_content import URLHTMLContentInfo from src.db.exceptions import FailedQueryException -from src.db.models.impl.batch.sqlalchemy import Batch -from src.db.models.impl.link.batch_url import LinkBatchURL +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.auto import AutomatedUrlAgencySuggestion +from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask +from src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion -from src.db.models.mixins import URLDependentMixin from src.db.queries.base.builder import QueryBuilderBase -from src.db.queries.implementations.core.common.annotation_exists import 
AnnotationExistsCTEQueryBuilder +from src.db.queries.implementations.core.common.annotation_exists_.core import AnnotationExistsCTEQueryBuilder TOTAL_DISTINCT_ANNOTATION_COUNT_LABEL = "total_distinct_annotation_count" @@ -42,7 +44,6 @@ def __init__(self, batch_id: int | None = None): ] # The below relationships are joined to entities that are joined to the URL self.double_join_relationships = [ - (URL.automated_agency_suggestions, AutomatedUrlAgencySuggestion.agency), (URL.user_agency_suggestion, UserUrlAgencySuggestion.agency), (URL.confirmed_agencies, LinkURLAgency.agency) ] @@ -60,58 +61,26 @@ def _get_where_exist_clauses( where_clauses.append(where_clause) return where_clauses - def _build_base_query( - self, - anno_exists_query: FromClause, - ) -> Select: - builder = self.anno_exists_builder - where_exist_clauses = self._get_where_exist_clauses( - builder.query - ) + def _build_base_query(self) -> Select: + eligible_urls: CTE = build_eligible_urls_cte(batch_id=self.batch_id) query = ( select( URL, - self._sum_exists_query(anno_exists_query, USER_ANNOTATION_MODELS) ) - .select_from(anno_exists_query) + .select_from( + eligible_urls + ) .join( URL, - URL.id == builder.url_id - ) - ) - if self.batch_id is not None: - query = ( - query.join( - LinkBatchURL - ) - .where( - LinkBatchURL.batch_id == self.batch_id - ) + URL.id == eligible_urls.c.url_id ) - - query = ( - query.where( - and_( - URL.status == URLStatus.PENDING.value, - *where_exist_clauses - ) + .where( + URL.status == URLStatus.OK.value ) ) return query - - def _sum_exists_query(self, query, models: list[Type[URLDependentMixin]]): - return sum( - [getattr(query.c, self.anno_exists_builder.get_exists_label(model)) for model in models] - ).label(TOTAL_DISTINCT_ANNOTATION_COUNT_LABEL) - - - async def _apply_batch_id_filter(self, url_query: Select, batch_id: int | None): - if batch_id is None: - return url_query - return url_query.where(URL.batch_id == batch_id) - async def _apply_options( self, url_query: 
Select @@ -124,49 +93,30 @@ async def _apply_options( *[ joinedload(primary).joinedload(secondary) for primary, secondary in self.double_join_relationships - ] - ) - - async def _apply_order_clause(self, url_query: Select): - return url_query.order_by( - desc(TOTAL_DISTINCT_ANNOTATION_COUNT_LABEL), - asc(URL.id) + ], + joinedload(URL.auto_agency_subtasks) + .joinedload(URLAutoAgencyIDSubtask.suggestions) + .joinedload(AgencyIDSubtaskSuggestion.agency) ) - async def _extract_html_content_infos(self, url: URL) -> list[URLHTMLContentInfo]: - html_content = url.html_content - html_content_infos = [ - URLHTMLContentInfo(**html_info.__dict__) - for html_info in html_content - ] - return html_content_infos - - async def _extract_optional_metadata(self, url: URL) -> FinalReviewOptionalMetadata: - if url.optional_data_source_metadata is None: - return FinalReviewOptionalMetadata() - return FinalReviewOptionalMetadata( - record_formats=url.optional_data_source_metadata.record_formats, - data_portal_type=url.optional_data_source_metadata.data_portal_type, - supplying_entity=url.optional_data_source_metadata.supplying_entity - ) async def get_batch_info(self, session: AsyncSession) -> FinalReviewBatchInfo | None: if self.batch_id is None: return None - count_reviewed_query = await self.get_count_reviewed_query() + count_reviewed_query: CountCTE = COUNT_REVIEWED_CTE count_ready_query = await self.get_count_ready_query() full_query = ( select( - func.coalesce(count_reviewed_query.c[self.count_label], 0).label("count_reviewed"), + func.coalesce(count_reviewed_query.count, 0).label("count_reviewed"), func.coalesce(count_ready_query.c[self.count_label], 0).label("count_ready_for_review") ) .select_from( count_ready_query.outerjoin( - count_reviewed_query, - count_reviewed_query.c.batch_id == count_ready_query.c.batch_id + count_reviewed_query.cte, + count_reviewed_query.batch_id == count_ready_query.c.batch_id ) ) ) @@ -175,6 +125,7 @@ async def get_batch_info(self, session:
AsyncSession) -> FinalReviewBatchInfo | return FinalReviewBatchInfo(**raw_result.mappings().one()) async def get_count_ready_query(self): + # TODO: Migrate to separate query builder builder = self.anno_exists_builder count_ready_query = ( select( @@ -189,7 +140,7 @@ async def get_count_ready_query(self): ) .where( LinkBatchURL.batch_id == self.batch_id, - URL.status == URLStatus.PENDING.value, + URL.status == URLStatus.OK.value, *self._get_where_exist_clauses( builder.query ) @@ -199,31 +150,6 @@ async def get_count_ready_query(self): ) return count_ready_query - async def get_count_reviewed_query(self): - count_reviewed_query = ( - select( - Batch.id.label("batch_id"), - func.count(URL.id).label(self.count_label) - ) - .select_from(Batch) - .join(LinkBatchURL) - .outerjoin(URL, URL.id == LinkBatchURL.url_id) - .where( - URL.status.in_( - [ - URLStatus.VALIDATED.value, - URLStatus.NOT_RELEVANT.value, - URLStatus.SUBMITTED.value, - URLStatus.INDIVIDUAL_RECORD.value - ] - ), - LinkBatchURL.batch_id == self.batch_id - ) - .group_by(Batch.id) - .subquery("count_reviewed") - ) - return count_reviewed_query - async def run( self, session: AsyncSession @@ -251,8 +177,8 @@ async def run( result: URL = row[0] - html_content_infos = await self._extract_html_content_infos(result) - optional_metadata = await self._extract_optional_metadata(result) + html_content_infos: list[URLHTMLContentInfo] = await extract_html_content_infos(result) + optional_metadata: FinalReviewOptionalMetadata = await extract_optional_metadata(result) batch_info = await self.get_batch_info(session) try: @@ -272,8 +198,8 @@ async def run( user_suggestion=result.user_record_type_suggestion, auto_suggestion=result.auto_record_type_suggestion ), - agency=DTOConverter.final_review_annotation_agency_info( - automated_agency_suggestions=result.automated_agency_suggestions, + agency=convert_agency_info_to_final_review_annotation_agency_info( + subtasks=result.auto_agency_subtasks, 
user_agency_suggestion=result.user_agency_suggestion, confirmed_agencies=result.confirmed_agencies ) @@ -289,9 +215,7 @@ async def run( raise FailedQueryException(f"Failed to convert result for url id {result.id} to response") from e async def build_url_query(self): - anno_exists_query = self.anno_exists_builder.query - url_query = self._build_base_query(anno_exists_query) + url_query = self._build_base_query() url_query = await self._apply_options(url_query) - url_query = await self._apply_order_clause(url_query) return url_query diff --git a/src/api/endpoints/review/next/extract.py b/src/api/endpoints/review/next/extract.py new file mode 100644 index 00000000..aca642e0 --- /dev/null +++ b/src/api/endpoints/review/next/extract.py @@ -0,0 +1,23 @@ +from src.api.endpoints.review.next.dto import FinalReviewOptionalMetadata +from src.db.dtos.url.html_content import URLHTMLContentInfo +from src.db.models.impl.url.core.sqlalchemy import URL + + +async def extract_html_content_infos( + url: URL +)-> list[URLHTMLContentInfo]: + html_content = url.html_content + html_content_infos = [ + URLHTMLContentInfo(**html_info.__dict__) + for html_info in html_content + ] + return html_content_infos + +async def extract_optional_metadata(url: URL) -> FinalReviewOptionalMetadata: + if url.optional_data_source_metadata is None: + return FinalReviewOptionalMetadata() + return FinalReviewOptionalMetadata( + record_formats=url.optional_data_source_metadata.record_formats, + data_portal_type=url.optional_data_source_metadata.data_portal_type, + supplying_entity=url.optional_data_source_metadata.supplying_entity + ) \ No newline at end of file diff --git a/src/api/endpoints/review/next/queries/__init__.py b/src/api/endpoints/review/next/queries/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/review/next/queries/count_reviewed.py b/src/api/endpoints/review/next/queries/count_reviewed.py new file mode 100644 index 00000000..91349cb5 --- /dev/null +++ 
b/src/api/endpoints/review/next/queries/count_reviewed.py @@ -0,0 +1,18 @@ +from sqlalchemy import select, func + +from src.api.endpoints.review.next.templates.count_cte import CountCTE +from src.db.models.impl.batch.sqlalchemy import Batch +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL + +COUNT_REVIEWED_CTE: CountCTE = CountCTE( + select( + Batch.id.label("batch_id"), + func.count(FlagURLValidated.url_id).label("count") + ) + .select_from(Batch) + .join(LinkBatchURL) + .outerjoin(FlagURLValidated, FlagURLValidated.url_id == LinkBatchURL.url_id) + .group_by(Batch.id) + .cte("count_reviewed") +) \ No newline at end of file diff --git a/src/api/endpoints/review/next/queries/eligible_urls.py b/src/api/endpoints/review/next/queries/eligible_urls.py new file mode 100644 index 00000000..bee5cea2 --- /dev/null +++ b/src/api/endpoints/review/next/queries/eligible_urls.py @@ -0,0 +1,35 @@ +from sqlalchemy import CTE, select, Select + +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.models.views.url_annotations_flags import URLAnnotationFlagsView + +uafw = URLAnnotationFlagsView + +def build_eligible_urls_cte(batch_id: int | None = None) -> CTE: + query: Select = ( + select( + uafw.url_id, + ) + .where( + # uafw.has_auto_agency_suggestion.is_(True), + # uafw.has_auto_record_type_suggestion.is_(True), + # uafw.has_auto_relevant_suggestion.is_(True), + uafw.has_user_relevant_suggestion.is_(True), + uafw.has_user_agency_suggestion.is_(True), + uafw.has_user_record_type_suggestion.is_(True), + uafw.was_reviewed.is_(False) + ) + ) + + if batch_id is not None: + query = ( + query.join( + LinkBatchURL, + LinkBatchURL.url_id == uafw.url_id + ) + .where( + LinkBatchURL.batch_id == batch_id + ) + ) + + return query.cte("eligible_urls") diff --git a/src/api/endpoints/review/next/templates/__init__.py 
b/src/api/endpoints/review/next/templates/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/review/next/templates/count_cte.py b/src/api/endpoints/review/next/templates/count_cte.py new file mode 100644 index 00000000..0abbbab4 --- /dev/null +++ b/src/api/endpoints/review/next/templates/count_cte.py @@ -0,0 +1,15 @@ +from sqlalchemy import CTE, Column + + +class CountCTE: + + def __init__(self, cte: CTE): + self.cte = cte + + @property + def batch_id(self) -> Column[int]: + return self.cte.c['batch_id'] + + @property + def count(self) -> Column[int]: + return self.cte.c['count'] \ No newline at end of file diff --git a/src/api/endpoints/review/reject/query.py b/src/api/endpoints/review/reject/query.py index 7d603fe1..c187a2a8 100644 --- a/src/api/endpoints/review/reject/query.py +++ b/src/api/endpoints/review/reject/query.py @@ -5,6 +5,8 @@ from src.api.endpoints.review.enums import RejectionReason from src.collectors.enums import URLStatus +from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.reviewing_user import ReviewingUserURL from src.db.queries.base.builder import QueryBuilderBase @@ -33,19 +35,27 @@ async def run(self, session) -> None: url = await session.execute(query) url = url.scalars().first() + validation_type: URLValidatedType | None = None match self.rejection_reason: case RejectionReason.INDIVIDUAL_RECORD: - url.status = URLStatus.INDIVIDUAL_RECORD.value + validation_type = URLValidatedType.INDIVIDUAL_RECORD case RejectionReason.BROKEN_PAGE_404: url.status = URLStatus.NOT_FOUND.value case RejectionReason.NOT_RELEVANT: - url.status = URLStatus.NOT_RELEVANT.value + validation_type = URLValidatedType.NOT_RELEVANT case _: raise HTTPException( status_code=HTTP_400_BAD_REQUEST, detail="Invalid rejection reason" ) + if validation_type is 
not None: + flag_url_validated = FlagURLValidated( + url_id=self.url_id, + type=validation_type + ) + session.add(flag_url_validated) + # Add rejecting user rejecting_user_url = ReviewingUserURL( user_id=self.user_id, diff --git a/src/api/main.py b/src/api/main.py index b6679827..f17c147f 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ -27,6 +27,10 @@ from src.core.tasks.scheduled.registry.core import ScheduledJobRegistry from src.core.tasks.url.loader import URLTaskOperatorLoader from src.core.tasks.url.manager import TaskManager +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.nlp.core import \ + NLPProcessor +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.nlp.enums import \ + SpacyModelType from src.core.tasks.url.operators.html.scraper.parser.core import HTMLResponseParser from src.db.client.async_ import AsyncDatabaseClient from src.db.client.sync import DatabaseClient @@ -83,6 +87,9 @@ async def lifespan(app: FastAPI): session=session, token=env_var_manager.hf_inference_api_key ), + nlp_processor=NLPProcessor( + model_type=SpacyModelType.EN_CORE_WEB_SM + ) ), ) async_collector_manager = AsyncCollectorManager( diff --git a/src/collectors/enums.py b/src/collectors/enums.py index 1732bd19..c357d6bf 100644 --- a/src/collectors/enums.py +++ b/src/collectors/enums.py @@ -11,11 +11,7 @@ class CollectorType(Enum): MANUAL = "manual" class URLStatus(Enum): - PENDING = "pending" - SUBMITTED = "submitted" - VALIDATED = "validated" + OK = "ok" ERROR = "error" DUPLICATE = "duplicate" - NOT_RELEVANT = "not relevant" NOT_FOUND = "404 not found" - INDIVIDUAL_RECORD = "individual record" diff --git a/src/collectors/impl/muckrock/api_interface/lookup_response.py b/src/collectors/impl/muckrock/api_interface/lookup_response.py index 47ea855b..d1fd9635 100644 --- a/src/collectors/impl/muckrock/api_interface/lookup_response.py +++ 
b/src/collectors/impl/muckrock/api_interface/lookup_response.py @@ -6,6 +6,6 @@ class AgencyLookupResponse(BaseModel): - name: Optional[str] + name: str | None type: AgencyLookupResponseType - error: Optional[str] = None + error: str | None = None diff --git a/src/collectors/queries/insert/url.py b/src/collectors/queries/insert/url.py index 96365107..af72a3aa 100644 --- a/src/collectors/queries/insert/url.py +++ b/src/collectors/queries/insert/url.py @@ -1,6 +1,6 @@ from sqlalchemy.ext.asyncio import AsyncSession -from src.db.models.impl.link.batch_url import LinkBatchURL +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.pydantic.info import URLInfo from src.db.models.impl.url.core.sqlalchemy import URL from src.db.queries.base.builder import QueryBuilderBase diff --git a/src/core/core.py b/src/core/core.py index c597a591..0938586a 100644 --- a/src/core/core.py +++ b/src/core/core.py @@ -35,7 +35,7 @@ from src.api.endpoints.task.dtos.get.tasks import GetTasksResponse from src.api.endpoints.url.get.dto import GetURLsResponseInfo from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.batch.pydantic import BatchInfo +from src.db.models.impl.batch.pydantic.info import BatchInfo from src.api.endpoints.task.dtos.get.task_status import GetTaskStatusResponseInfo from src.db.enums import TaskType from src.collectors.manager import AsyncCollectorManager diff --git a/src/core/enums.py b/src/core/enums.py index c6f90c80..edc18425 100644 --- a/src/core/enums.py +++ b/src/core/enums.py @@ -16,6 +16,7 @@ class RecordType(Enum): """ All available URL record types """ + # Police and Public ACCIDENT_REPORTS = "Accident Reports" ARREST_RECORDS = "Arrest Records" CALLS_FOR_SERVICE = "Calls for Service" @@ -31,16 +32,22 @@ class RecordType(Enum): SURVEYS = "Surveys" USE_OF_FORCE_REPORTS = "Use of Force Reports" VEHICLE_PURSUITS = "Vehicle Pursuits" + + # Info About Officers COMPLAINTS_AND_MISCONDUCT = 
"Complaints & Misconduct" DAILY_ACTIVITY_LOGS = "Daily Activity Logs" TRAINING_AND_HIRING_INFO = "Training & Hiring Info" PERSONNEL_RECORDS = "Personnel Records" + + # Info About Agencies ANNUAL_AND_MONTHLY_REPORTS = "Annual & Monthly Reports" BUDGETS_AND_FINANCES = "Budgets & Finances" CONTACT_INFO_AND_AGENCY_META = "Contact Info & Agency Meta" GEOGRAPHIC = "Geographic" LIST_OF_DATA_SOURCES = "List of Data Sources" POLICIES_AND_CONTRACTS = "Policies & Contracts" + + # Agency-Published Resources CRIME_MAPS_AND_REPORTS = "Crime Maps & Reports" CRIME_STATISTICS = "Crime Statistics" MEDIA_BULLETINS = "Media Bulletins" @@ -48,9 +55,13 @@ class RecordType(Enum): RESOURCES = "Resources" SEX_OFFENDER_REGISTRY = "Sex Offender Registry" WANTED_PERSONS = "Wanted Persons" + + # Jails and Courts Specific BOOKING_REPORTS = "Booking Reports" COURT_CASES = "Court Cases" INCARCERATION_RECORDS = "Incarceration Records" + + # Other OTHER = "Other" diff --git a/src/core/exceptions.py b/src/core/exceptions.py index d4f9c4a8..a361a24d 100644 --- a/src/core/exceptions.py +++ b/src/core/exceptions.py @@ -14,3 +14,4 @@ class MatchAgencyError(Exception): class FailedValidationException(HTTPException): def __init__(self, detail: str): super().__init__(status_code=HTTPStatus.BAD_REQUEST, detail=detail) + diff --git a/src/core/helpers.py b/src/core/helpers.py deleted file mode 100644 index eeb951fe..00000000 --- a/src/core/helpers.py +++ /dev/null @@ -1,48 +0,0 @@ -from src.core.enums import SuggestionType -from src.core.exceptions import MatchAgencyError -from src.core.tasks.url.operators.agency_identification.dtos.suggestion import URLAgencySuggestionInfo -from src.external.pdap.dtos.match_agency.response import MatchAgencyResponse -from src.external.pdap.enums import MatchAgencyResponseStatus - - -def process_match_agency_response_to_suggestions( - url_id: int, - match_agency_response: MatchAgencyResponse -) -> list[URLAgencySuggestionInfo]: - if match_agency_response.status == 
MatchAgencyResponseStatus.EXACT_MATCH: - match = match_agency_response.matches[0] - return [ - URLAgencySuggestionInfo( - url_id=url_id, - suggestion_type=SuggestionType.CONFIRMED, - pdap_agency_id=int(match.id), - agency_name=match.submitted_name, - state=match.state, - county=match.county, - ) - ] - if match_agency_response.status == MatchAgencyResponseStatus.NO_MATCH: - return [ - URLAgencySuggestionInfo( - url_id=url_id, - suggestion_type=SuggestionType.UNKNOWN, - ) - ] - - if match_agency_response.status != MatchAgencyResponseStatus.PARTIAL_MATCH: - raise MatchAgencyError( - f"Unknown Match Agency Response Status: {match_agency_response.status}" - ) - - return [ - URLAgencySuggestionInfo( - url_id=url_id, - suggestion_type=SuggestionType.AUTO_SUGGESTION, - pdap_agency_id=match.id, - agency_name=match.submitted_name, - state=match.state, - county=match.county, - locality=match.locality - ) - for match in match_agency_response.matches - ] diff --git a/src/core/tasks/handler.py b/src/core/tasks/handler.py index 7f488594..6ddca6eb 100644 --- a/src/core/tasks/handler.py +++ b/src/core/tasks/handler.py @@ -50,8 +50,11 @@ async def handle_task_error(self, run_info: TaskOperatorRunInfo): # task_id=run_info.task_id, error=run_info.message ) + msg: str = f"Task {run_info.task_id} ({run_info.task_type.value}) failed with error: {run_info.message}" + print(msg) self.discord_poster.post_to_discord( - message=f"Task {run_info.task_id} ({run_info.task_type.value}) failed with error.") + message=msg + ) async def link_urls_to_task(self, task_id: int, url_ids: list[int]): await self.adb_client.link_urls_to_task( diff --git a/src/core/tasks/scheduled/impl/huggingface/operator.py b/src/core/tasks/scheduled/impl/huggingface/operator.py index 7d5324f5..9bb7a85e 100644 --- a/src/core/tasks/scheduled/impl/huggingface/operator.py +++ b/src/core/tasks/scheduled/impl/huggingface/operator.py @@ -1,12 +1,19 @@ from itertools import count +from src.core.tasks.mixins.prereq import 
HasPrerequisitesMixin +from src.core.tasks.scheduled.impl.huggingface.queries.check.core import CheckValidURLsUpdatedQueryBuilder +from src.core.tasks.scheduled.impl.huggingface.queries.get.core import GetForLoadingToHuggingFaceQueryBuilder +from src.core.tasks.scheduled.impl.huggingface.queries.get.model import GetForLoadingToHuggingFaceOutput from src.core.tasks.scheduled.templates.operator import ScheduledTaskOperatorBase from src.db.client.async_ import AsyncDatabaseClient from src.db.enums import TaskType from src.external.huggingface.hub.client import HuggingFaceHubClient -class PushToHuggingFaceTaskOperator(ScheduledTaskOperatorBase): +class PushToHuggingFaceTaskOperator( + ScheduledTaskOperatorBase, + HasPrerequisitesMixin +): @property def task_type(self) -> TaskType: @@ -20,21 +27,23 @@ def __init__( super().__init__(adb_client) self.hf_client = hf_client - async def inner_task_logic(self): - # Check if any valid urls have been updated - valid_urls_updated = await self.adb_client.check_valid_urls_updated() - print(f"Valid urls updated: {valid_urls_updated}") - if not valid_urls_updated: - print("No valid urls updated, skipping.") - return - + async def meets_task_prerequisites(self) -> bool: + return await self.adb_client.run_query_builder( + CheckValidURLsUpdatedQueryBuilder() + ) - # Otherwise, push to huggingface + async def inner_task_logic(self): + """Push raw data sources to huggingface.""" run_dt = await self.adb_client.get_current_database_time() for idx in count(start=1): - outputs = await self.adb_client.get_data_sources_raw_for_huggingface(page=idx) + outputs: list[GetForLoadingToHuggingFaceOutput] = await self._get_data_sources_raw_for_huggingface(page=idx) if len(outputs) == 0: break self.hf_client.push_data_sources_raw_to_hub(outputs, idx=idx) await self.adb_client.set_hugging_face_upload_state(run_dt.replace(tzinfo=None)) + + async def _get_data_sources_raw_for_huggingface(self, page: int) -> list[GetForLoadingToHuggingFaceOutput]: + return 
await self.adb_client.run_query_builder( + GetForLoadingToHuggingFaceQueryBuilder(page) + ) diff --git a/src/core/tasks/scheduled/impl/huggingface/queries/check/requester.py b/src/core/tasks/scheduled/impl/huggingface/queries/check/requester.py index 23e0b0b6..25124c95 100644 --- a/src/core/tasks/scheduled/impl/huggingface/queries/check/requester.py +++ b/src/core/tasks/scheduled/impl/huggingface/queries/check/requester.py @@ -1,4 +1,5 @@ from datetime import datetime +from operator import or_ from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession @@ -6,6 +7,7 @@ from src.collectors.enums import URLStatus from src.db.helpers.session import session_helper as sh +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.state.huggingface import HuggingFaceUploadState from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML from src.db.models.impl.url.core.sqlalchemy import URL @@ -34,14 +36,12 @@ async def has_valid_urls(self, last_upload_at: datetime | None) -> bool: URLCompressedHTML, URL.id == URLCompressedHTML.url_id ) + .outerjoin( + FlagURLValidated, + URL.id == FlagURLValidated.url_id + ) .where( - URL.status.in_( - [ - URLStatus.VALIDATED, - URLStatus.NOT_RELEVANT.value, - URLStatus.SUBMITTED.value, - ] - ), + FlagURLValidated.url_id.isnot(None) ) ) if last_upload_at is not None: diff --git a/src/core/tasks/scheduled/impl/huggingface/queries/get/convert.py b/src/core/tasks/scheduled/impl/huggingface/queries/get/convert.py index 9d5c4135..5ad96115 100644 --- a/src/core/tasks/scheduled/impl/huggingface/queries/get/convert.py +++ b/src/core/tasks/scheduled/impl/huggingface/queries/get/convert.py @@ -1,8 +1,7 @@ -from src.collectors.enums import URLStatus from src.core.enums import RecordType from src.core.tasks.scheduled.impl.huggingface.queries.get.enums import RecordTypeCoarse -from src.core.tasks.scheduled.impl.huggingface.queries.get.mappings import 
FINE_COARSE_RECORD_TYPE_MAPPING, \ - OUTCOME_RELEVANCY_MAPPING +from src.core.tasks.scheduled.impl.huggingface.queries.get.mappings import FINE_COARSE_RECORD_TYPE_MAPPING +from src.db.models.impl.flag.url_validated.enums import URLValidatedType def convert_fine_to_coarse_record_type( @@ -10,7 +9,14 @@ def convert_fine_to_coarse_record_type( ) -> RecordTypeCoarse: return FINE_COARSE_RECORD_TYPE_MAPPING[fine_record_type] -def convert_url_status_to_relevant( - url_status: URLStatus + +def convert_validated_type_to_relevant( + validated_type: URLValidatedType ) -> bool: - return OUTCOME_RELEVANCY_MAPPING[url_status] \ No newline at end of file + match validated_type: + case URLValidatedType.NOT_RELEVANT: + return False + case URLValidatedType.DATA_SOURCE: + return True + case _: + raise ValueError(f"Disallowed validated type: {validated_type}") \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/huggingface/queries/get/core.py b/src/core/tasks/scheduled/impl/huggingface/queries/get/core.py index 662f7fbb..d58cbdf7 100644 --- a/src/core/tasks/scheduled/impl/huggingface/queries/get/core.py +++ b/src/core/tasks/scheduled/impl/huggingface/queries/get/core.py @@ -1,16 +1,18 @@ from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession -from src.collectors.enums import URLStatus -from src.core.tasks.scheduled.impl.huggingface.queries.get.convert import convert_url_status_to_relevant, \ - convert_fine_to_coarse_record_type +from src.core.tasks.scheduled.impl.huggingface.queries.get.convert import convert_fine_to_coarse_record_type, \ + convert_validated_type_to_relevant from src.core.tasks.scheduled.impl.huggingface.queries.get.model import GetForLoadingToHuggingFaceOutput from src.db.client.helpers import add_standard_limit_and_offset -from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML +from src.db.helpers.session import session_helper as sh +from src.db.models.impl.flag.url_validated.enums import 
URLValidatedType +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML from src.db.queries.base.builder import QueryBuilderBase from src.db.utils.compression import decompress_html -from src.db.helpers.session import session_helper as sh + class GetForLoadingToHuggingFaceQueryBuilder(QueryBuilderBase): @@ -22,29 +24,32 @@ def __init__(self, page: int): async def run(self, session: AsyncSession) -> list[GetForLoadingToHuggingFaceOutput]: label_url_id = 'url_id' label_url = 'url' - label_url_status = 'url_status' label_record_type_fine = 'record_type_fine' label_html = 'html' + label_type = 'type' query = ( select( URL.id.label(label_url_id), URL.url.label(label_url), - URL.status.label(label_url_status), URL.record_type.label(label_record_type_fine), - URLCompressedHTML.compressed_html.label(label_html) + URLCompressedHTML.compressed_html.label(label_html), + FlagURLValidated.type.label(label_type) ) .join( URLCompressedHTML, URL.id == URLCompressedHTML.url_id ) + .outerjoin( + FlagURLValidated, + URL.id == FlagURLValidated.url_id + ) .where( - URL.status.in_([ - URLStatus.VALIDATED, - URLStatus.NOT_RELEVANT, - URLStatus.SUBMITTED - ]) + FlagURLValidated.type.in_( + (URLValidatedType.DATA_SOURCE, + URLValidatedType.NOT_RELEVANT) + ) ) ) query = add_standard_limit_and_offset(page=self.page, statement=query) @@ -57,7 +62,9 @@ async def run(self, session: AsyncSession) -> list[GetForLoadingToHuggingFaceOut output = GetForLoadingToHuggingFaceOutput( url_id=result[label_url_id], url=result[label_url], - relevant=convert_url_status_to_relevant(result[label_url_status]), + relevant=convert_validated_type_to_relevant( + URLValidatedType(result[label_type]) + ), record_type_fine=result[label_record_type_fine], record_type_coarse=convert_fine_to_coarse_record_type( result[label_record_type_fine] diff --git 
a/src/core/tasks/scheduled/impl/huggingface/queries/get/mappings.py b/src/core/tasks/scheduled/impl/huggingface/queries/get/mappings.py index ed4a7da2..0fd12b28 100644 --- a/src/core/tasks/scheduled/impl/huggingface/queries/get/mappings.py +++ b/src/core/tasks/scheduled/impl/huggingface/queries/get/mappings.py @@ -47,9 +47,3 @@ RecordType.OTHER: RecordTypeCoarse.OTHER, None: RecordTypeCoarse.NOT_RELEVANT } - -OUTCOME_RELEVANCY_MAPPING = { - URLStatus.SUBMITTED: True, - URLStatus.VALIDATED: True, - URLStatus.NOT_RELEVANT: False -} \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync/agency/operator.py b/src/core/tasks/scheduled/impl/sync/agency/operator.py index db20acf1..6adff30b 100644 --- a/src/core/tasks/scheduled/impl/sync/agency/operator.py +++ b/src/core/tasks/scheduled/impl/sync/agency/operator.py @@ -1,9 +1,11 @@ +from src.core.tasks.scheduled.impl.sync.agency.queries.upsert.core import UpsertAgenciesQueryBuilder from src.core.tasks.scheduled.impl.sync.check import check_max_sync_requests_not_exceeded from src.core.tasks.scheduled.impl.sync.agency.dtos.parameters import AgencySyncParameters from src.core.tasks.scheduled.templates.operator import ScheduledTaskOperatorBase from src.db.client.async_ import AsyncDatabaseClient from src.db.enums import TaskType from src.external.pdap.client import PDAPClient +from src.external.pdap.dtos.sync.agencies import AgenciesSyncResponseInnerInfo class SyncAgenciesTaskOperator(ScheduledTaskOperatorBase): @@ -21,17 +23,19 @@ def task_type(self) -> TaskType: # return TaskType.SYNC_AGENCIES async def inner_task_logic(self): - count_agencies_synced = 0 params = await self.adb_client.get_agencies_sync_parameters() if params.page is None: params.page = 1 response = await self.pdap_client.sync_agencies(params) - count_agencies_synced += len(response.agencies) - request_count = 1 + count_agencies_synced = 0 + request_count = 0 while len(response.agencies) > 0: + await self.update_data(response.agencies) + 
count_agencies_synced += len(response.agencies) + request_count += 1 + check_max_sync_requests_not_exceeded(request_count) - await self.adb_client.upsert_agencies(response.agencies) params = AgencySyncParameters( page=params.page + 1, @@ -40,9 +44,13 @@ async def inner_task_logic(self): await self.adb_client.update_agencies_sync_progress(params.page) response = await self.pdap_client.sync_agencies(params) - count_agencies_synced += len(response.agencies) - request_count += 1 + await self.adb_client.mark_full_agencies_sync() print(f"Sync complete. Synced {count_agencies_synced} agencies") + async def update_data(self, agencies: list[AgenciesSyncResponseInnerInfo]): + # First, add new agencies + await self.adb_client.run_query_builder( + UpsertAgenciesQueryBuilder(agencies) + ) diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/__init__.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/convert.py similarity index 97% rename from src/core/tasks/scheduled/impl/sync/agency/queries/upsert.py rename to src/core/tasks/scheduled/impl/sync/agency/queries/upsert/convert.py index 61a0b104..4b944464 100644 --- a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert.py +++ b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/convert.py @@ -17,4 +17,4 @@ def convert_agencies_sync_response_to_agencies_upsert( ds_last_updated_at=agency.updated_at ) ) - return results \ No newline at end of file + return results diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/core.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/core.py new file mode 100644 index 00000000..fc909e48 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/core.py @@ -0,0 +1,30 @@ +from sqlalchemy.ext.asyncio import AsyncSession + +from 
src.core.tasks.scheduled.impl.sync.agency.queries.upsert.links.core import UpdateAgencyURLLinksQueryBuilder +from src.core.tasks.scheduled.impl.sync.agency.queries.upsert.meta_urls.core import UpsertMetaUrlsQueryBuilder +from src.core.tasks.scheduled.impl.sync.agency.queries.upsert.convert import \ + convert_agencies_sync_response_to_agencies_upsert +from src.db.models.impl.agency.pydantic.upsert import AgencyUpsertModel +from src.db.queries.base.builder import QueryBuilderBase +from src.external.pdap.dtos.sync.agencies import AgenciesSyncResponseInnerInfo + +from src.db.helpers.session import session_helper as sh + +class UpsertAgenciesQueryBuilder(QueryBuilderBase): + + def __init__(self, sync_responses: list[AgenciesSyncResponseInnerInfo]): + super().__init__() + self.sync_responses = sync_responses + + async def run(self, session: AsyncSession) -> None: + # Upsert Agencies + agency_upserts: list[AgencyUpsertModel] = convert_agencies_sync_response_to_agencies_upsert(self.sync_responses) + await sh.bulk_upsert(session=session, models=agency_upserts) + + # Add and update Meta URLs + meta_urls_query_builder = UpsertMetaUrlsQueryBuilder(self.sync_responses) + await meta_urls_query_builder.run(session=session) + + # Add and remove URL-Agency Links + update_url_links_query_builder = UpdateAgencyURLLinksQueryBuilder(self.sync_responses) + await update_url_links_query_builder.run(session=session) diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/extract.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/extract.py new file mode 100644 index 00000000..c05b55f1 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/extract.py @@ -0,0 +1,12 @@ +from src.external.pdap.dtos.sync.agencies import AgenciesSyncResponseInnerInfo + + +def extract_urls_from_agencies_sync_response( + responses: list[AgenciesSyncResponseInnerInfo] +) -> list[str]: + url_set: set[str] = set() + for response in responses: + for url in 
response.meta_urls: + url_set.add(url) + + return list(url_set) diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/__init__.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/build.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/build.py new file mode 100644 index 00000000..5511ea65 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/build.py @@ -0,0 +1,23 @@ +from src.db.dtos.url.mapping import URLMapping +from src.db.models.impl.link.url_agency.pydantic import LinkURLAgencyPydantic +from src.external.pdap.dtos.sync.agencies import AgenciesSyncResponseInnerInfo +from src.util.url_mapper import URLMapper + +def build_links_from_url_mappings_and_sync_responses( + url_mappings: list[URLMapping], + sync_responses: list[AgenciesSyncResponseInnerInfo], +) -> list[LinkURLAgencyPydantic]: + + links: list[LinkURLAgencyPydantic] = [] + + mapper = URLMapper(url_mappings) + for sync_response in sync_responses: + agency_id: int = sync_response.agency_id + for meta_url in sync_response.meta_urls: + url_id: int = mapper.get_id(meta_url) + link = LinkURLAgencyPydantic( + agency_id=agency_id, + url_id=url_id + ) + links.append(link) + return links \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/core.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/core.py new file mode 100644 index 00000000..37d63a03 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/core.py @@ -0,0 +1,50 @@ +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.scheduled.impl.sync.agency.queries.upsert.extract import extract_urls_from_agencies_sync_response +from src.core.tasks.scheduled.impl.sync.agency.queries.upsert.links.build import \ + 
build_links_from_url_mappings_and_sync_responses +from src.core.tasks.scheduled.impl.sync.agency.queries.upsert.links.requester import UpdateAgencyURLLinksRequester +from src.core.tasks.scheduled.impl.sync.agency.queries.upsert.meta_urls.lookup.extract import \ + extract_agency_ids_from_agencies_sync_response +from src.db.dtos.url.mapping import URLMapping +from src.db.models.impl.link.url_agency.pydantic import LinkURLAgencyPydantic +from src.db.queries.base.builder import QueryBuilderBase +from src.external.pdap.dtos.sync.agencies import AgenciesSyncResponseInnerInfo + + +class UpdateAgencyURLLinksQueryBuilder(QueryBuilderBase): + """Updates agency URL links.""" + + def __init__( + self, + sync_responses: list[AgenciesSyncResponseInnerInfo] + ): + super().__init__() + self._sync_responses = sync_responses + + async def run(self, session: AsyncSession) -> None: + # Get all existing links + requester = UpdateAgencyURLLinksRequester(session) + + # Build new links from sync responses and URL mappings + sync_urls: list[str] = extract_urls_from_agencies_sync_response(self._sync_responses) + url_mappings: list[URLMapping] = await requester.get_url_mappings(urls=sync_urls) + new_links: list[LinkURLAgencyPydantic] = build_links_from_url_mappings_and_sync_responses( + url_mappings=url_mappings, + sync_responses=self._sync_responses, + ) + + sync_agency_ids: list[int] = extract_agency_ids_from_agencies_sync_response(self._sync_responses) + old_links: list[LinkURLAgencyPydantic] = await requester.get_current_agency_url_links( + agency_ids=sync_agency_ids, + ) + + new_set: set[LinkURLAgencyPydantic] = set(new_links) + old_set: set[LinkURLAgencyPydantic] = set(old_links) + + links_to_add: list[LinkURLAgencyPydantic] = list(new_set - old_set) + links_to_remove: list[LinkURLAgencyPydantic] = list(old_set - new_set) + + await requester.add_agency_url_links(links=links_to_add) + await requester.remove_agency_url_links(links=links_to_remove) + diff --git 
a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/filter.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/filter.py new file mode 100644 index 00000000..123bd0ba --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/filter.py @@ -0,0 +1,12 @@ +from src.core.tasks.scheduled.impl.sync.agency.queries.upsert.links.models.mappings import AgencyURLMappings + + +def filter_non_relevant_mappings( + mappings: list[AgencyURLMappings], + relevant_agency_ids: list[int] +) -> list[AgencyURLMappings]: + relevant_mappings: list[AgencyURLMappings] = [] + for mapping in mappings: + if mapping.agency_id in relevant_agency_ids: + relevant_mappings.append(mapping) + return relevant_mappings \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/lookup_/__init__.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/lookup_/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/lookup_/links.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/lookup_/links.py new file mode 100644 index 00000000..9336deaa --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/lookup_/links.py @@ -0,0 +1,46 @@ +from typing import Sequence + +from sqlalchemy import select, RowMapping +from sqlalchemy.ext.asyncio import AsyncSession + +from src.db.helpers.session import session_helper as sh +from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.models.impl.link.url_agency.pydantic import LinkURLAgencyPydantic +from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.queries.base.builder import QueryBuilderBase + + +class LookupMetaURLLinksQueryBuilder(QueryBuilderBase): + 
+ def __init__(self, agency_ids: list[int]): + super().__init__() + self.agency_ids: list[int] = agency_ids + + async def run(self, session: AsyncSession) -> list[LinkURLAgencyPydantic]: + + query = ( + select( + LinkURLAgency.url_id, + LinkURLAgency.agency_id + ) + .join( + URL, + LinkURLAgency.url_id == URL.id, + ) + .join( + FlagURLValidated, + FlagURLValidated.url_id == URL.id, + ) + .where( + FlagURLValidated.type == URLValidatedType.META_URL, + LinkURLAgency.agency_id.in_(self.agency_ids), + ) + ) + + mappings: Sequence[RowMapping] = await sh.mappings(session, query=query) + links: list[LinkURLAgencyPydantic] = [ + LinkURLAgencyPydantic(**mapping) for mapping in mappings + ] + return links \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/lookup_/url.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/lookup_/url.py new file mode 100644 index 00000000..8b526447 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/lookup_/url.py @@ -0,0 +1,31 @@ +from typing import Sequence + +from sqlalchemy import select, RowMapping +from sqlalchemy.ext.asyncio import AsyncSession + +from src.db.dtos.url.mapping import URLMapping +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.queries.base.builder import QueryBuilderBase +from src.db.helpers.session import session_helper as sh + +class LookupURLQueryBuilder(QueryBuilderBase): + + def __init__(self, urls: list[str]): + super().__init__() + self.urls: list[str] = urls + + async def run(self, session: AsyncSession) -> list[URLMapping]: + query = ( + select( + URL.id.label("url_id"), + URL.url, + ) + .where( + URL.url.in_(self.urls), + ) + ) + mappings: Sequence[RowMapping] = await sh.mappings(session, query=query) + urls: list[URLMapping] = [ + URLMapping(**mapping) for mapping in mappings + ] + return urls \ No newline at end of file diff --git 
a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/models/__init__.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/models/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/models/mappings.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/models/mappings.py new file mode 100644 index 00000000..0f3c9d69 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/models/mappings.py @@ -0,0 +1,6 @@ +from pydantic import BaseModel + + +class AgencyURLMappings(BaseModel): + agency_id: int + url_ids: list[int] \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/requester.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/requester.py new file mode 100644 index 00000000..96887dfa --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/requester.py @@ -0,0 +1,21 @@ +from src.core.tasks.scheduled.impl.sync.agency.queries.upsert.links.lookup_.links import LookupMetaURLLinksQueryBuilder +from src.core.tasks.scheduled.impl.sync.agency.queries.upsert.links.lookup_.url import LookupURLQueryBuilder +from src.db.dtos.url.mapping import URLMapping +from src.db.helpers.session import session_helper as sh +from src.db.models.impl.link.url_agency.pydantic import LinkURLAgencyPydantic +from src.db.templates.requester import RequesterBase + + +class UpdateAgencyURLLinksRequester(RequesterBase): + + async def get_url_mappings(self, urls: list[str]) -> list[URLMapping]: + return await LookupURLQueryBuilder(urls=urls).run(session=self.session) + + async def get_current_agency_url_links(self, agency_ids: list[int]) -> list[LinkURLAgencyPydantic]: + return await LookupMetaURLLinksQueryBuilder(agency_ids=agency_ids).run(session=self.session) + + async def add_agency_url_links(self, links: list[LinkURLAgencyPydantic]) -> None: + await 
sh.bulk_insert(self.session, models=links) + + async def remove_agency_url_links(self, links: list[LinkURLAgencyPydantic]) -> None: + await sh.bulk_delete(self.session, models=links) diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/__init__.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/add/__init__.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/add/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/add/core.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/add/core.py new file mode 100644 index 00000000..73761251 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/add/core.py @@ -0,0 +1,57 @@ +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.enums import RecordType +from src.db.dtos.url.mapping import URLMapping +from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.pydantic import FlagURLValidatedPydantic +from src.db.models.impl.url.core.enums import URLSource +from src.db.models.impl.url.core.pydantic.insert import URLInsertModel +from src.db.queries.base.builder import QueryBuilderBase + +from src.db.helpers.session import session_helper as sh + +class AddMetaURLsQueryBuilder(QueryBuilderBase): + + """Add Meta URLs to DB with: + - Record type set to CONTACT_INFO_AND_AGENCY_META + - Validation Flag added as META_URL + - Source set to DATA_SOURCES + """ + def __init__(self, urls: list[str]): + super().__init__() + self.urls = urls + + async def run(self, session: AsyncSession) -> list[URLMapping]: + # Add URLs + url_inserts: list[URLInsertModel] = [] + for url in self.urls: + url_inserts.append( + URLInsertModel( + url=url, + 
record_type=RecordType.CONTACT_INFO_AND_AGENCY_META, + source=URLSource.DATA_SOURCES + ) + ) + url_ids: list[int] = await sh.bulk_insert(session, models=url_inserts, return_ids=True) + + # Connect with URLs + mappings: list[URLMapping] = [ + URLMapping( + url=url, + url_id=url_id, + ) + for url, url_id in zip(self.urls, url_ids) + ] + + # Add Validation Flags + flag_inserts: list[FlagURLValidatedPydantic] = [] + for url_id in url_ids: + flag_inserts.append( + FlagURLValidatedPydantic( + url_id=url_id, + type=URLValidatedType.META_URL + ) + ) + await sh.bulk_insert(session, models=flag_inserts) + + return mappings diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/convert.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/convert.py new file mode 100644 index 00000000..8d3e8785 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/convert.py @@ -0,0 +1,27 @@ +from src.core.tasks.scheduled.impl.sync.agency.queries.upsert.meta_urls.lookup.response import MetaURLLookupResponse +from src.core.tasks.scheduled.impl.sync.agency.queries.upsert.meta_urls.update.params import UpdateMetaURLsParams +from src.db.dtos.url.mapping import URLMapping + + +def convert_to_update_meta_urls_params( + lookups: list[MetaURLLookupResponse] +) -> list[UpdateMetaURLsParams]: + return [ + UpdateMetaURLsParams( + url_id=lookup.url_id, + validation_type=lookup.validation_type, + record_type=lookup.record_type, + ) + for lookup in lookups + ] + +def convert_url_lookups_to_url_mappings( + lookups: list[MetaURLLookupResponse] +) -> list[URLMapping]: + return [ + URLMapping( + url_id=lookup.url_id, + url=lookup.url, + ) + for lookup in lookups + ] \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/core.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/core.py new file mode 100644 index 00000000..6f5c3593 --- /dev/null +++ 
b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/core.py @@ -0,0 +1,33 @@ +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.scheduled.impl.sync.agency.queries.upsert.extract import extract_urls_from_agencies_sync_response +from src.core.tasks.scheduled.impl.sync.agency.queries.upsert.meta_urls.filter import filter_urls_in_sync +from src.core.tasks.scheduled.impl.sync.agency.queries.upsert.meta_urls.lookup.response import MetaURLLookupResponse +from src.core.tasks.scheduled.impl.sync.agency.queries.upsert.meta_urls.requester import UpdateMetaURLsRequester +from src.db.queries.base.builder import QueryBuilderBase +from src.external.pdap.dtos.sync.agencies import AgenciesSyncResponseInnerInfo + + +class UpsertMetaUrlsQueryBuilder(QueryBuilderBase): + """Add and update meta URLs for agencies.""" + + def __init__(self, sync_responses: list[AgenciesSyncResponseInnerInfo]): + super().__init__() + self.sync_responses = sync_responses + + async def run(self, session: AsyncSession) -> None: + + requester = UpdateMetaURLsRequester(session) + sync_urls: list[str] = extract_urls_from_agencies_sync_response(self.sync_responses) + + + lookup_responses: list[MetaURLLookupResponse] = \ + await requester.lookup_meta_urls(sync_urls) + await requester.add_new_urls_to_database(lookup_responses) + + filtered_lookup_responses: list[MetaURLLookupResponse] = \ + filter_urls_in_sync(self.sync_responses, lookup_responses=lookup_responses) + await requester.update_existing_urls(filtered_lookup_responses) + + + diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/filter.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/filter.py new file mode 100644 index 00000000..227f0edc --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/filter.py @@ -0,0 +1,37 @@ +from src.core.tasks.scheduled.impl.sync.agency.queries.upsert.extract import extract_urls_from_agencies_sync_response 
+from src.core.tasks.scheduled.impl.sync.agency.queries.upsert.meta_urls.lookup.response import MetaURLLookupResponse +from src.external.pdap.dtos.sync.agencies import AgenciesSyncResponseInnerInfo + + +def filter_urls_to_add( + lookup_responses: list[MetaURLLookupResponse] +) -> list[str]: + return [ + lookup_response.url + for lookup_response in lookup_responses + if not lookup_response.exists_in_db + ] + +def filter_existing_url_mappings( + lookup_responses: list[MetaURLLookupResponse] +) -> list[MetaURLLookupResponse]: + """Filter only URL mappings that already exist in the database.""" + return [ + lookup_response + for lookup_response in lookup_responses + if lookup_response.exists_in_db + ] + +def filter_urls_in_sync( + sync_responses: list[AgenciesSyncResponseInnerInfo], + lookup_responses: list[MetaURLLookupResponse] +) -> list[MetaURLLookupResponse]: + """Filter only URLs that are in sync responses.""" + sync_urls: set[str] = set( + extract_urls_from_agencies_sync_response(sync_responses) + ) + filtered_lookup_responses: list[MetaURLLookupResponse] = [] + for lookup_response in lookup_responses: + if lookup_response.url in sync_urls: + filtered_lookup_responses.append(lookup_response) + return filtered_lookup_responses \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/lookup/__init__.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/lookup/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/lookup/core.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/lookup/core.py new file mode 100644 index 00000000..8a817bd4 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/lookup/core.py @@ -0,0 +1,66 @@ +from typing import Sequence + +from sqlalchemy import select, RowMapping +from sqlalchemy.ext.asyncio import AsyncSession + +from 
src.core.tasks.scheduled.impl.sync.agency.queries.upsert.meta_urls.lookup.response import MetaURLLookupResponse +from src.db.helpers.session import session_helper as sh +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.queries.base.builder import QueryBuilderBase + + +class LookupMetaURLsQueryBuilder(QueryBuilderBase): + """Lookup whether URLs exist in DB and are validated as meta URLs""" + + def __init__(self, urls: list[str]): + super().__init__() + self.urls: list[str] = urls + + async def run(self, session: AsyncSession) -> list[MetaURLLookupResponse]: + url_id_label: str = "url_id" + + query = ( + select( + URL.id.label(url_id_label), + URL.url, + URL.record_type, + FlagURLValidated.type + ) + .select_from( + URL + ) + .outerjoin( + FlagURLValidated, + FlagURLValidated.url_id == URL.id, + ) + .where( + URL.url.in_(self.urls) + ) + ) + mappings: Sequence[RowMapping] = await sh.mappings(session, query=query) + + urls_in_db = set() + extant_lookup_responses: list[MetaURLLookupResponse] = [] + for mapping in mappings: + url = mapping["url"] + urls_in_db.add(url) + response = MetaURLLookupResponse( + url=url, + url_id=mapping[url_id_label], + record_type=mapping["record_type"], + validation_type=mapping["type"], + ) + extant_lookup_responses.append(response) + + urls_not_in_db = set(self.urls) - set(urls_in_db) + non_extant_lookup_responses = [ + MetaURLLookupResponse( + url=url, + url_id=None, + record_type=None, + validation_type=None, + ) for url in urls_not_in_db + ] + + return extant_lookup_responses + non_extant_lookup_responses diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/lookup/extract.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/lookup/extract.py new file mode 100644 index 00000000..d054f645 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/lookup/extract.py @@ 
-0,0 +1,10 @@ +from src.external.pdap.dtos.sync.agencies import AgenciesSyncResponseInnerInfo + + +def extract_agency_ids_from_agencies_sync_response( + responses: list[AgenciesSyncResponseInnerInfo] +) -> list[int]: + return [ + response.agency_id + for response in responses + ] diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/lookup/response.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/lookup/response.py new file mode 100644 index 00000000..ff2d668d --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/lookup/response.py @@ -0,0 +1,23 @@ +from pydantic import BaseModel + +from src.core.enums import RecordType +from src.db.models.impl.flag.url_validated.enums import URLValidatedType + + +class MetaURLLookupResponse(BaseModel): + url: str + url_id: int | None + record_type: RecordType | None + validation_type: URLValidatedType | None + + @property + def exists_in_db(self) -> bool: + return self.url_id is not None + + @property + def is_meta_url(self) -> bool: + return self.record_type == RecordType.CONTACT_INFO_AND_AGENCY_META + + @property + def is_validated(self) -> bool: + return self.validation_type is not None diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/requester.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/requester.py new file mode 100644 index 00000000..0a3e3c76 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/requester.py @@ -0,0 +1,48 @@ +from src.core.tasks.scheduled.impl.sync.agency.queries.upsert.meta_urls.add.core import AddMetaURLsQueryBuilder +from src.core.tasks.scheduled.impl.sync.agency.queries.upsert.meta_urls.convert import \ + convert_to_update_meta_urls_params, convert_url_lookups_to_url_mappings +from src.core.tasks.scheduled.impl.sync.agency.queries.upsert.meta_urls.filter import filter_existing_url_mappings, \ + filter_urls_to_add +from 
src.core.tasks.scheduled.impl.sync.agency.queries.upsert.meta_urls.lookup.core import LookupMetaURLsQueryBuilder +from src.core.tasks.scheduled.impl.sync.agency.queries.upsert.meta_urls.lookup.response import MetaURLLookupResponse +from src.core.tasks.scheduled.impl.sync.agency.queries.upsert.meta_urls.update.core import UpdateMetaURLsQueryBuilder +from src.core.tasks.scheduled.impl.sync.agency.queries.upsert.meta_urls.update.params import UpdateMetaURLsParams +from src.db.dtos.url.mapping import URLMapping +from src.db.templates.requester import RequesterBase + + +class UpdateMetaURLsRequester(RequesterBase): + + async def lookup_meta_urls( + self, + urls: list[str] + ) -> list[MetaURLLookupResponse]: + return await LookupMetaURLsQueryBuilder( + urls + ).run(self.session) + + async def add_new_urls_to_database( + self, + lookup_responses: list[MetaURLLookupResponse] + ) -> list[URLMapping]: + if len(lookup_responses) == 0: + return [] + urls_to_add: list[str] = filter_urls_to_add(lookup_responses) + if len(urls_to_add) == 0: + return [] + return await AddMetaURLsQueryBuilder(urls_to_add).run(self.session) + + async def update_existing_urls( + self, + lookup_responses: list[MetaURLLookupResponse] + ) -> list[URLMapping]: + existing_url_lookups: list[MetaURLLookupResponse] = ( + filter_existing_url_mappings(lookup_responses) + ) + params: list[UpdateMetaURLsParams] = \ + convert_to_update_meta_urls_params(existing_url_lookups) + await UpdateMetaURLsQueryBuilder(params).run(self.session) + existing_url_mappings: list[URLMapping] = \ + convert_url_lookups_to_url_mappings(existing_url_lookups) + return existing_url_mappings + diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/update/__init__.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/update/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/update/core.py 
b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/update/core.py new file mode 100644 index 00000000..1e479652 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/update/core.py @@ -0,0 +1,39 @@ +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.scheduled.impl.sync.agency.queries.upsert.meta_urls.update.filter import \ + filter_urls_with_non_meta_record_type, filter_urls_with_non_meta_url_validation_flag, \ + filter_urls_without_validation_flag +from src.core.tasks.scheduled.impl.sync.agency.queries.upsert.meta_urls.update.params import UpdateMetaURLsParams +from src.core.tasks.scheduled.impl.sync.agency.queries.upsert.meta_urls.update.requester import \ + UpdateMetaURLsUpdateURLAndValidationFlagsRequester +from src.db.queries.base.builder import QueryBuilderBase + + +class UpdateMetaURLsQueryBuilder(QueryBuilderBase): + """Update meta URLs in DB + + Meta URLs should be given a validation status as a Meta URL + and have their record type updated to CONTACT_INFO_AND_AGENCY_META + """ + + def __init__( + self, + params: list[UpdateMetaURLsParams] + ): + super().__init__() + self.params = params + + async def run( + self, + session: AsyncSession + ) -> None: + requester = UpdateMetaURLsUpdateURLAndValidationFlagsRequester(session) + + urls_with_non_meta_record_type: list[int] = filter_urls_with_non_meta_record_type(self.params) + await requester.update_urls(urls_with_non_meta_record_type) + + urls_without_validation_flag: list[int] = filter_urls_without_validation_flag(self.params) + await requester.add_validation_flags(urls_without_validation_flag) + + urls_with_non_meta_url_validation_flag: list[int] = filter_urls_with_non_meta_url_validation_flag(self.params) + await requester.update_validation_flags(urls_with_non_meta_url_validation_flag) diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/update/filter.py 
b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/update/filter.py new file mode 100644 index 00000000..b0c32a7e --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/update/filter.py @@ -0,0 +1,37 @@ +from src.core.enums import RecordType +from src.core.tasks.scheduled.impl.sync.agency.queries.upsert.meta_urls.update.params import UpdateMetaURLsParams +from src.db.models.impl.flag.url_validated.enums import URLValidatedType + + +def filter_urls_with_non_meta_record_type( + params: list[UpdateMetaURLsParams] +) -> list[int]: + url_ids: list[int] = [] + for param in params: + if param.record_type is None: + url_ids.append(param.url_id) + if param.record_type != RecordType.CONTACT_INFO_AND_AGENCY_META: + url_ids.append(param.url_id) + + return url_ids + +def filter_urls_without_validation_flag( + params: list[UpdateMetaURLsParams] +) -> list[int]: + url_ids: list[int] = [] + for param in params: + if param.validation_type is None: + url_ids.append(param.url_id) + return url_ids + +def filter_urls_with_non_meta_url_validation_flag( + params: list[UpdateMetaURLsParams] +) -> list[int]: + url_ids: list[int] = [] + for param in params: + if param.validation_type is None: + continue + if param.validation_type != URLValidatedType.META_URL: + url_ids.append(param.url_id) + + return url_ids \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/update/params.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/update/params.py new file mode 100644 index 00000000..cb74a378 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/update/params.py @@ -0,0 +1,11 @@ +from pydantic import BaseModel + +from src.core.enums import RecordType +from src.db.models.impl.flag.url_validated.enums import URLValidatedType + + +class UpdateMetaURLsParams(BaseModel): + validation_type: URLValidatedType | None + url_id: int + record_type: 
RecordType | None + diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/update/requester.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/update/requester.py new file mode 100644 index 00000000..175b1bbf --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/update/requester.py @@ -0,0 +1,53 @@ +from sqlalchemy import update + +from src.core.enums import RecordType +from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.pydantic import FlagURLValidatedPydantic +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.templates.requester import RequesterBase + +from src.db.helpers.session import session_helper as sh + +class UpdateMetaURLsUpdateURLAndValidationFlagsRequester(RequesterBase): + + async def update_validation_flags(self, url_ids: list[int]) -> None: + """Set validation flag for URLs to Meta URL""" + query = ( + update( + FlagURLValidated + ) + .where( + FlagURLValidated.url_id.in_(url_ids) + ) + .values( + type=URLValidatedType.META_URL + ) + ) + await self.session.execute(query) + + async def add_validation_flags(self, url_ids: list[int]) -> None: + inserts: list[FlagURLValidatedPydantic] = [] + for url_id in url_ids: + flag = FlagURLValidatedPydantic( + url_id=url_id, + type=URLValidatedType.META_URL, + ) + inserts.append(flag) + + await sh.bulk_insert(self.session, models=inserts) + + async def update_urls(self, url_ids: list[int]) -> None: + """Update URLs and set record type to Contact Info and Agency Meta""" + query = ( + update( + URL + ) + .values( + record_type=RecordType.CONTACT_INFO_AND_AGENCY_META, + ) + .where( + URL.id.in_(url_ids) + ) + ) + await self.session.execute(query) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/agency/core.py 
b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/agency/core.py index 6222d1fd..93c1cbc9 100644 --- a/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/agency/core.py +++ b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/agency/core.py @@ -1,13 +1,88 @@ +from collections import defaultdict + +from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession -from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.agency.query import URLAgencyLinkUpdateQueryBuilder -from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.agency.params import UpdateLinkURLAgencyForDataSourcesSyncParams +from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.agency.convert import convert_to_link_url_agency_models +from src.db.helpers.session import session_helper as sh +from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.models.impl.link.url_agency.pydantic import LinkURLAgencyPydantic +from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.agency.params import UpdateLinkURLAgencyParams +from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency +from src.db.queries.base.builder import QueryBuilderBase + + +class URLAgencyLinkUpdateQueryBuilder(QueryBuilderBase): + """Given a set of URL-Agency links, remove all non-matching non-Meta URL links and add new ones.""" + + + def __init__(self, models: list[UpdateLinkURLAgencyParams]): + super().__init__() + self.models = models + self._new_links: dict[int, list[int]] = { + model.url_id: model.new_agency_ids + for model in self.models + } + self._existing_links: dict[int, list[int]] = defaultdict(list) + self.existing_url_ids: set[int] = { + model.url_id for model in self.models + } + + async def _get_existing_links(self, session: AsyncSession) -> None: + """Get existing non-meta URL agency links for provided URL 
IDs. + + Modifies: + self._existing_links + """ + query = ( + select(LinkURLAgency) + .outerjoin( + FlagURLValidated, + FlagURLValidated.url_id == LinkURLAgency.url_id, + ) + .where( + LinkURLAgency.url_id.in_( + self.existing_url_ids + ), + FlagURLValidated.type != URLValidatedType.META_URL + ) + ) + links = await session.scalars(query) + for link in links: + self._existing_links[link.url_id].append(link.agency_id) + + async def _update_links(self, session: AsyncSession) -> None: + # Remove all existing links not in new links + links_to_delete: list[LinkURLAgencyPydantic] = [] + links_to_insert: list[LinkURLAgencyPydantic] = [] + + for url_id in self.existing_url_ids: + new_agency_ids = self._new_links.get(url_id, []) + existing_agency_ids = self._existing_links.get(url_id, []) + # IDs to delete are existing agency ids that are not new agency ids + ids_to_delete = set(existing_agency_ids) - set(new_agency_ids) + # IDs to insert are new agency ids that are not existing agency ids + ids_to_insert = set(new_agency_ids) - set(existing_agency_ids) + + links_to_delete.extend( + convert_to_link_url_agency_models( + url_id=url_id, + agency_ids=list(ids_to_delete) + ) + ) + links_to_insert.extend( + convert_to_link_url_agency_models( + url_id=url_id, + agency_ids=list(ids_to_insert) + ) + ) + + await sh.bulk_delete(session=session, models=links_to_delete) + await sh.bulk_insert(session=session, models=links_to_insert) + + async def run(self, session: AsyncSession) -> None: + await self._get_existing_links(session=session) + await self._update_links(session=session) -async def update_agency_links( - session: AsyncSession, - params: list[UpdateLinkURLAgencyForDataSourcesSyncParams] -) -> None: - """Overwrite existing url_agency links with new ones, if applicable.""" - query = URLAgencyLinkUpdateQueryBuilder(params) - await query.run(session) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/agency/params.py 
class UpdateLinkURLAgencyParams(BaseModel):
    """Per-URL payload for reconciling url_agency links during a data-sources sync."""
    # URL whose agency links are being reconciled.
    url_id: int
    # Desired end-state agency ids for this URL.
    new_agency_ids: list[int]
    # Agency ids reported as currently linked upstream.
    old_agency_ids: list[int]
def convert_url_sync_info_to_url_mappings(
    url_sync_info: URLDataSyncInfo
) -> URLMapping:
    """Project one URL's sync info down to a plain URL<->id mapping."""
    return URLMapping(
        url_id=url_sync_info.url_id,
        url=url_sync_info.url,
    )


def convert_approval_status_to_validated_type(
    approval_status: ApprovalStatus
) -> URLValidatedType:
    """Map a data-source approval status to its validated-flag type.

    Raises:
        ValueError: for statuses with no validated-flag equivalent.
    """
    if approval_status == ApprovalStatus.APPROVED:
        return URLValidatedType.DATA_SOURCE
    if approval_status == ApprovalStatus.REJECTED:
        return URLValidatedType.NOT_RELEVANT
    raise ValueError(f"Invalid approval status: {approval_status}")
src.external.pdap.dtos.sync.data_sources import DataSourcesSyncResponseInnerInfo +from src.util.url_mapper import URLMapper @final @@ -50,24 +54,36 @@ async def run(self, session: AsyncSession) -> None: """ self._session = session - lookup_results = await self._lookup_urls() - lookups_existing_urls = filter_for_urls_with_ids(lookup_results) + lookup_results: list[LookupURLForDataSourcesSyncResponse] = await self._lookup_urls() + + # Update existing url and associated metadata + lookups_existing_urls: list[LookupURLForDataSourcesSyncResponse] = filter_for_urls_with_ids(lookup_results) await self._update_existing_urls(lookups_existing_urls) await self._update_agency_link(lookups_existing_urls) - mappings_without_data_sources = get_mappings_for_urls_without_data_sources(lookup_results) - await self._add_new_data_sources(mappings_without_data_sources) + existing_url_mappings: list[URLMapping] = [ + convert_url_sync_info_to_url_mappings(lookup.url_info) + for lookup in lookups_existing_urls + ] - extant_urls = {lookup.url_info.url for lookup in lookups_existing_urls} - urls_to_add = list(self.urls - extant_urls) - if len(urls_to_add) == 0: - return - url_mappings = await self._add_new_urls(urls_to_add) - await self._add_new_data_sources(url_mappings) - await self._insert_agency_link(url_mappings) - - async def _lookup_urls(self): - lookup_results = await self.requester.lookup_urls(list(self.urls)) - return lookup_results + # Add new URLs and associated metadata + mappings_without_data_sources: list[URLMapping] = get_mappings_for_urls_without_data_sources(lookup_results) + await self._add_new_data_sources(mappings_without_data_sources) + extant_urls: set[str] = {lookup.url_info.url for lookup in lookups_existing_urls} + urls_to_add: list[str] = list(self.urls - extant_urls) + if len(urls_to_add) != 0: + new_url_mappings: list[URLMapping] = await self._add_new_urls(urls_to_add) + await self._add_new_data_sources(new_url_mappings) + await 
self._insert_agency_link(new_url_mappings) + else: + new_url_mappings: list[URLMapping] = [] + + # Upsert validated flags + all_url_mappings: list[URLMapping] = existing_url_mappings + new_url_mappings + mapper = URLMapper(all_url_mappings) + await self._upsert_validated_flags(mapper) + + async def _lookup_urls(self) -> list[LookupURLForDataSourcesSyncResponse]: + return await self.requester.lookup_urls(list(self.urls)) async def _insert_agency_link(self, url_mappings: list[URLMapping]): link_url_agency_insert_params = self.param_manager.insert_agency_link( @@ -81,16 +97,19 @@ async def _update_agency_link(self, lookups_existing_urls: list[LookupURLForData ) await self.requester.update_agency_links(link_url_agency_update_params) - async def _add_new_data_sources(self, url_mappings: list[URLMapping]): + async def _add_new_data_sources(self, url_mappings: list[URLMapping]) -> None: url_ds_insert_params = self.param_manager.add_new_data_sources(url_mappings) await self.requester.add_new_data_sources(url_ds_insert_params) - async def _add_new_urls(self, urls: list[str]): + async def _add_new_urls(self, urls: list[str]) -> list[URLMapping]: url_insert_params: list[InsertURLForDataSourcesSyncParams] = self.param_manager.add_new_urls(urls) url_mappings = await self.requester.add_new_urls(url_insert_params) return url_mappings - async def _update_existing_urls(self, lookups_existing_urls: list[LookupURLForDataSourcesSyncResponse]): + async def _update_existing_urls(self, lookups_existing_urls: list[LookupURLForDataSourcesSyncResponse]) -> None: update_params = self.param_manager.update_existing_urls(lookups_existing_urls) await self.requester.update_existing_urls(update_params) + async def _upsert_validated_flags(self, url_mapper: URLMapper) -> None: + flags: list[FlagURLValidatedPydantic] = self.param_manager.upsert_validated_flags(url_mapper) + await self.requester.upsert_validated_flags(flags) \ No newline at end of file diff --git 
a/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/helpers/convert.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/helpers/convert.py index 3240e409..168f2511 100644 --- a/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/helpers/convert.py +++ b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/helpers/convert.py @@ -23,13 +23,13 @@ def convert_to_source_collector_url_status( match ds_approval_status: case ApprovalStatus.APPROVED: - return URLStatus.SUBMITTED + return URLStatus.OK case ApprovalStatus.REJECTED: return URLStatus.NOT_RELEVANT case ApprovalStatus.NEEDS_IDENTIFICATION: - return URLStatus.PENDING + return URLStatus.OK case ApprovalStatus.PENDING: - return URLStatus.PENDING + return URLStatus.OK case _: raise NotImplementedError(f"Logic not implemented for this approval status: {ds_approval_status}") diff --git a/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/param_manager.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/param_manager.py index 7ca8ebad..e0a7225f 100644 --- a/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/param_manager.py +++ b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/param_manager.py @@ -1,5 +1,7 @@ from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.agency.params import \ - UpdateLinkURLAgencyForDataSourcesSyncParams + UpdateLinkURLAgencyParams +from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.convert import \ + convert_approval_status_to_validated_type from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.helpers.convert import convert_to_url_update_params, \ convert_to_url_insert_params from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.mapper import URLSyncInfoMapper @@ -10,8 +12,14 @@ from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.update.params import \ UpdateURLForDataSourcesSyncParams from 
src.db.dtos.url.mapping import URLMapping +from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.pydantic import FlagURLValidatedPydantic +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.url_agency.pydantic import LinkURLAgencyPydantic from src.db.models.impl.url.data_source.pydantic import URLDataSourcePydantic +from src.external.pdap.dtos.sync.data_sources import DataSourcesSyncResponseInnerInfo +from src.external.pdap.enums import ApprovalStatus +from src.util.url_mapper import URLMapper class UpsertURLsFromDataSourcesParamManager: @@ -53,12 +61,12 @@ def add_new_urls( def update_agency_link( self, lookup_results: list[LookupURLForDataSourcesSyncResponse] - ) -> list[UpdateLinkURLAgencyForDataSourcesSyncParams]: + ) -> list[UpdateLinkURLAgencyParams]: results = [] for lookup_result in lookup_results: url_info = lookup_result.url_info sync_info = self._mapper.get(url_info.url) - update_params = UpdateLinkURLAgencyForDataSourcesSyncParams( + update_params = UpdateLinkURLAgencyParams( url_id=url_info.url_id, new_agency_ids=sync_info.agency_ids, old_agency_ids=url_info.agency_ids @@ -98,4 +106,21 @@ def add_new_data_sources( ) return results + def upsert_validated_flags( + self, + mapper: URLMapper + ) -> list[FlagURLValidatedPydantic]: + urls: list[str] = mapper.get_all_urls() + flags: list[FlagURLValidatedPydantic] = [] + for url in urls: + url_id: int = mapper.get_id(url) + sync_info: DataSourcesSyncResponseInnerInfo = self._mapper.get(url) + approval_status: ApprovalStatus = sync_info.approval_status + validated_type: URLValidatedType = convert_approval_status_to_validated_type(approval_status) + flag = FlagURLValidatedPydantic( + url_id=url_id, + type=validated_type + ) + flags.append(flag) + return flags \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/requester.py 
b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/requester.py index 08b5df22..eaae3a17 100644 --- a/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/requester.py +++ b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/requester.py @@ -1,8 +1,8 @@ from sqlalchemy.ext.asyncio import AsyncSession from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.agency.params import \ - UpdateLinkURLAgencyForDataSourcesSyncParams -from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.agency.query import \ + UpdateLinkURLAgencyParams +from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.agency.core import \ URLAgencyLinkUpdateQueryBuilder from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.insert.params import \ InsertURLForDataSourcesSyncParams @@ -14,6 +14,7 @@ UpdateURLForDataSourcesSyncParams from src.db.dtos.url.mapping import URLMapping from src.db.helpers.session import session_helper as sh +from src.db.models.impl.flag.url_validated.pydantic import FlagURLValidatedPydantic from src.db.models.impl.link.url_agency.pydantic import LinkURLAgencyPydantic from src.db.models.impl.url.data_source.pydantic import URLDataSourcePydantic @@ -71,8 +72,11 @@ async def add_new_agency_links( async def update_agency_links( self, - params: list[UpdateLinkURLAgencyForDataSourcesSyncParams] + params: list[UpdateLinkURLAgencyParams] ) -> None: """Overwrite existing url_agency links with new ones, if applicable.""" query = URLAgencyLinkUpdateQueryBuilder(params) - await query.run(self.session) \ No newline at end of file + await query.run(self.session) + + async def upsert_validated_flags(self, flags: list[FlagURLValidatedPydantic]) -> None: + await sh.bulk_upsert(self.session, models=flags) \ No newline at end of file diff --git a/src/core/tasks/scheduled/loader.py b/src/core/tasks/scheduled/loader.py index 83c3b100..76c707ea 100644 --- a/src/core/tasks/scheduled/loader.py +++ 
b/src/core/tasks/scheduled/loader.py @@ -77,6 +77,14 @@ async def load_entries(self) -> list[ScheduledTaskEntry]: interval=IntervalEnum.DAILY, enabled=self.env.bool("SYNC_DATA_SOURCES_TASK_FLAG", default=True) ), + ScheduledTaskEntry( + operator=SyncAgenciesTaskOperator( + adb_client=self.async_core.adb_client, + pdap_client=self.pdap_client + ), + interval=IntervalEnum.DAILY, + enabled=self.env.bool("SYNC_AGENCIES_TASK_FLAG", default=True) + ), ScheduledTaskEntry( operator=RunURLTasksTaskOperator(async_core=self.async_core), interval=IntervalEnum.HOURLY, @@ -88,14 +96,6 @@ async def load_entries(self) -> list[ScheduledTaskEntry]: interval=IntervalEnum.DAILY, enabled=self.env.bool("POPULATE_BACKLOG_SNAPSHOT_TASK_FLAG", default=True) ), - ScheduledTaskEntry( - operator=SyncAgenciesTaskOperator( - adb_client=self.async_core.adb_client, - pdap_client=self.pdap_client - ), - interval=IntervalEnum.DAILY, - enabled=self.env.bool("SYNC_AGENCIES_TASK_FLAG", default=True) - ), ScheduledTaskEntry( operator=PushToHuggingFaceTaskOperator( adb_client=self.async_core.adb_client, diff --git a/src/core/tasks/scheduled/manager.py b/src/core/tasks/scheduled/manager.py index e97e0f8e..87cb5a27 100644 --- a/src/core/tasks/scheduled/manager.py +++ b/src/core/tasks/scheduled/manager.py @@ -1,6 +1,3 @@ -from apscheduler.job import Job -from apscheduler.schedulers.asyncio import AsyncIOScheduler - from src.core.tasks.base.run_info import TaskOperatorRunInfo from src.core.tasks.handler import TaskHandler from src.core.tasks.mixins.link_urls import LinkURLsMixin @@ -25,13 +22,13 @@ def __init__( self._loader = loader self._registry = registry - # Main objects - self.scheduler = AsyncIOScheduler() - async def setup(self): self._registry.start_scheduler() await self.add_scheduled_tasks() + await self._registry.report_next_scheduled_task() + + async def add_scheduled_tasks(self): """ @@ -39,15 +36,19 @@ async def add_scheduled_tasks(self): self._registry """ entries: list[ScheduledTaskEntry] = 
await self._loader.load_entries() - for idx, entry in enumerate(entries): + enabled_entries: list[ScheduledTaskEntry] = [] + for entry in entries: if not entry.enabled: print(f"{entry.operator.task_type.value} is disabled. Skipping add to scheduler.") continue + enabled_entries.append(entry) + initial_lag: int = 1 + for idx, entry in enumerate(enabled_entries): await self._registry.add_job( func=self.run_task, entry=entry, - minute_lag=idx + minute_lag=idx + initial_lag ) def shutdown(self): @@ -68,3 +69,4 @@ async def run_task(self, operator: ScheduledTaskOperatorBase): operator: ScheduledTaskOperatorBase raise Exception(f"Task {operator.task_type.value} has not been linked to any URLs but is designated as a link task") await self._handler.handle_outcome(run_info) + await self._registry.report_next_scheduled_task() diff --git a/src/core/tasks/scheduled/registry/core.py b/src/core/tasks/scheduled/registry/core.py index a1928504..a622346c 100644 --- a/src/core/tasks/scheduled/registry/core.py +++ b/src/core/tasks/scheduled/registry/core.py @@ -6,6 +6,7 @@ from apscheduler.triggers.interval import IntervalTrigger from src.core.tasks.scheduled.models.entry import ScheduledTaskEntry +from src.core.tasks.scheduled.registry.format import format_job_datetime from src.db.enums import TaskType @@ -29,8 +30,9 @@ async def add_job( Modifies: self._jobs """ - self._jobs[entry.operator.task_type] = self.scheduler.add_job( - func, + job: Job = self.scheduler.add_job( + id=entry.operator.task_type.value, + func=func, trigger=IntervalTrigger( minutes=entry.interval.value, start_date=datetime.now() + timedelta(minutes=minute_lag) @@ -38,6 +40,10 @@ async def add_job( misfire_grace_time=60, kwargs={"operator": entry.operator} ) + run_time_str: str = format_job_datetime(job.next_run_time) + print(f"Adding {job.id} task to scheduler. 
" + + f"First run at {run_time_str}") + self._jobs[entry.operator.task_type] = job def start_scheduler(self) -> None: """ @@ -48,4 +54,16 @@ def start_scheduler(self) -> None: def shutdown_scheduler(self) -> None: if self.scheduler.running: - self.scheduler.shutdown() \ No newline at end of file + self.scheduler.shutdown() + + async def report_next_scheduled_task(self): + jobs: list[Job] = self.scheduler.get_jobs() + if len(jobs) == 0: + print("No scheduled tasks found.") + return + + jobs_sorted: list[Job] = sorted(jobs, key=lambda job: job.next_run_time) + next_job: Job = jobs_sorted[0] + + run_time_str: str = format_job_datetime(next_job.next_run_time) + print(f"Next scheduled task: {run_time_str} ({next_job.id})") \ No newline at end of file diff --git a/src/core/tasks/scheduled/registry/format.py b/src/core/tasks/scheduled/registry/format.py new file mode 100644 index 00000000..23eea364 --- /dev/null +++ b/src/core/tasks/scheduled/registry/format.py @@ -0,0 +1,7 @@ +from datetime import datetime + +def format_job_datetime(dt: datetime) -> str: + date_str: str = dt.strftime("%Y-%m-%d") + format_24: str = dt.strftime("%H:%M:%S") + format_12: str = dt.strftime("%I:%M:%S %p") + return f"{date_str} {format_24} ({format_12})" \ No newline at end of file diff --git a/src/core/tasks/url/loader.py b/src/core/tasks/url/loader.py index 45f750af..600ea1d2 100644 --- a/src/core/tasks/url/loader.py +++ b/src/core/tasks/url/loader.py @@ -7,6 +7,8 @@ from src.collectors.impl.muckrock.api_interface.core import MuckrockAPIInterface from src.core.tasks.url.models.entry import URLTaskEntry from src.core.tasks.url.operators.agency_identification.core import AgencyIdentificationTaskOperator +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.nlp.core import \ + NLPProcessor from src.core.tasks.url.operators.agency_identification.subtasks.loader import AgencyIdentificationSubtaskLoader from 
src.core.tasks.url.operators.auto_relevant.core import URLAutoRelevantTaskOperator from src.core.tasks.url.operators.html.core import URLHTMLTaskOperator @@ -20,7 +22,6 @@ from src.core.tasks.url.operators.submit_approved.core import SubmitApprovedURLTaskOperator from src.db.client.async_ import AsyncDatabaseClient from src.external.huggingface.inference.client import HuggingFaceInferenceClient -from src.external.internet_archives.client import InternetArchivesClient from src.external.pdap.client import PDAPClient from src.external.url_request.core import URLRequestInterface @@ -35,11 +36,13 @@ def __init__( pdap_client: PDAPClient, muckrock_api_interface: MuckrockAPIInterface, hf_inference_client: HuggingFaceInferenceClient, + nlp_processor: NLPProcessor ): # Dependencies self.adb_client = adb_client self.url_request_interface = url_request_interface self.html_parser = html_parser + self.nlp_processor = nlp_processor self.env = Env() # External clients and interfaces @@ -79,7 +82,9 @@ async def _get_agency_identification_task_operator(self) -> URLTaskEntry: adb_client=self.adb_client, loader=AgencyIdentificationSubtaskLoader( pdap_client=self.pdap_client, - muckrock_api_interface=self.muckrock_api_interface + muckrock_api_interface=self.muckrock_api_interface, + adb_client=self.adb_client, + nlp_processor=self.nlp_processor ) ) return URLTaskEntry( diff --git a/src/core/tasks/url/manager.py b/src/core/tasks/url/manager.py index 399da5b0..7fc6b4e3 100644 --- a/src/core/tasks/url/manager.py +++ b/src/core/tasks/url/manager.py @@ -56,8 +56,7 @@ async def _run_task(self, entry: URLTaskEntry) -> None: print(message) await self.handler.post_to_discord(message=message) break - task_id = await self.handler.initiate_task_in_db(task_type=operator.task_type) - run_info: TaskOperatorRunInfo = await operator.run_task(task_id) + run_info: TaskOperatorRunInfo = await operator.run_task() await self.conclude_task(run_info) if run_info.outcome == TaskOperatorOutcome.ERROR: break 
diff --git a/src/core/tasks/url/operators/agency_identification/core.py b/src/core/tasks/url/operators/agency_identification/core.py index 8ac1f632..92ece84e 100644 --- a/src/core/tasks/url/operators/agency_identification/core.py +++ b/src/core/tasks/url/operators/agency_identification/core.py @@ -1,17 +1,21 @@ -from src.collectors.enums import CollectorType -from src.core.enums import SuggestionType -from src.core.tasks.url.operators.agency_identification.dtos.output import GetAgencySuggestionsOutput -from src.core.tasks.url.operators.agency_identification.dtos.suggestion import URLAgencySuggestionInfo -from src.core.tasks.url.operators.agency_identification.dtos.tdo import AgencyIdentificationTDO -from src.core.tasks.url.operators.agency_identification.subtasks.impl.base import AgencyIdentificationSubtaskBase +from src.core.tasks.mixins.link_urls import LinkURLsMixin +from src.core.tasks.url.operators.agency_identification.exceptions import SubtaskError +from src.core.tasks.url.operators.agency_identification.subtasks.flags.core import SubtaskFlagger from src.core.tasks.url.operators.agency_identification.subtasks.loader import AgencyIdentificationSubtaskLoader +from src.core.tasks.url.operators.agency_identification.subtasks.models.run_info import AgencyIDSubtaskRunInfo +from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.core import \ + AgencyIDSubtaskSurveyQueryBuilder +from src.core.tasks.url.operators.agency_identification.subtasks.templates.subtask import AgencyIDSubtaskOperatorBase from src.core.tasks.url.operators.base import URLTaskOperatorBase from src.db.client.async_ import AsyncDatabaseClient from src.db.enums import TaskType -from src.db.models.impl.url.error_info.pydantic import URLErrorPydanticInfo +from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType -class AgencyIdentificationTaskOperator(URLTaskOperatorBase): +class AgencyIdentificationTaskOperator( + URLTaskOperatorBase, 
    async def meets_task_prerequisites(self) -> bool:
        """Return True when at least one allowed subtask has pending work.

        Surveys the database for the next runnable subtask among those
        enabled via environment flags.

        Modifies:
            - self._subtask
        """
        flagger = SubtaskFlagger()
        allowed_subtasks: list[AutoAgencyIDSubtaskType] = flagger.get_allowed_subtasks()

        next_subtask: AutoAgencyIDSubtaskType | None = \
            await self.adb_client.run_query_builder(
                AgencyIDSubtaskSurveyQueryBuilder(
                    allowed_subtasks=allowed_subtasks
                )
            )
        self._subtask = next_subtask
        if next_subtask is None:
            return False
        return True

    async def load_subtask(
        self,
        subtask_type: AutoAgencyIDSubtaskType
    ) -> AgencyIDSubtaskOperatorBase:
        """Build the operator for the given subtask type (tagged with this task's id)."""
        return await self.loader.load_subtask(subtask_type, task_id=self.task_id)

    @staticmethod
    async def run_subtask(
        subtask_operator: AgencyIDSubtaskOperatorBase,
    ) -> AgencyIDSubtaskRunInfo:
        """Execute one subtask and return its run outcome."""
        return await subtask_operator.run()

    async def inner_task_logic(self) -> None:
        """Run the subtask selected by meets_task_prerequisites and record its URLs.

        Raises:
            SubtaskError: when the subtask run reports a failure.
        """
        # self._subtask is set by meets_task_prerequisites(), which the base
        # operator runs first — presumably guaranteed non-None here; verify in base class.
        subtask_operator: AgencyIDSubtaskOperatorBase = await self.load_subtask(self._subtask)
        print(f"Running Subtask: {self._subtask.value}")
        run_info: AgencyIDSubtaskRunInfo = await self.run_subtask(subtask_operator)
        await self.link_urls_to_task(run_info.linked_url_ids)
        if not run_info.is_success:
            raise SubtaskError(run_info.error)
class SubtaskError(Exception):
    """Raised when an agency-identification subtask run reports a failure."""
    pass
-1,38 +0,0 @@ -from sqlalchemy import select -from sqlalchemy.ext.asyncio import AsyncSession - -from src.collectors.enums import URLStatus, CollectorType -from src.core.tasks.url.operators.agency_identification.dtos.tdo import AgencyIdentificationTDO -from src.db.models.impl.batch.sqlalchemy import Batch -from src.db.models.impl.link.batch_url import LinkBatchURL -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.queries.base.builder import QueryBuilderBase -from src.db.statement_composer import StatementComposer - - -class GetPendingURLsWithoutAgencySuggestionsQueryBuilder(QueryBuilderBase): - - async def run(self, session: AsyncSession) -> list[AgencyIdentificationTDO]: - - statement = ( - select( - URL.id, - URL.collector_metadata, - Batch.strategy - ) - .select_from(URL) - .where(URL.status == URLStatus.PENDING.value) - .outerjoin(LinkBatchURL) - .outerjoin(Batch) - ) - statement = StatementComposer.exclude_urls_with_agency_suggestions(statement) - statement = statement.limit(100) - raw_results = await session.execute(statement) - return [ - AgencyIdentificationTDO( - url_id=raw_result[0], - collector_metadata=raw_result[1], - collector_type=CollectorType(raw_result[2]) if raw_result[2] is not None else None - ) - for raw_result in raw_results - ] \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/queries/has_urls_without_agency_suggestions.py b/src/core/tasks/url/operators/agency_identification/queries/has_urls_without_agency_suggestions.py deleted file mode 100644 index e8a0e8ce..00000000 --- a/src/core/tasks/url/operators/agency_identification/queries/has_urls_without_agency_suggestions.py +++ /dev/null @@ -1,27 +0,0 @@ -from sqlalchemy import select -from sqlalchemy.ext.asyncio import AsyncSession - -from src.collectors.enums import URLStatus -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.queries.base.builder import QueryBuilderBase -from src.db.statement_composer import 
StatementComposer - - -class HasURLsWithoutAgencySuggestionsQueryBuilder(QueryBuilderBase): - - async def run( - self, - session: AsyncSession - ) -> bool: - statement = ( - select( - URL.id - ).where( - URL.status == URLStatus.PENDING.value - ) - ) - - statement = StatementComposer.exclude_urls_with_agency_suggestions(statement) - raw_result = await session.execute(statement) - result = raw_result.all() - return len(result) != 0 \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/convert.py b/src/core/tasks/url/operators/agency_identification/subtasks/convert.py new file mode 100644 index 00000000..95c9e704 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/convert.py @@ -0,0 +1,54 @@ +from src.core.tasks.url.operators.agency_identification.subtasks.models.subtask import AutoAgencyIDSubtaskData +from src.core.tasks.url.operators.agency_identification.subtasks.models.suggestion import AgencySuggestion +from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.url.suggestion.agency.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic +from src.external.pdap.dtos.match_agency.post import MatchAgencyInfo +from src.external.pdap.dtos.match_agency.response import MatchAgencyResponse +from src.external.pdap.enums import MatchAgencyResponseStatus + +def convert_match_agency_response_to_subtask_data( + url_id: int, + response: MatchAgencyResponse, + subtask_type: AutoAgencyIDSubtaskType, + task_id: int +): + suggestions: list[AgencySuggestion] = \ + _convert_match_agency_response_to_suggestions( + response + ) + agencies_found: bool = len(suggestions) > 0 + subtask_pydantic = URLAutoAgencyIDSubtaskPydantic( + url_id=url_id, + type=subtask_type, + agencies_found=agencies_found, + task_id=task_id + ) + return AutoAgencyIDSubtaskData( + pydantic_model=subtask_pydantic, + suggestions=suggestions + ) + +def 
_convert_match_agency_response_to_suggestions( + match_response: MatchAgencyResponse, +) -> list[AgencySuggestion]: + if match_response.status == MatchAgencyResponseStatus.EXACT_MATCH: + match_info: MatchAgencyInfo = match_response.matches[0] + return [ + AgencySuggestion( + agency_id=int(match_info.id), + confidence=100 + ) + ] + if match_response.status == MatchAgencyResponseStatus.NO_MATCH: + return [] + if match_response.status != MatchAgencyResponseStatus.PARTIAL_MATCH: + raise ValueError(f"Unknown Match Agency Response Status: {match_response.status}") + total_confidence: int = 100 + confidence_per_match: int = total_confidence // len(match_response.matches) + return [ + AgencySuggestion( + agency_id=int(match_info.id), + confidence=confidence_per_match + ) + for match_info in match_response.matches + ] \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/flags/__init__.py b/src/core/tasks/url/operators/agency_identification/subtasks/flags/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/flags/core.py b/src/core/tasks/url/operators/agency_identification/subtasks/flags/core.py new file mode 100644 index 00000000..41997322 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/flags/core.py @@ -0,0 +1,26 @@ + +from environs import Env + +from src.core.tasks.url.operators.agency_identification.subtasks.flags.mappings import SUBTASK_TO_ENV_FLAG +from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType + + +class SubtaskFlagger: + """ + Manages flags allowing and disallowing subtasks + """ + def __init__(self): + self.env = Env() + + def _get_subtask_flag(self, subtask_type: AutoAgencyIDSubtaskType) -> bool: + return self.env.bool( + SUBTASK_TO_ENV_FLAG[subtask_type], + default=True + ) + + def get_allowed_subtasks(self) -> list[AutoAgencyIDSubtaskType]: + return [ + subtask_type 
+ for subtask_type, flag in SUBTASK_TO_ENV_FLAG.items() + if self._get_subtask_flag(subtask_type) + ] \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/flags/mappings.py b/src/core/tasks/url/operators/agency_identification/subtasks/flags/mappings.py new file mode 100644 index 00000000..d6997423 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/flags/mappings.py @@ -0,0 +1,8 @@ +from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType + +SUBTASK_TO_ENV_FLAG: dict[AutoAgencyIDSubtaskType, str] = { + AutoAgencyIDSubtaskType.HOMEPAGE_MATCH: "AGENCY_ID_HOMEPAGE_MATCH_FLAG", + AutoAgencyIDSubtaskType.NLP_LOCATION_MATCH: "AGENCY_ID_NLP_LOCATION_MATCH_FLAG", + AutoAgencyIDSubtaskType.CKAN: "AGENCY_ID_CKAN_FLAG", + AutoAgencyIDSubtaskType.MUCKROCK: "AGENCY_ID_MUCKROCK_FLAG" +} \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/base.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/base.py deleted file mode 100644 index 96f98f30..00000000 --- a/src/core/tasks/url/operators/agency_identification/subtasks/impl/base.py +++ /dev/null @@ -1,16 +0,0 @@ -import abc -from abc import ABC -from typing import Optional - -from src.core.tasks.url.operators.agency_identification.dtos.suggestion import URLAgencySuggestionInfo - - -class AgencyIdentificationSubtaskBase(ABC): - - @abc.abstractmethod - async def run( - self, - url_id: int, - collector_metadata: dict | None = None - ) -> list[URLAgencySuggestionInfo]: - raise NotImplementedError diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/ckan.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/ckan.py deleted file mode 100644 index 15dddf6f..00000000 --- a/src/core/tasks/url/operators/agency_identification/subtasks/impl/ckan.py +++ /dev/null @@ -1,33 +0,0 @@ -from typing import final - -from 
typing_extensions import override - -from src.core.helpers import process_match_agency_response_to_suggestions -from src.core.tasks.url.operators.agency_identification.dtos.suggestion import URLAgencySuggestionInfo -from src.core.tasks.url.operators.agency_identification.subtasks.impl.base import AgencyIdentificationSubtaskBase -from src.external.pdap.client import PDAPClient -from src.external.pdap.dtos.match_agency.response import MatchAgencyResponse - -@final -class CKANAgencyIdentificationSubtask(AgencyIdentificationSubtaskBase): - - def __init__( - self, - pdap_client: PDAPClient - ): - self.pdap_client = pdap_client - - @override - async def run( - self, - url_id: int, - collector_metadata: dict | None = None - ) -> list[URLAgencySuggestionInfo]: - agency_name = collector_metadata["agency_name"] - match_agency_response: MatchAgencyResponse = await self.pdap_client.match_agency( - name=agency_name - ) - return process_match_agency_response_to_suggestions( - url_id=url_id, - match_agency_response=match_agency_response - ) diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/ckan_/__init__.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/ckan_/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/ckan_/core.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/ckan_/core.py new file mode 100644 index 00000000..d1af5391 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/ckan_/core.py @@ -0,0 +1,54 @@ +from typing import final + +from typing_extensions import override + +from src.core.tasks.url.operators.agency_identification.subtasks.convert import \ + convert_match_agency_response_to_subtask_data +from src.core.tasks.url.operators.agency_identification.subtasks.impl.ckan_.params import CKANAgencyIDSubtaskParams +from 
src.core.tasks.url.operators.agency_identification.subtasks.impl.ckan_.query import \ + GetCKANAgencyIDSubtaskParamsQueryBuilder +from src.core.tasks.url.operators.agency_identification.subtasks.models.subtask import AutoAgencyIDSubtaskData +from src.core.tasks.url.operators.agency_identification.subtasks.templates.subtask import \ + AgencyIDSubtaskOperatorBase +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType +from src.external.pdap.client import PDAPClient +from src.external.pdap.dtos.match_agency.response import MatchAgencyResponse + + +@final +class CKANAgencyIDSubtaskOperator(AgencyIDSubtaskOperatorBase): + + def __init__( + self, + adb_client: AsyncDatabaseClient, + task_id: int, + pdap_client: PDAPClient + ): + super().__init__(adb_client, task_id=task_id) + self.pdap_client = pdap_client + + @override + async def inner_logic(self) -> None: + params: list[CKANAgencyIDSubtaskParams] = await self._get_params() + self.linked_urls = [param.url_id for param in params] + subtask_data_list: list[AutoAgencyIDSubtaskData] = [] + for param in params: + agency_name: str = param.collector_metadata["agency_name"] + response: MatchAgencyResponse = await self.pdap_client.match_agency( + name=agency_name + ) + subtask_data: AutoAgencyIDSubtaskData = convert_match_agency_response_to_subtask_data( + url_id=param.url_id, + response=response, + subtask_type=AutoAgencyIDSubtaskType.CKAN, + task_id=self.task_id + ) + subtask_data_list.append(subtask_data) + + await self._upload_subtask_data(subtask_data_list) + + async def _get_params(self) -> list[CKANAgencyIDSubtaskParams]: + return await self.adb_client.run_query_builder( + GetCKANAgencyIDSubtaskParamsQueryBuilder() + ) \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/ckan_/params.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/ckan_/params.py new file 
mode 100644 index 00000000..ce4b7ce1 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/ckan_/params.py @@ -0,0 +1,6 @@ +from pydantic import BaseModel + + +class CKANAgencyIDSubtaskParams(BaseModel): + url_id: int + collector_metadata: dict \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/ckan_/query.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/ckan_/query.py new file mode 100644 index 00000000..90e965e7 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/ckan_/query.py @@ -0,0 +1,46 @@ +from typing import Sequence + +from sqlalchemy import select, RowMapping +from sqlalchemy.ext.asyncio import AsyncSession + +from src.collectors.enums import CollectorType +from src.core.tasks.url.operators.agency_identification.subtasks.impl.ckan_.params import CKANAgencyIDSubtaskParams +from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.eligible import \ + EligibleContainer +from src.db.helpers.session import session_helper as sh +from src.db.models.impl.batch.sqlalchemy import Batch +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.queries.base.builder import QueryBuilderBase + + +class GetCKANAgencyIDSubtaskParamsQueryBuilder(QueryBuilderBase): + + async def run( + self, + session: AsyncSession + ) -> list[CKANAgencyIDSubtaskParams]: + container = EligibleContainer() + query = ( + select( + container.url_id, + URL.collector_metadata + ) + .join( + URL, + URL.id == container.url_id, + ) + .where( + container.ckan, + ) + .limit(500) + ) + + results: Sequence[RowMapping] = await sh.mappings(session, query=query) + return [ + CKANAgencyIDSubtaskParams( + url_id=mapping["id"], + collector_metadata=mapping["collector_metadata"], + ) + for mapping in results + ] diff --git 
a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/__init__.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/convert.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/convert.py new file mode 100644 index 00000000..f4ba913e --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/convert.py @@ -0,0 +1,47 @@ +from src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.models.entry import \ + GetHomepageMatchParams +from src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.models.mapping import \ + SubtaskURLMapping +from src.db.models.impl.url.suggestion.agency.subtask.enum import SubtaskDetailCode, AutoAgencyIDSubtaskType +from src.db.models.impl.url.suggestion.agency.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic +from src.db.models.impl.url.suggestion.agency.suggestion.pydantic import AgencyIDSubtaskSuggestionPydantic + + +def convert_params_to_subtask_entries( + params: list[GetHomepageMatchParams], + task_id: int +) -> list[URLAutoAgencyIDSubtaskPydantic]: + url_id_to_detail_code: dict[int, SubtaskDetailCode] = {} + for param in params: + url_id_to_detail_code[param.url_id] = param.detail_code + + results: list[URLAutoAgencyIDSubtaskPydantic] = [] + for url_id, detail_code in url_id_to_detail_code.items(): + result = URLAutoAgencyIDSubtaskPydantic( + task_id=task_id, + url_id=url_id, + type=AutoAgencyIDSubtaskType.HOMEPAGE_MATCH, + agencies_found=True, + detail=detail_code, + ) + results.append(result) + return results + +def convert_subtask_mappings_and_params_to_suggestions( + mappings: list[SubtaskURLMapping], + params: list[GetHomepageMatchParams] +) -> list[AgencyIDSubtaskSuggestionPydantic]: + 
url_id_to_subtask_id: dict[int, int] = { + mapping.url_id: mapping.subtask_id + for mapping in mappings + } + suggestions: list[AgencyIDSubtaskSuggestionPydantic] = [] + for param in params: + subtask_id = url_id_to_subtask_id.get(param.url_id) + suggestion = AgencyIDSubtaskSuggestionPydantic( + subtask_id=subtask_id, + agency_id=param.agency_id, + confidence=param.confidence, + ) + suggestions.append(suggestion) + return suggestions \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/core.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/core.py new file mode 100644 index 00000000..f335cb3a --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/core.py @@ -0,0 +1,63 @@ +from src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.convert import \ + convert_params_to_subtask_entries, convert_subtask_mappings_and_params_to_suggestions +from src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.models.entry import \ + GetHomepageMatchParams +from src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.models.mapping import \ + SubtaskURLMapping +from src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.queries.get import \ + GetHomepageMatchSubtaskURLsQueryBuilder +from src.core.tasks.url.operators.agency_identification.subtasks.templates.subtask import AgencyIDSubtaskOperatorBase +from src.db.models.impl.url.suggestion.agency.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic +from src.db.models.impl.url.suggestion.agency.suggestion.pydantic import AgencyIDSubtaskSuggestionPydantic + + +class HomepageMatchSubtaskOperator( + AgencyIDSubtaskOperatorBase, +): + + async def inner_logic(self) -> None: + # Get Params + params: list[GetHomepageMatchParams] = \ + await self.adb_client.run_query_builder( + 
GetHomepageMatchSubtaskURLsQueryBuilder() + ) + + # Insert Subtask Entries + subtask_entries: list[URLAutoAgencyIDSubtaskPydantic] = convert_params_to_subtask_entries( + params=params, + task_id=self.task_id + ) + subtask_mappings: list[SubtaskURLMapping] = await self.insert_subtask_entries( + entries=subtask_entries + ) + + # Link URLs + url_ids: list[int] = [mapping.url_id for mapping in subtask_mappings] + self.linked_urls = url_ids + + # Insert Entries + suggestions: list[AgencyIDSubtaskSuggestionPydantic] = convert_subtask_mappings_and_params_to_suggestions( + mappings=subtask_mappings, + params=params + ) + await self.adb_client.bulk_insert( + models=suggestions, + ) + + + async def insert_subtask_entries( + self, + entries: list[URLAutoAgencyIDSubtaskPydantic] + ) -> list[SubtaskURLMapping]: + subtask_ids: list[int] = await self.adb_client.bulk_insert( + models=entries, + return_ids=True + ) + mappings: list[SubtaskURLMapping] = [] + for subtask_id, entry in zip(subtask_ids, entries): + mapping = SubtaskURLMapping( + url_id=entry.url_id, + subtask_id=subtask_id, + ) + mappings.append(mapping) + return mappings diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/models/__init__.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/models/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/models/entry.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/models/entry.py new file mode 100644 index 00000000..6c65f9ad --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/models/entry.py @@ -0,0 +1,10 @@ +from pydantic import BaseModel, Field + +from src.db.models.impl.url.suggestion.agency.subtask.enum import SubtaskDetailCode + + +class GetHomepageMatchParams(BaseModel): + url_id: int + agency_id: int + 
confidence: int = Field(..., ge=0, le=100) + detail_code: SubtaskDetailCode \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/models/mapping.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/models/mapping.py new file mode 100644 index 00000000..2e4d2fbb --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/models/mapping.py @@ -0,0 +1,6 @@ +from pydantic import BaseModel + + +class SubtaskURLMapping(BaseModel): + url_id: int + subtask_id: int \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/__init__.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/__init__.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/consolidated.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/consolidated.py new file mode 100644 index 00000000..d90dfed6 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/consolidated.py @@ -0,0 +1,28 @@ +from sqlalchemy import CTE, select + +from src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.queries.ctes.count_agency_per_url import \ + COUNT_AGENCY_PER_URL_CTE +from src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.queries.ctes.meta_urls_with_root_agencies import \ + META_ROOT_URLS_WITH_AGENCIES +from 
src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.queries.ctes.unvalidated_urls_with_root import \ + UNVALIDATED_URLS_WITH_ROOT + +CONSOLIDATED_CTE: CTE = ( + select( + UNVALIDATED_URLS_WITH_ROOT.c.url_id, + META_ROOT_URLS_WITH_AGENCIES.c.agency_id, + COUNT_AGENCY_PER_URL_CTE.c.agency_count, + ) + .join( + COUNT_AGENCY_PER_URL_CTE, + COUNT_AGENCY_PER_URL_CTE.c.root_url_id == UNVALIDATED_URLS_WITH_ROOT.c.root_url_id + ) + .join( + META_ROOT_URLS_WITH_AGENCIES, + META_ROOT_URLS_WITH_AGENCIES.c.root_url_id == UNVALIDATED_URLS_WITH_ROOT.c.root_url_id + ) + .where( + COUNT_AGENCY_PER_URL_CTE.c.agency_count >= 1 + ) + .cte("consolidated") +) \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/count_agency_per_url.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/count_agency_per_url.py new file mode 100644 index 00000000..774787b7 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/count_agency_per_url.py @@ -0,0 +1,20 @@ +from sqlalchemy import CTE, func, select + +from src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.queries.ctes.meta_urls_with_root import \ + META_ROOT_URLS_CTE +from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency + +COUNT_AGENCY_PER_URL_CTE: CTE = ( + select( + META_ROOT_URLS_CTE.c.root_url_id, + func.count(LinkURLAgency.agency_id).label("agency_count") + ) + .join( + LinkURLAgency, + META_ROOT_URLS_CTE.c.meta_url_id == LinkURLAgency.url_id + ) + .group_by( + META_ROOT_URLS_CTE.c.root_url_id + ) + .cte("count_agency_per_url") +) \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/meta_urls_with_root.py 
b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/meta_urls_with_root.py new file mode 100644 index 00000000..63b6b417 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/meta_urls_with_root.py @@ -0,0 +1,23 @@ +from sqlalchemy import CTE, select + +from src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.queries.ctes.whitelisted_root_urls import \ + WHITELISTED_ROOT_URLS_CTE +from src.db.models.impl.link.urls_root_url.sqlalchemy import LinkURLRootURL +from src.db.models.views.meta_url import MetaURL + +META_ROOT_URLS_CTE: CTE = ( + select( + MetaURL.url_id.label("meta_url_id"), + LinkURLRootURL.root_url_id + ) + .join( + LinkURLRootURL, + MetaURL.url_id == LinkURLRootURL.url_id + ) + # Must be a Whitelisted Root URL + .join( + WHITELISTED_ROOT_URLS_CTE, + WHITELISTED_ROOT_URLS_CTE.c.id == LinkURLRootURL.root_url_id + ) + .cte("meta_root_urls") +) \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/meta_urls_with_root_agencies.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/meta_urls_with_root_agencies.py new file mode 100644 index 00000000..86b14ee4 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/meta_urls_with_root_agencies.py @@ -0,0 +1,20 @@ +from sqlalchemy import CTE, select + +from src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.queries.ctes.meta_urls_with_root import \ + META_ROOT_URLS_CTE +from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency + +META_ROOT_URLS_WITH_AGENCIES: CTE = ( + select( + META_ROOT_URLS_CTE.c.meta_url_id, + META_ROOT_URLS_CTE.c.root_url_id, + LinkURLAgency.agency_id + ) + .join( + LinkURLAgency, + META_ROOT_URLS_CTE.c.meta_url_id == LinkURLAgency.url_id 
+ ) + .cte( + "meta_root_urls_with_agencies" + ) +) \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/multi_agency_case.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/multi_agency_case.py new file mode 100644 index 00000000..edf9e601 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/multi_agency_case.py @@ -0,0 +1,17 @@ +from sqlalchemy import CTE, select, literal + +from src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.queries.ctes.consolidated import \ + CONSOLIDATED_CTE +from src.db.models.impl.url.suggestion.agency.subtask.enum import SubtaskDetailCode + +MULTI_AGENCY_CASE_QUERY = ( + select( + CONSOLIDATED_CTE.c.url_id, + CONSOLIDATED_CTE.c.agency_id, + (literal(100) / CONSOLIDATED_CTE.c.agency_count).label("confidence"), + literal(SubtaskDetailCode.HOMEPAGE_MULTI_AGENCY.value).label("detail_code") + ) + .where( + CONSOLIDATED_CTE.c.agency_count > 1 + ) +) \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/single_agency_case.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/single_agency_case.py new file mode 100644 index 00000000..5778ecb6 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/single_agency_case.py @@ -0,0 +1,17 @@ +from sqlalchemy import select, CTE, literal + +from src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.queries.ctes.consolidated import \ + CONSOLIDATED_CTE +from src.db.models.impl.url.suggestion.agency.subtask.enum import SubtaskDetailCode + +SINGLE_AGENCY_CASE_QUERY = ( + select( + CONSOLIDATED_CTE.c.url_id, + CONSOLIDATED_CTE.c.agency_id, + literal(95).label("confidence"), + 
literal(SubtaskDetailCode.HOMEPAGE_SINGLE_AGENCY.value).label("detail_code") + ) + .where( + CONSOLIDATED_CTE.c.agency_count == 1 + ) +) \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/unvalidated_urls_with_root.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/unvalidated_urls_with_root.py new file mode 100644 index 00000000..46702833 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/unvalidated_urls_with_root.py @@ -0,0 +1,22 @@ +from sqlalchemy import CTE, select + +from src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.queries.ctes.whitelisted_root_urls import \ + WHITELISTED_ROOT_URLS_CTE +from src.db.models.impl.link.urls_root_url.sqlalchemy import LinkURLRootURL +from src.db.models.views.unvalidated_url import UnvalidatedURL + +UNVALIDATED_URLS_WITH_ROOT: CTE = ( + select( + UnvalidatedURL.url_id, + LinkURLRootURL.root_url_id + ) + .join( + LinkURLRootURL, + UnvalidatedURL.url_id == LinkURLRootURL.url_id + ) + .join( + WHITELISTED_ROOT_URLS_CTE, + WHITELISTED_ROOT_URLS_CTE.c.id == LinkURLRootURL.root_url_id + ) + .cte("unvalidated_urls_with_root") +) \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/whitelisted_root_urls.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/whitelisted_root_urls.py new file mode 100644 index 00000000..1af8f46c --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/whitelisted_root_urls.py @@ -0,0 +1,47 @@ +from sqlalchemy import CTE, select, func + +from src.db.models.impl.flag.root_url.sqlalchemy import FlagRootURL +from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from 
src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency +from src.db.models.impl.link.urls_root_url.sqlalchemy import LinkURLRootURL +from src.db.models.impl.url.core.sqlalchemy import URL + +WHITELISTED_ROOT_URLS_CTE: CTE = ( + select( + URL.id + ) + .join( + FlagRootURL, + URL.id == FlagRootURL.url_id + ) + # Must be linked to other URLs + .join( + LinkURLRootURL, + URL.id == LinkURLRootURL.root_url_id + ) + # Those URLs must be meta URLS + .join( + FlagURLValidated, + FlagURLValidated.url_id == LinkURLRootURL.url_id + ) + # Get the Agency URLs for those URLs + .join( + LinkURLAgency, + LinkURLAgency.url_id == LinkURLRootURL.url_id + ) + .where( + # The connected URLs must be Meta URLs + FlagURLValidated.type == URLValidatedType.META_URL, + # Root URL can't be "https://catalog.data.gov" + URL.url != "https://catalog.data.gov" + ) + .group_by( + URL.id + ) + # Must have no more than two agencies connected + .having( + func.count(LinkURLAgency.agency_id) <= 2 + ) + .cte("whitelisted_root_urls") +) \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/get.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/get.py new file mode 100644 index 00000000..10619531 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/get.py @@ -0,0 +1,35 @@ +from typing import Sequence + +from sqlalchemy import Select, RowMapping +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.models.entry import \ + GetHomepageMatchParams +from src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.queries.ctes.multi_agency_case import \ + MULTI_AGENCY_CASE_QUERY +from 
src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.queries.ctes.single_agency_case import \ + SINGLE_AGENCY_CASE_QUERY +from src.db.helpers.session import session_helper as sh +from src.db.models.impl.url.suggestion.agency.subtask.enum import SubtaskDetailCode +from src.db.queries.base.builder import QueryBuilderBase + + +class GetHomepageMatchSubtaskURLsQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> list[GetHomepageMatchParams]: + + query: Select = SINGLE_AGENCY_CASE_QUERY.union(MULTI_AGENCY_CASE_QUERY) + + mappings: Sequence[RowMapping] = await sh.mappings(session, query=query) + + results: list[GetHomepageMatchParams] = [] + for mapping in mappings: + response = GetHomepageMatchParams( + url_id=mapping["url_id"], + agency_id=mapping["agency_id"], + confidence=mapping["confidence"], + detail_code=SubtaskDetailCode(mapping["detail_code"]), + ) + results.append(response) + + return results \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/muckrock.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/muckrock.py deleted file mode 100644 index 633d84ac..00000000 --- a/src/core/tasks/url/operators/agency_identification/subtasks/impl/muckrock.py +++ /dev/null @@ -1,48 +0,0 @@ -from typing import final - -from typing_extensions import override - -from src.collectors.impl.muckrock.api_interface.core import MuckrockAPIInterface -from src.collectors.impl.muckrock.api_interface.lookup_response import AgencyLookupResponse -from src.collectors.impl.muckrock.enums import AgencyLookupResponseType -from src.core.exceptions import MuckrockAPIError -from src.core.helpers import process_match_agency_response_to_suggestions -from src.core.tasks.url.operators.agency_identification.dtos.suggestion import URLAgencySuggestionInfo -from src.core.tasks.url.operators.agency_identification.subtasks.impl.base import AgencyIdentificationSubtaskBase -from 
src.external.pdap.client import PDAPClient -from src.external.pdap.dtos.match_agency.response import MatchAgencyResponse - -@final -class MuckrockAgencyIdentificationSubtask(AgencyIdentificationSubtaskBase): - - def __init__( - self, - muckrock_api_interface: MuckrockAPIInterface, - pdap_client: PDAPClient - ): - self.muckrock_api_interface = muckrock_api_interface - self.pdap_client = pdap_client - - @override - async def run( - self, - url_id: int, - collector_metadata: dict | None = None - ) -> list[URLAgencySuggestionInfo]: - muckrock_agency_id = collector_metadata["agency"] - agency_lookup_response: AgencyLookupResponse = await self.muckrock_api_interface.lookup_agency( - muckrock_agency_id=muckrock_agency_id - ) - if agency_lookup_response.type != AgencyLookupResponseType.FOUND: - raise MuckrockAPIError( - f"Failed to lookup muckrock agency: {muckrock_agency_id}:" - f" {agency_lookup_response.type.value}: {agency_lookup_response.error}" - ) - - match_agency_response: MatchAgencyResponse = await self.pdap_client.match_agency( - name=agency_lookup_response.name - ) - return process_match_agency_response_to_suggestions( - url_id=url_id, - match_agency_response=match_agency_response - ) diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/muckrock_/__init__.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/muckrock_/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/muckrock_/core.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/muckrock_/core.py new file mode 100644 index 00000000..4fa92c2e --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/muckrock_/core.py @@ -0,0 +1,93 @@ +from typing import final + +from typing_extensions import override + +from src.collectors.impl.muckrock.api_interface.core import MuckrockAPIInterface +from 
src.collectors.impl.muckrock.api_interface.lookup_response import AgencyLookupResponse +from src.collectors.impl.muckrock.enums import AgencyLookupResponseType +from src.core.tasks.url.operators.agency_identification.subtasks.convert import \ + convert_match_agency_response_to_subtask_data +from src.core.tasks.url.operators.agency_identification.subtasks.impl.muckrock_.params import \ + MuckrockAgencyIDSubtaskParams +from src.core.tasks.url.operators.agency_identification.subtasks.impl.muckrock_.query import \ + GetMuckrockAgencyIDSubtaskParamsQueryBuilder +from src.core.tasks.url.operators.agency_identification.subtasks.models.subtask import AutoAgencyIDSubtaskData +from src.core.tasks.url.operators.agency_identification.subtasks.templates.subtask import AgencyIDSubtaskOperatorBase +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType, SubtaskDetailCode +from src.db.models.impl.url.suggestion.agency.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic +from src.external.pdap.client import PDAPClient +from src.external.pdap.dtos.match_agency.response import MatchAgencyResponse + + +@final +class MuckrockAgencyIDSubtaskOperator(AgencyIDSubtaskOperatorBase): + + def __init__( + self, + adb_client: AsyncDatabaseClient, + task_id: int, + muckrock_api_interface: MuckrockAPIInterface, + pdap_client: PDAPClient + ): + super().__init__(adb_client, task_id=task_id) + self.muckrock_api_interface = muckrock_api_interface + self.pdap_client = pdap_client + + @override + async def inner_logic(self) -> None: + params: list[MuckrockAgencyIDSubtaskParams] = await self._get_params() + self.linked_urls = [param.url_id for param in params] + subtask_data_list: list[AutoAgencyIDSubtaskData] = [] + for param in params: + muckrock_agency_id: int = param.collector_metadata["agency"] + agency_lookup_response: AgencyLookupResponse = await self.muckrock_api_interface.lookup_agency( + 
muckrock_agency_id=muckrock_agency_id + ) + if agency_lookup_response.type != AgencyLookupResponseType.FOUND: + data: AutoAgencyIDSubtaskData = await self._error_subtask_data( + url_id=param.url_id, + muckrock_agency_id=muckrock_agency_id, + agency_lookup_response=agency_lookup_response + ) + subtask_data_list.append(data) + continue + match_agency_response: MatchAgencyResponse = await self.pdap_client.match_agency( + name=agency_lookup_response.name + ) + subtask_data: AutoAgencyIDSubtaskData = convert_match_agency_response_to_subtask_data( + url_id=param.url_id, + response=match_agency_response, + subtask_type=AutoAgencyIDSubtaskType.MUCKROCK, + task_id=self.task_id + ) + subtask_data_list.append(subtask_data) + + await self._upload_subtask_data(subtask_data_list) + + + async def _error_subtask_data( + self, + url_id: int, + muckrock_agency_id: int, + agency_lookup_response: AgencyLookupResponse + ) -> AutoAgencyIDSubtaskData: + pydantic_model = URLAutoAgencyIDSubtaskPydantic( + task_id=self.task_id, + url_id=url_id, + type=AutoAgencyIDSubtaskType.MUCKROCK, + agencies_found=False, + detail=SubtaskDetailCode.RETRIEVAL_ERROR + ) + error: str = f"Failed to lookup muckrock agency: {muckrock_agency_id}:" + \ + f" {agency_lookup_response.type.value}: {agency_lookup_response.error}" + return AutoAgencyIDSubtaskData( + pydantic_model=pydantic_model, + suggestions=[], + error=error + ) + + async def _get_params(self) -> list[MuckrockAgencyIDSubtaskParams]: + return await self.adb_client.run_query_builder( + GetMuckrockAgencyIDSubtaskParamsQueryBuilder() + ) \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/muckrock_/params.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/muckrock_/params.py new file mode 100644 index 00000000..6010f022 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/muckrock_/params.py @@ -0,0 +1,6 @@ +from pydantic import BaseModel + + +class 
MuckrockAgencyIDSubtaskParams(BaseModel): + url_id: int + collector_metadata: dict \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/muckrock_/query.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/muckrock_/query.py new file mode 100644 index 00000000..6f575b4f --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/muckrock_/query.py @@ -0,0 +1,49 @@ +from typing import Sequence + +from sqlalchemy import select, RowMapping +from sqlalchemy.ext.asyncio import AsyncSession + +from src.collectors.enums import CollectorType +from src.core.tasks.url.operators.agency_identification.subtasks.impl.muckrock_.params import \ + MuckrockAgencyIDSubtaskParams +from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.eligible import \ + EligibleContainer +from src.db.helpers.session import session_helper as sh +from src.db.models.impl.batch.sqlalchemy import Batch +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.queries.base.builder import QueryBuilderBase + + +class GetMuckrockAgencyIDSubtaskParamsQueryBuilder(QueryBuilderBase): + + async def run( + self, + session: AsyncSession + ) -> list[MuckrockAgencyIDSubtaskParams]: + container = EligibleContainer() + + query = ( + select( + container.url_id, + URL.collector_metadata + ) + .join( + URL, + URL.id == container.url_id, + ) + .where( + container.muckrock, + ) + .limit(500) + ) + + results: Sequence[RowMapping] = await sh.mappings(session, query=query) + return [ + MuckrockAgencyIDSubtaskParams( + url_id=mapping["url_id"], + collector_metadata=mapping["collector_metadata"], + ) + for mapping in results + ] + diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/__init__.py 
b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/constants.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/constants.py new file mode 100644 index 00000000..b8b4ce4d --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/constants.py @@ -0,0 +1,4 @@ + + +ITERATIONS_PER_SUBTASK = 2 +NUMBER_OF_ENTRIES_PER_ITERATION = 20 \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/core.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/core.py new file mode 100644 index 00000000..0c172e5d --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/core.py @@ -0,0 +1,57 @@ +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.constants import \ + ITERATIONS_PER_SUBTASK +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.models.input import \ + NLPLocationMatchSubtaskInput +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.core import \ + AgencyIDSubtaskInternalProcessor +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.nlp.core import \ + NLPProcessor +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.query import \ + GetNLPLocationMatchSubtaskInputQueryBuilder +from src.core.tasks.url.operators.agency_identification.subtasks.models.subtask import AutoAgencyIDSubtaskData +from src.core.tasks.url.operators.agency_identification.subtasks.templates.subtask import AgencyIDSubtaskOperatorBase +from src.db.client.async_ import 
AsyncDatabaseClient +from src.external.pdap.client import PDAPClient + + +class NLPLocationMatchSubtaskOperator(AgencyIDSubtaskOperatorBase): + + def __init__( + self, + adb_client: AsyncDatabaseClient, + task_id: int, + pdap_client: PDAPClient, + processor: NLPProcessor + ) -> None: + super().__init__(adb_client, task_id=task_id) + self.processor = AgencyIDSubtaskInternalProcessor( + nlp_processor=processor, + pdap_client=pdap_client, + task_id=task_id, + ) + + async def inner_logic(self) -> None: + for iteration in range(ITERATIONS_PER_SUBTASK): + inputs: list[NLPLocationMatchSubtaskInput] = await self._get_from_db() + if len(inputs) == 0: + break + await self.run_subtask_iteration(inputs) + + async def run_subtask_iteration(self, inputs: list[NLPLocationMatchSubtaskInput]) -> None: + self.linked_urls.extend([input_.url_id for input_ in inputs]) + subtask_data_list: list[AutoAgencyIDSubtaskData] = await self._process_inputs(inputs) + + await self._upload_subtask_data(subtask_data_list) + + async def _process_inputs( + self, + inputs: list[NLPLocationMatchSubtaskInput] + ) -> list[AutoAgencyIDSubtaskData]: + return await self.processor.process( + inputs=inputs, + ) + + async def _get_from_db(self) -> list[NLPLocationMatchSubtaskInput]: + return await self.adb_client.run_query_builder( + GetNLPLocationMatchSubtaskInputQueryBuilder(), + ) diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/models/__init__.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/models/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/models/input.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/models/input.py new file mode 100644 index 00000000..398c1504 --- /dev/null +++ 
b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/models/input.py @@ -0,0 +1,6 @@ +from pydantic import BaseModel + + +class NLPLocationMatchSubtaskInput(BaseModel): + url_id: int + html: str \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/__init__.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/constants.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/constants.py new file mode 100644 index 00000000..cc16da9f --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/constants.py @@ -0,0 +1,3 @@ + + +MAX_NLP_CONFIDENCE: int = 90 \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/convert.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/convert.py new file mode 100644 index 00000000..103580da --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/convert.py @@ -0,0 +1,162 @@ +from math import ceil + +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.constants import \ + MAX_NLP_CONFIDENCE +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.counter import \ + RequestCounter +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.models.mappings.url_id_nlp_response import \ + URLToNLPResponseMapping +from 
src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.models.mappings.url_id_search_params import \ + URLToSearchParamsMapping +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.models.mappings.url_id_search_response import \ + URLToSearchResponseMapping +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.nlp.models.response import \ + NLPLocationMatchResponse +from src.core.tasks.url.operators.agency_identification.subtasks.models.subtask import AutoAgencyIDSubtaskData +from src.core.tasks.url.operators.agency_identification.subtasks.models.suggestion import AgencySuggestion +from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.url.suggestion.agency.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic +from src.external.pdap.dtos.search_agency_by_location.params import SearchAgencyByLocationParams +from src.external.pdap.dtos.search_agency_by_location.response import SearchAgencyByLocationResponse + + +def convert_nlp_response_to_search_agency_by_location_params( + nlp_response: NLPLocationMatchResponse, + counter: RequestCounter +) -> list[SearchAgencyByLocationParams]: + params: list[SearchAgencyByLocationParams] = [] + for location in nlp_response.locations: + if nlp_response.us_state is None: + raise ValueError("US State is None; cannot convert NLP response to search agency by location params") + request_id: int = counter.next() + param = SearchAgencyByLocationParams( + request_id=request_id, + query=location, + iso=nlp_response.us_state.iso, + ) + params.append(param) + + return params + + + +def convert_search_agency_responses_to_subtask_data_list( + mappings: list[URLToSearchResponseMapping], + task_id: int +) -> list[AutoAgencyIDSubtaskData]: + subtask_data_list: list[AutoAgencyIDSubtaskData] = [] + + # First, extract agency suggestions for URL + for 
mapping in mappings: + url_id: int = mapping.url_id + search_responses: list[SearchAgencyByLocationResponse] = mapping.search_responses + suggestions: list[AgencySuggestion] = _convert_search_agency_response_to_agency_suggestions( + search_responses + ) + pydantic_model: URLAutoAgencyIDSubtaskPydantic = convert_search_agency_response_to_subtask_pydantic( + url_id=url_id, + task_id=task_id + ) + subtask_data = AutoAgencyIDSubtaskData( + pydantic_model=pydantic_model, + suggestions=suggestions + ) + subtask_data_list.append(subtask_data) + + return subtask_data_list + + +def _convert_search_agency_response_to_agency_suggestions( + responses: list[SearchAgencyByLocationResponse], +) -> list[AgencySuggestion]: + suggestions: list[AgencySuggestion] = [] + for response in responses: + for result in response.results: + agency_id: int = result.agency_id + similarity: float = result.similarity + confidence: int = min(ceil(similarity * 100), MAX_NLP_CONFIDENCE) + suggestion: AgencySuggestion = AgencySuggestion( + agency_id=agency_id, + confidence=confidence, + ) + suggestions.append(suggestion) + return suggestions + +def convert_url_ids_to_empty_subtask_data_list( + url_ids: list[int], + task_id: int +) -> list[AutoAgencyIDSubtaskData]: + results: list[AutoAgencyIDSubtaskData] = [] + for url_id in url_ids: + subtask_data = AutoAgencyIDSubtaskData( + pydantic_model=URLAutoAgencyIDSubtaskPydantic( + task_id=task_id, + url_id=url_id, + type=AutoAgencyIDSubtaskType.NLP_LOCATION_MATCH, + agencies_found=False + ), + suggestions=[] + ) + results.append(subtask_data) + + return results + + + +def convert_empty_url_search_param_mappings_to_subtask_data_list( + mappings: list[URLToSearchParamsMapping], + task_id: int +) -> list[AutoAgencyIDSubtaskData]: + url_ids: list[int] = [] + for mapping in mappings: + url_ids.append(mapping.url_id) + + return convert_url_ids_to_empty_subtask_data_list( + url_ids=url_ids, + task_id=task_id + ) + +def 
convert_invalid_url_nlp_mappings_to_subtask_data_list( + mappings: list[URLToNLPResponseMapping], + task_id: int +) -> list[AutoAgencyIDSubtaskData]: + url_ids: list[int] = [] + for mapping in mappings: + url_ids.append(mapping.url_id) + + return convert_url_ids_to_empty_subtask_data_list( + url_ids=url_ids, + task_id=task_id + ) + + +def convert_search_agency_response_to_subtask_pydantic( + url_id: int, + task_id: int +) -> URLAutoAgencyIDSubtaskPydantic: + + return URLAutoAgencyIDSubtaskPydantic( + task_id=task_id, + url_id=url_id, + type=AutoAgencyIDSubtaskType.NLP_LOCATION_MATCH, + agencies_found=True + ) + + +def convert_urls_to_search_params( + url_to_nlp_mappings: list[URLToNLPResponseMapping] +) -> list[URLToSearchParamsMapping]: + url_to_search_params_mappings: list[URLToSearchParamsMapping] = [] + counter = RequestCounter() + for mapping in url_to_nlp_mappings: + search_params: list[SearchAgencyByLocationParams] = \ + convert_nlp_response_to_search_agency_by_location_params( + counter=counter, + nlp_response=mapping.nlp_response, + ) + mapping = URLToSearchParamsMapping( + url_id=mapping.url_id, + search_params=search_params, + ) + url_to_search_params_mappings.append(mapping) + return url_to_search_params_mappings diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/core.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/core.py new file mode 100644 index 00000000..1e349318 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/core.py @@ -0,0 +1,143 @@ +from collections import defaultdict + +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.models.input import \ + NLPLocationMatchSubtaskInput +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.convert import \ + 
convert_search_agency_responses_to_subtask_data_list, \ + convert_invalid_url_nlp_mappings_to_subtask_data_list, convert_urls_to_search_params +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.filter import \ + filter_valid_and_invalid_nlp_responses, filter_top_n_suggestions +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.mapper import \ + URLRequestIDMapper +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.models.mappings.url_id_nlp_response import \ + URLToNLPResponseMapping +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.models.mappings.url_id_search_params import \ + URLToSearchParamsMapping +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.models.mappings.url_id_search_response import \ + URLToSearchResponseMapping +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.models.subsets.nlp_responses import \ + NLPResponseSubsets +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.nlp.core import \ + NLPProcessor +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.nlp.models.response import \ + NLPLocationMatchResponse +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.nlp.preprocess import \ + preprocess_html +from src.core.tasks.url.operators.agency_identification.subtasks.models.subtask import AutoAgencyIDSubtaskData +from src.external.pdap.client import PDAPClient +from src.external.pdap.dtos.search_agency_by_location.params import SearchAgencyByLocationParams +from src.external.pdap.dtos.search_agency_by_location.response import SearchAgencyByLocationResponse + + +class AgencyIDSubtaskInternalProcessor: + + def 
__init__( + self, + nlp_processor: NLPProcessor, + pdap_client: PDAPClient, + task_id: int, + ): + self._nlp_processor = nlp_processor + self._pdap_client = pdap_client + self._task_id = task_id + + async def process( + self, + inputs: list[NLPLocationMatchSubtaskInput] + ) -> list[AutoAgencyIDSubtaskData]: + subtask_data_list: list[AutoAgencyIDSubtaskData] = [] + + url_to_nlp_mappings: list[URLToNLPResponseMapping] = \ + self._match_urls_to_nlp_responses(inputs) + + # Filter out valid and invalid NLP responses + nlp_response_subsets: NLPResponseSubsets = \ + filter_valid_and_invalid_nlp_responses(url_to_nlp_mappings) + + # For invalid responses, convert to subtask data with empty agencies + subtask_data_no_agency_list: list[AutoAgencyIDSubtaskData] = \ + convert_invalid_url_nlp_mappings_to_subtask_data_list( + mappings=nlp_response_subsets.invalid, + task_id=self._task_id, + ) + subtask_data_list.extend(subtask_data_no_agency_list) + + # For valid responses, convert to search param mappings + url_to_search_params_mappings: list[URLToSearchParamsMapping] = \ + convert_urls_to_search_params(nlp_response_subsets.valid) + + + response_mappings: list[URLToSearchResponseMapping] = \ + await self._get_pdap_info(url_to_search_params_mappings) + + subtask_data_list_agency_list: list[AutoAgencyIDSubtaskData] = \ + convert_search_agency_responses_to_subtask_data_list( + mappings=response_mappings, + task_id=self._task_id, + ) + + filter_top_n_suggestions(subtask_data_list_agency_list) + + subtask_data_list.extend(subtask_data_list_agency_list) + + return subtask_data_list + + def _match_urls_to_nlp_responses( + self, + inputs: list[NLPLocationMatchSubtaskInput] + ) -> list[URLToNLPResponseMapping]: + url_to_nlp_mappings: list[URLToNLPResponseMapping] = [] + for input_ in inputs: + nlp_response: NLPLocationMatchResponse = self._get_location_match(input_.html) + mapping = URLToNLPResponseMapping( + url_id=input_.url_id, + nlp_response=nlp_response, + ) + 
url_to_nlp_mappings.append(mapping) + return url_to_nlp_mappings + + def _get_location_match( + self, + html: str + ) -> NLPLocationMatchResponse: + preprocessed_html: str = preprocess_html(html) + return self._nlp_processor.parse_for_locations(preprocessed_html) + + async def _get_pdap_info( + self, + mappings: list[URLToSearchParamsMapping] + ) -> list[URLToSearchResponseMapping]: + if len(mappings) == 0: + return [] + params: list[SearchAgencyByLocationParams] = [] + # Map request IDs to URL IDs for later use + mapper = URLRequestIDMapper() + for mapping in mappings: + for search_param in mapping.search_params: + mapper.add_mapping( + request_id=search_param.request_id, + url_id=mapping.url_id, + ) + params.append(search_param) + + url_id_to_search_responses: dict[int, list[SearchAgencyByLocationResponse]] = defaultdict(list) + + responses: list[SearchAgencyByLocationResponse] = await self._pdap_client.search_agency_by_location(params) + # Map responses to URL IDs via request IDs + for response in responses: + request_id: int = response.request_id + url_id: int = mapper.get_url_id_by_request_id(request_id) + url_id_to_search_responses[url_id].append(response) + + # Reconcile URL IDs to search responses + response_mappings: list[URLToSearchResponseMapping] = [] + for url_id, responses in url_id_to_search_responses.items(): + mapping = URLToSearchResponseMapping( + url_id=url_id, + search_responses=responses, + ) + response_mappings.append(mapping) + + return response_mappings + + diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/counter.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/counter.py new file mode 100644 index 00000000..12e9e048 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/counter.py @@ -0,0 +1,11 @@ + + + +class RequestCounter: + + def __init__(self): + self._counter: int = 
0 + + def next(self) -> int: + self._counter += 1 + return self._counter \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/extract.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/extract.py new file mode 100644 index 00000000..053f4fb5 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/extract.py @@ -0,0 +1,12 @@ +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.models.mappings.url_id_search_params import \ + URLToSearchParamsMapping +from src.external.pdap.dtos.search_agency_by_location.params import SearchAgencyByLocationParams + + +def _extract_all_search_params( + url_to_search_params_mappings: list[URLToSearchParamsMapping] +) -> list[SearchAgencyByLocationParams]: + all_search_params: list[SearchAgencyByLocationParams] = [] + for mapping in url_to_search_params_mappings: + all_search_params.extend(mapping.search_params) + return all_search_params diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/filter.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/filter.py new file mode 100644 index 00000000..ff8b2de5 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/filter.py @@ -0,0 +1,59 @@ +from collections import defaultdict + +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.models.mappings.url_id_nlp_response import \ + URLToNLPResponseMapping +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.models.subsets.nlp_responses import \ + NLPResponseSubsets +from 
src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.nlp.models.response import \ + NLPLocationMatchResponse +from src.core.tasks.url.operators.agency_identification.subtasks.models.subtask import AutoAgencyIDSubtaskData +from src.core.tasks.url.operators.agency_identification.subtasks.models.suggestion import AgencySuggestion + + +def filter_valid_and_invalid_nlp_responses( + mappings: list[URLToNLPResponseMapping] +) -> NLPResponseSubsets: + valid: list[URLToNLPResponseMapping] = [] + invalid: list[URLToNLPResponseMapping] = [] + for mapping in mappings: + nlp_response: NLPLocationMatchResponse = mapping.nlp_response + if nlp_response.valid: + valid.append(mapping) + else: + invalid.append(mapping) + return NLPResponseSubsets( + valid=valid, + invalid=invalid, + ) + +def filter_top_n_suggestions( + subtask_data_list: list[AutoAgencyIDSubtaskData], + n: int = 5 +) -> None: + """Filters out all but the top N suggestions for each URL. + + Modifies: + - AutoAgencyIDSubtaskData.suggestions + """ + for subtask_data in subtask_data_list: + # Eliminate agency ID duplicates; + agency_to_suggestions: dict[int, list[AgencySuggestion]] = defaultdict(list) + for suggestion in subtask_data.suggestions: + agency_to_suggestions[suggestion.agency_id].append(suggestion) + + # in the case of a tie, keep the suggestion with the highest confidence + deduped_suggestions: list[AgencySuggestion] = [] + for agency_suggestions in agency_to_suggestions.values(): + agency_suggestions.sort( + key=lambda x: x.confidence, + reverse=True # Descending order + ) + deduped_suggestions.append(agency_suggestions[0]) + + # Sort suggestions by confidence and keep top N + suggestions_sorted: list[AgencySuggestion] = sorted( + deduped_suggestions, + key=lambda x: x.confidence, + reverse=True # Descending order + ) + subtask_data.suggestions = suggestions_sorted[:n] diff --git 
a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/mapper.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/mapper.py new file mode 100644 index 00000000..8192dbb6 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/mapper.py @@ -0,0 +1,10 @@ +class URLRequestIDMapper: + + def __init__(self): + self._request_id_to_url_id_mapper: dict[int, int] = {} + + def add_mapping(self, request_id: int, url_id: int) -> None: + self._request_id_to_url_id_mapper[request_id] = url_id + + def get_url_id_by_request_id(self, request_id: int) -> int: + return self._request_id_to_url_id_mapper[request_id] diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/models/__init__.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/models/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/models/mappings/__init__.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/models/mappings/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/models/mappings/url_id_nlp_response.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/models/mappings/url_id_nlp_response.py new file mode 100644 index 00000000..7bb7e701 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/models/mappings/url_id_nlp_response.py @@ -0,0 +1,9 @@ +from pydantic import BaseModel + +from 
src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.nlp.models.response import \ + NLPLocationMatchResponse + + +class URLToNLPResponseMapping(BaseModel): + url_id: int + nlp_response: NLPLocationMatchResponse \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/models/mappings/url_id_search_params.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/models/mappings/url_id_search_params.py new file mode 100644 index 00000000..5ab9deac --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/models/mappings/url_id_search_params.py @@ -0,0 +1,12 @@ +from pydantic import BaseModel + +from src.external.pdap.dtos.search_agency_by_location.params import SearchAgencyByLocationParams + + +class URLToSearchParamsMapping(BaseModel): + url_id: int + search_params: list[SearchAgencyByLocationParams] + + @property + def is_empty(self) -> bool: + return len(self.search_params) == 0 \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/models/mappings/url_id_search_response.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/models/mappings/url_id_search_response.py new file mode 100644 index 00000000..9a88b89d --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/models/mappings/url_id_search_response.py @@ -0,0 +1,8 @@ +from pydantic import BaseModel + +from src.external.pdap.dtos.search_agency_by_location.response import SearchAgencyByLocationResponse + + +class URLToSearchResponseMapping(BaseModel): + url_id: int + search_responses: list[SearchAgencyByLocationResponse] \ No newline at end of file diff --git 
a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/models/subsets/__init__.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/models/subsets/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/models/subsets/nlp_responses.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/models/subsets/nlp_responses.py new file mode 100644 index 00000000..22fdcf98 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/models/subsets/nlp_responses.py @@ -0,0 +1,9 @@ +from pydantic import BaseModel + +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.models.mappings.url_id_nlp_response import \ + URLToNLPResponseMapping + + +class NLPResponseSubsets(BaseModel): + valid: list[URLToNLPResponseMapping] + invalid: list[URLToNLPResponseMapping] \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/nlp/__init__.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/nlp/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/nlp/check.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/nlp/check.py new file mode 100644 index 00000000..ef60e038 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/nlp/check.py @@ -0,0 +1,9 @@ +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.nlp.mappings import \ + US_STATE_ISO_TO_NAME, US_NAME_TO_STATE_ISO 
# Number of top candidate locations to retain from entity scanning.
TOP_N_LOCATIONS_COUNT: int = 5

# Characters that indicate a candidate location string is malformed
# (e.g. leftover markup or attribute fragments) and should be discarded.
INVALID_LOCATION_CHARACTERS: set[str] = {
    "=",
    "\\",
    "/",
    "'",
    # BUG FIX: this entry was written as "\"," -- a two-character string
    # consisting of a double quote plus the separating comma, which had
    # slipped inside the literal. The intended entry is the double quote.
    '"',
}

# State ISOs that commonly coincide with ordinary English words,
# which therefore cannot be used in simple text scanning.
INVALID_SCAN_ISOS: set[str] = {
    "IN",
    "OR",
}
None) + + if iso is None: + return None + + return USState( + name=name, + iso=iso + ) \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/nlp/core.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/nlp/core.py new file mode 100644 index 00000000..8e723aa6 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/nlp/core.py @@ -0,0 +1,88 @@ +from collections import Counter + +import spacy +from spacy import Language +from spacy.tokens import Doc + +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.nlp.check import \ + is_name_us_state, is_iso_us_state +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.nlp.constants import \ + INVALID_LOCATION_CHARACTERS, INVALID_SCAN_ISOS +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.nlp.convert import \ + convert_us_state_name_to_us_state, convert_us_state_iso_to_us_state +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.nlp.enums import \ + SpacyModelType +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.nlp.extract import \ + extract_most_common_us_state, extract_top_n_locations +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.nlp.models.response import \ + NLPLocationMatchResponse +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.nlp.models.us_state import \ + USState + + +class NLPProcessor: + + def __init__( + self, + model_type: SpacyModelType = SpacyModelType.EN_CORE_WEB_SM + ): + self._model_type: SpacyModelType = model_type + self._model: Language | None = None + + def 
lazy_load_model(self) -> Language: + if self._model is None: + self._model = spacy.load(self._model_type.value, disable=['parser']) + return self._model + + + def parse_for_locations(self, html: str) -> NLPLocationMatchResponse: + model: Language = self.lazy_load_model() + doc: Doc = model(html) + us_state_counter: Counter[USState] = Counter() + location_counter: Counter[str] = Counter() + + # Scan over tokens + for token in doc: + upper_token: str = token.text.upper() + # Disregard certain ISOs that align with common words + if upper_token in INVALID_SCAN_ISOS: + continue + if not is_iso_us_state(upper_token): + continue + + us_state: USState | None = convert_us_state_iso_to_us_state(upper_token) + if us_state is not None: + us_state_counter[us_state] += 1 + + + # Scan over entities using spacy + for ent in doc.ents: + if ent.label_ != "GPE": # Geopolitical Entity + continue + text: str = ent.text + if any(char in text for char in INVALID_LOCATION_CHARACTERS): + continue + if is_name_us_state(text): + us_state: USState | None = convert_us_state_name_to_us_state(text) + if us_state is not None: + us_state_counter[us_state] += 1 + continue + if is_iso_us_state(text): + us_state: USState | None = convert_us_state_iso_to_us_state(text) + if us_state is not None: + us_state_counter[us_state] += 1 + continue + location_counter[text] += 1 + + # Get most common US State if exists + most_common_us_state: USState | None = extract_most_common_us_state(us_state_counter) + + top_n_locations: list[str] = extract_top_n_locations(location_counter) + + return NLPLocationMatchResponse( + us_state=most_common_us_state, + locations=top_n_locations + ) + + + diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/nlp/enums.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/nlp/enums.py new file mode 100644 index 00000000..9d1b987b --- /dev/null +++ 
from collections import Counter

from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.nlp.constants import \
    TOP_N_LOCATIONS_COUNT
from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.nlp.models.us_state import \
    USState


def extract_most_common_us_state(
    us_state_counter: Counter[USState]
) -> USState | None:
    """Return the single most frequent US state, or None if the counter is empty."""
    # Explicit emptiness check replaces the previous exception-driven
    # `most_common(1)[0][0]` + `except IndexError` pattern.
    most_common: list[tuple[USState, int]] = us_state_counter.most_common(1)
    return most_common[0][0] if most_common else None


def extract_top_n_locations(
    location_counter: Counter[str]
) -> list[str]:
    """Return up to TOP_N_LOCATIONS_COUNT location strings, most frequent first."""
    # Comprehension replaces the previous manual append loop.
    return [
        location
        for location, _ in location_counter.most_common(TOP_N_LOCATIONS_COUNT)
    ]
b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/nlp/mappings.py @@ -0,0 +1,59 @@ + + +US_STATE_ISO_TO_NAME: dict[str, str] = { + 'AL': 'Alabama', + 'AK': 'Alaska', + 'AZ': 'Arizona', + 'AR': 'Arkansas', + 'CA': 'California', + 'CO': 'Colorado', + 'CT': 'Connecticut', + 'DE': 'Delaware', + 'FL': 'Florida', + 'GA': 'Georgia', + 'HI': 'Hawaii', + 'ID': 'Idaho', + 'IL': 'Illinois', + 'IN': 'Indiana', + 'IA': 'Iowa', + 'KS': 'Kansas', + 'KY': 'Kentucky', + 'LA': 'Louisiana', + 'ME': 'Maine', + 'MD': 'Maryland', + 'MA': 'Massachusetts', + 'MI': 'Michigan', + 'MN': 'Minnesota', + 'MS': 'Mississippi', + 'MO': 'Missouri', + 'MT': 'Montana', + 'NE': 'Nebraska', + 'NV': 'Nevada', + 'NH': 'New Hampshire', + 'NJ': 'New Jersey', + 'NM': 'New Mexico', + 'NY': 'New York', + 'NC': 'North Carolina', + 'ND': 'North Dakota', + 'OH': 'Ohio', + 'OK': 'Oklahoma', + 'OR': 'Oregon', + 'PA': 'Pennsylvania', + 'RI': 'Rhode Island', + 'SC': 'South Carolina', + 'SD': 'South Dakota', + 'TN': 'Tennessee', + 'TX': 'Texas', + 'UT': 'Utah', + 'VT': 'Vermont', + 'VA': 'Virginia', + 'WA': 'Washington', + 'WV': 'West Virginia', + 'WI': 'Wisconsin', + 'WY': 'Wyoming', + 'DC': 'District of Columbia', +} + +US_NAME_TO_STATE_ISO: dict[str, str] = { + name: iso for iso, name in US_STATE_ISO_TO_NAME.items() +} \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/nlp/models/__init__.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/nlp/models/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/nlp/models/params.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/nlp/models/params.py new file mode 100644 index 00000000..79378612 --- /dev/null +++ 
from pydantic import BaseModel

from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.nlp.models.us_state import \
    USState


class NLPLocationMatchResponse(BaseModel):
    """Result of NLP location scanning: candidate locations plus the dominant US state."""

    locations: list[str]
    us_state: USState | None

    @property
    def valid(self) -> bool:
        """True only when both a US state and at least one location were found."""
        return self.us_state is not None and len(self.locations) > 0
b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/nlp/preprocess.py new file mode 100644 index 00000000..da20f4f4 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/processor/nlp/preprocess.py @@ -0,0 +1,20 @@ +import re + +import unicodedata +from bs4 import BeautifulSoup + + +def preprocess_html(raw_html: str) -> str: + """Preprocess HTML to extract text content.""" + soup = BeautifulSoup(raw_html, 'lxml') + + # Remove scripts, styles, and other non-textual elements + for tag in soup(['script','style','noscript','iframe','canvas','svg','header','footer','nav','aside']): + tag.decompose() + # Extract text + text = soup.get_text(separator=' ') + # Normalize text and collapse whitespace + text = unicodedata.normalize('NFKC', text) + text = re.sub(r'[ \t\u00A0]+', ' ', text) + text = re.sub(r'\n\s*\n\s*\n+', '\n\n', text) + return text.strip() \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/query.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/query.py new file mode 100644 index 00000000..32311bd1 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/query.py @@ -0,0 +1,49 @@ +from typing import Sequence + +from sqlalchemy import select, RowMapping +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.constants import \ + NUMBER_OF_ENTRIES_PER_ITERATION +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.models.input import \ + NLPLocationMatchSubtaskInput +from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.eligible import \ + EligibleContainer +from src.db.helpers.session import session_helper as sh +from 
src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML +from src.db.queries.base.builder import QueryBuilderBase +from src.db.utils.compression import decompress_html + + +class GetNLPLocationMatchSubtaskInputQueryBuilder(QueryBuilderBase): + + async def run( + self, + session: AsyncSession + ) -> list[NLPLocationMatchSubtaskInput]: + container = EligibleContainer() + query = ( + select( + container.url_id, + URLCompressedHTML.compressed_html + ) + .join( + URLCompressedHTML, + URLCompressedHTML.url_id == container.url_id, + ) + .where( + container.nlp_location, + ) + .limit(NUMBER_OF_ENTRIES_PER_ITERATION) + ) + + mappings: Sequence[RowMapping] = await sh.mappings(session, query=query) + inputs: list[NLPLocationMatchSubtaskInput] = [ + NLPLocationMatchSubtaskInput( + url_id=mapping["id"], + html=decompress_html(mapping["compressed_html"]), + ) + for mapping in mappings + ] + return inputs + diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/unknown.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/unknown.py deleted file mode 100644 index 7ffd57bc..00000000 --- a/src/core/tasks/url/operators/agency_identification/subtasks/impl/unknown.py +++ /dev/null @@ -1,30 +0,0 @@ -from typing_extensions import override, final - -from src.core.enums import SuggestionType -from src.core.tasks.url.operators.agency_identification.dtos.suggestion import URLAgencySuggestionInfo -from src.core.tasks.url.operators.agency_identification.subtasks.impl.base import AgencyIdentificationSubtaskBase - -@final -class UnknownAgencyIdentificationSubtask(AgencyIdentificationSubtaskBase): - """A subtask that returns an unknown suggestion. - - Used in cases where the agency cannot be reliably inferred from the source. 
- """ - - @override - async def run( - self, - url_id: int, - collector_metadata: dict | None = None - ) -> list[URLAgencySuggestionInfo]: - return [ - URLAgencySuggestionInfo( - url_id=url_id, - suggestion_type=SuggestionType.UNKNOWN, - pdap_agency_id=None, - agency_name=None, - state=None, - county=None, - locality=None - ) - ] diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/loader.py b/src/core/tasks/url/operators/agency_identification/subtasks/loader.py index 6ef84149..5dab9608 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/loader.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/loader.py @@ -1,10 +1,16 @@ -from src.collectors.enums import CollectorType from src.collectors.impl.muckrock.api_interface.core import MuckrockAPIInterface -from src.core.tasks.url.operators.agency_identification.subtasks.impl.base import AgencyIdentificationSubtaskBase -from src.core.tasks.url.operators.agency_identification.subtasks.impl.ckan import CKANAgencyIdentificationSubtask -from src.core.tasks.url.operators.agency_identification.subtasks.impl.muckrock import \ - MuckrockAgencyIdentificationSubtask -from src.core.tasks.url.operators.agency_identification.subtasks.impl.unknown import UnknownAgencyIdentificationSubtask +from src.core.tasks.url.operators.agency_identification.subtasks.impl.ckan_.core import CKANAgencyIDSubtaskOperator +from src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.core import \ + HomepageMatchSubtaskOperator +from src.core.tasks.url.operators.agency_identification.subtasks.impl.muckrock_.core import \ + MuckrockAgencyIDSubtaskOperator +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.core import \ + NLPLocationMatchSubtaskOperator +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.nlp.core import \ + NLPProcessor +from 
src.core.tasks.url.operators.agency_identification.subtasks.templates.subtask import AgencyIDSubtaskOperatorBase +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType from src.external.pdap.client import PDAPClient @@ -14,35 +20,58 @@ class AgencyIdentificationSubtaskLoader: def __init__( self, pdap_client: PDAPClient, - muckrock_api_interface: MuckrockAPIInterface + muckrock_api_interface: MuckrockAPIInterface, + adb_client: AsyncDatabaseClient, + nlp_processor: NLPProcessor ): - self.pdap_client = pdap_client - self.muckrock_api_interface = muckrock_api_interface + self._pdap_client = pdap_client + self._muckrock_api_interface = muckrock_api_interface + self._nlp_processor = nlp_processor + self.adb_client = adb_client - async def _load_muckrock_subtask(self) -> MuckrockAgencyIdentificationSubtask: - return MuckrockAgencyIdentificationSubtask( - muckrock_api_interface=self.muckrock_api_interface, - pdap_client=self.pdap_client + def _load_muckrock_subtask(self, task_id: int) -> MuckrockAgencyIDSubtaskOperator: + return MuckrockAgencyIDSubtaskOperator( + task_id=task_id, + adb_client=self.adb_client, + muckrock_api_interface=self._muckrock_api_interface, + pdap_client=self._pdap_client ) - async def _load_ckan_subtask(self) -> CKANAgencyIdentificationSubtask: - return CKANAgencyIdentificationSubtask( - pdap_client=self.pdap_client + def _load_ckan_subtask(self, task_id: int) -> CKANAgencyIDSubtaskOperator: + return CKANAgencyIDSubtaskOperator( + task_id=task_id, + adb_client=self.adb_client, + pdap_client=self._pdap_client ) - async def load_subtask(self, collector_type: CollectorType) -> AgencyIdentificationSubtaskBase: + def _load_homepage_match_subtask(self, task_id: int) -> HomepageMatchSubtaskOperator: + return HomepageMatchSubtaskOperator( + task_id=task_id, + adb_client=self.adb_client, + ) + + def _load_nlp_location_match_subtask(self, task_id: int) -> 
NLPLocationMatchSubtaskOperator: + return NLPLocationMatchSubtaskOperator( + task_id=task_id, + adb_client=self.adb_client, + pdap_client=self._pdap_client, + processor=self._nlp_processor + ) + + + async def load_subtask( + self, + subtask_type: AutoAgencyIDSubtaskType, + task_id: int + ) -> AgencyIDSubtaskOperatorBase: """Get subtask based on collector type.""" - match collector_type: - case CollectorType.MUCKROCK_SIMPLE_SEARCH: - return await self._load_muckrock_subtask() - case CollectorType.MUCKROCK_COUNTY_SEARCH: - return await self._load_muckrock_subtask() - case CollectorType.MUCKROCK_ALL_SEARCH: - return await self._load_muckrock_subtask() - case CollectorType.AUTO_GOOGLER: - return UnknownAgencyIdentificationSubtask() - case CollectorType.COMMON_CRAWLER: - return UnknownAgencyIdentificationSubtask() - case CollectorType.CKAN: - return await self._load_ckan_subtask() - return UnknownAgencyIdentificationSubtask() \ No newline at end of file + match subtask_type: + case AutoAgencyIDSubtaskType.MUCKROCK: + return self._load_muckrock_subtask(task_id) + case AutoAgencyIDSubtaskType.CKAN: + return self._load_ckan_subtask(task_id) + case AutoAgencyIDSubtaskType.NLP_LOCATION_MATCH: + return self._load_nlp_location_match_subtask(task_id) + case AutoAgencyIDSubtaskType.HOMEPAGE_MATCH: + return self._load_homepage_match_subtask(task_id) + raise ValueError(f"Unknown subtask type: {subtask_type}") diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/models/__init__.py b/src/core/tasks/url/operators/agency_identification/subtasks/models/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/models/run_info.py b/src/core/tasks/url/operators/agency_identification/subtasks/models/run_info.py new file mode 100644 index 00000000..524830e3 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/models/run_info.py @@ -0,0 +1,14 @@ +from pydantic import 
from pydantic import BaseModel


class AgencyIDSubtaskRunInfo(BaseModel):
    """Outcome of a single agency-ID subtask run."""

    # Populated with an error description when the run failed.
    error: str | None = None
    # URL ids linked during the run; None when the run produced no link data.
    linked_url_ids: list[int] | None = None

    @property
    def is_success(self) -> bool:
        """True when the run completed without error."""
        return self.error is None

    @property
    def has_linked_urls(self) -> bool:
        """True when at least one URL id was linked.

        BUG FIX: previously called len() directly on the field, which
        raised TypeError whenever linked_url_ids was left at its None default.
        """
        if self.linked_url_ids is None:
            return False
        return len(self.linked_url_ids) > 0
a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/__init__.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/constants.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/constants.py new file mode 100644 index 00000000..749332e6 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/constants.py @@ -0,0 +1,14 @@ +from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType + +# Determines priority of subtasks, all else being equal. +SUBTASK_HIERARCHY: list[AutoAgencyIDSubtaskType] = [ + AutoAgencyIDSubtaskType.CKAN, + AutoAgencyIDSubtaskType.MUCKROCK, + AutoAgencyIDSubtaskType.HOMEPAGE_MATCH, + AutoAgencyIDSubtaskType.NLP_LOCATION_MATCH +] + +SUBTASK_HIERARCHY_MAPPING: dict[AutoAgencyIDSubtaskType, int] = { + subtask: idx + for idx, subtask in enumerate(SUBTASK_HIERARCHY) +} \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/__init__.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/core.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/core.py new file mode 100644 index 00000000..2b81d2de --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/core.py @@ -0,0 +1,77 @@ +from collections import Counter + +from sqlalchemy import RowMapping +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.constants import SUBTASK_HIERARCHY, \ + 
class AgencyIDSubtaskSurveyQueryBuilder(QueryBuilderBase):
    """
    Survey applicable URLs to determine next subtask to run

    URLs are "inapplicable" if they have any of the following properties:
    - Are validated via FlagURLValidated model
    - Have at least one annotation with agency suggestion with confidence >= 95
    - Have all possible subtasks completed

    Returns the subtask with the most applicable URLs
    (or None if no subtasks have applicable URLs)
    """

    def __init__(
        self,
        allowed_subtasks: list[AutoAgencyIDSubtaskType]
    ):
        super().__init__()
        self._allowed_subtasks = allowed_subtasks

    async def run(self, session: AsyncSession) -> AutoAgencyIDSubtaskType | None:
        results: RowMapping = await sh.mapping(session, ELIGIBLE_COUNTS_QUERY)
        counts: Counter[str] = Counter(results)

        allowed_counts: Counter[str] = await self._filter_allowed_counts(counts)
        if len(allowed_counts) == 0:
            return None
        max_count: int = max(allowed_counts.values())
        if max_count == 0:
            return None
        subtasks_with_max_count: list[str] = [
            subtask for subtask, count in allowed_counts.items()
            if count == max_count
        ]
        subtasks_as_enum_list: list[AutoAgencyIDSubtaskType] = [
            AutoAgencyIDSubtaskType(subtask)
            for subtask in subtasks_with_max_count
        ]
        # BUG FIX: previously sorted by hierarchy index with reverse=True and
        # took element [0], which returned the LARGEST index -- i.e. the
        # LOWEST-priority subtask, contradicting both the comment here and
        # SUBTASK_HIERARCHY (which lists highest priority first, so a smaller
        # index means higher priority). Return the highest-priority subtask.
        return min(
            subtasks_as_enum_list,
            key=lambda subtask: SUBTASK_HIERARCHY_MAPPING[subtask],
        )

    async def _filter_allowed_counts(self, counts: Counter[str]) -> Counter[str]:
        # Drop counts for subtask types the caller did not enable.
        return Counter(
            {
                subtask: count
                for subtask, count in counts.items()
                if AutoAgencyIDSubtaskType(subtask) in self._allowed_subtasks
            }
        )
src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.subtask.impl.homepage import \ + HOMEPAGE_SUBTASK_CONTAINER +from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.subtask.impl.muckrock import \ + MUCKROCK_SUBTASK_CONTAINER +from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.subtask.impl.nlp_location import \ + NLP_LOCATION_CONTAINER +from src.db.models.impl.url.core.sqlalchemy import URL + +class EligibleContainer: + + def __init__(self): + self._cte = ( + select( + URL.id, + CKAN_SUBTASK_CONTAINER.eligible_query.label("ckan"), + MUCKROCK_SUBTASK_CONTAINER.eligible_query.label("muckrock"), + HOMEPAGE_SUBTASK_CONTAINER.eligible_query.label("homepage"), + NLP_LOCATION_CONTAINER.eligible_query.label("nlp_location"), + ) + .where( + HIGH_CONFIDENCE_ANNOTATIONS_EXISTS_CONTAINER.not_exists_query, + VALIDATED_EXISTS_CONTAINER.not_exists_query, + ) + .cte("eligible") + ) + + @property + def cte(self) -> CTE: + return self._cte + + @property + def url_id(self) -> Column[int]: + return self._cte.c['id'] + + @property + def ckan(self) -> Column[bool]: + return self._cte.c['ckan'] + + @property + def muckrock(self) -> Column[bool]: + return self._cte.c['muckrock'] + + @property + def homepage(self) -> Column[bool]: + return self._cte.c['homepage'] + + @property + def nlp_location(self) -> Column[bool]: + return self._cte.c['nlp_location'] \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/exists/__init__.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/exists/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/exists/container.py 
from sqlalchemy import CTE, Column, ColumnElement, exists

from src.db.models.impl.url.core.sqlalchemy import URL


class ExistsCTEContainer:
    """
    Base class for CTEs that determine validity for each subtask.

    Wraps a single-column CTE of URL ids; callers use `not_exists_query`
    to exclude URLs that appear in the wrapped CTE.
    """

    def __init__(
        self,
        cte: CTE,
    ) -> None:
        self._cte = cte

    @property
    def cte(self) -> CTE:
        """The wrapped single-column CTE."""
        return self._cte

    @property
    def url_id(self) -> Column[int]:
        """The CTE's sole column, holding URL ids."""
        return self._cte.columns[0]

    @property
    def not_exists_query(self) -> ColumnElement[bool]:
        """Boolean predicate: no row in the CTE matches the outer URL.id."""
        predicate = exists().where(self.url_id == URL.id)
        return ~predicate
+from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask +from src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion + +cte = ( + select( + URL.id + ) + .join( + URLAutoAgencyIDSubtask, + URLAutoAgencyIDSubtask.url_id == URL.id, + ) + .join( + AgencyIDSubtaskSuggestion, + AgencyIDSubtaskSuggestion.subtask_id == URLAutoAgencyIDSubtask.id, + ) + .where( + AgencyIDSubtaskSuggestion.confidence >= 95, + ) + .cte("high_confidence_annotations_exists") +) + +HIGH_CONFIDENCE_ANNOTATIONS_EXISTS_CONTAINER = ExistsCTEContainer( + cte, +) \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/exists/impl/validated.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/exists/impl/validated.py new file mode 100644 index 00000000..f515c1d1 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/exists/impl/validated.py @@ -0,0 +1,16 @@ +from sqlalchemy import select + +from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.exists.container import \ + ExistsCTEContainer +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated + +cte = ( + select( + FlagURLValidated.url_id + ) + .cte("validated_exists") +) + +VALIDATED_EXISTS_CONTAINER = ExistsCTEContainer( + cte, +) \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/__init__.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/container.py 
b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/container.py new file mode 100644 index 00000000..9782e4fd --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/container.py @@ -0,0 +1,40 @@ +from sqlalchemy import CTE, ColumnElement, Column, Select, exists, func + +from src.db.models.impl.url.core.sqlalchemy import URL + + +class SubtaskCTEContainer: + """ + CTE for URLs eligible for a given subtask. + A successful left join on this indicates the URL is eligible for the subtask. + A true value for `subtask_entry_exists` indicates + a subtask entry for the URL already exists + """ + + def __init__( + self, + cte: CTE, + ) -> None: + self._cte=cte + + @property + def cte(self) -> CTE: + return self._cte + + @property + def entry_exists(self) -> ColumnElement[bool]: + return self.cte.c['subtask_entry_exists'] + + @property + def url_id(self) -> Column[int]: + return self.cte.c['id'] + + @property + def eligible_query(self) -> ColumnElement[int]: + return ( + exists() + .where( + self.url_id == URL.id, + self.entry_exists.is_(False), + ) + ) \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/helpers.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/helpers.py new file mode 100644 index 00000000..b06442ea --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/helpers.py @@ -0,0 +1,18 @@ +from sqlalchemy import ColumnElement, exists + +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask + + +def get_exists_subtask_query( + subtask_type: AutoAgencyIDSubtaskType, +) -> 
ColumnElement[bool]: + return ( + exists() + .where( + URLAutoAgencyIDSubtask.url_id == URL.id, + URLAutoAgencyIDSubtask.type == subtask_type, + ) + .label("subtask_entry_exists") + ) \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/__init__.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/ckan.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/ckan.py new file mode 100644 index 00000000..b1b70cdb --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/ckan.py @@ -0,0 +1,37 @@ +from sqlalchemy import select + +from src.collectors.enums import CollectorType +from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.subtask.helpers import \ + get_exists_subtask_query +from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.subtask.container import \ + SubtaskCTEContainer +from src.db.models.impl.batch.sqlalchemy import Batch +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType + +cte = ( + select( + URL.id, + get_exists_subtask_query( + AutoAgencyIDSubtaskType.CKAN, + ), + ) + .join( + LinkBatchURL, + LinkBatchURL.url_id == URL.id, + ) + .join( + Batch, + Batch.id == LinkBatchURL.batch_id, + ) + .where( + Batch.strategy == CollectorType.CKAN.value, + + ) + .cte("ckan_eligible") +) + +CKAN_SUBTASK_CONTAINER = SubtaskCTEContainer( + cte, +) \ No newline at end of file diff --git 
a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/homepage.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/homepage.py new file mode 100644 index 00000000..4d75b4e0 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/homepage.py @@ -0,0 +1,34 @@ +from sqlalchemy import select, exists + +from src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.queries.ctes.consolidated import \ + CONSOLIDATED_CTE +from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.subtask.container import \ + SubtaskCTEContainer +from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.subtask.helpers import \ + get_exists_subtask_query +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType + +VALID_URL_FLAG = ( + exists() + .where( + URL.id == CONSOLIDATED_CTE.c.url_id, + ) +) + +cte = ( + select( + URL.id, + get_exists_subtask_query( + AutoAgencyIDSubtaskType.HOMEPAGE_MATCH, + ) + ) + .where( + VALID_URL_FLAG, + ) + .cte("homepage_eligible") +) + +HOMEPAGE_SUBTASK_CONTAINER = SubtaskCTEContainer( + cte, +) \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/muckrock.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/muckrock.py new file mode 100644 index 00000000..1f059e86 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/muckrock.py @@ -0,0 +1,40 @@ +from sqlalchemy import select + +from src.collectors.enums import CollectorType +from 
src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.subtask.container import \ + SubtaskCTEContainer +from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.subtask.helpers import \ + get_exists_subtask_query +from src.db.models.impl.batch.sqlalchemy import Batch +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType + +cte = ( + select( + URL.id, + get_exists_subtask_query( + AutoAgencyIDSubtaskType.MUCKROCK, + ) + ) + .join( + LinkBatchURL, + LinkBatchURL.url_id == URL.id, + ) + .join( + Batch, + Batch.id == LinkBatchURL.batch_id, + ) + .where( + Batch.strategy.in_( + (CollectorType.MUCKROCK_ALL_SEARCH.value, + CollectorType.MUCKROCK_COUNTY_SEARCH.value, + CollectorType.MUCKROCK_SIMPLE_SEARCH.value,) + ), + ) + .cte("muckrock_eligible") +) + +MUCKROCK_SUBTASK_CONTAINER = SubtaskCTEContainer( + cte, +) \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/nlp_location.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/nlp_location.py new file mode 100644 index 00000000..40533809 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/nlp_location.py @@ -0,0 +1,26 @@ +from sqlalchemy import select + +from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.subtask.helpers import \ + get_exists_subtask_query +from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.subtask.container import \ + SubtaskCTEContainer +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML +from 
src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType + +cte = ( + select( + URL.id, + get_exists_subtask_query( + AutoAgencyIDSubtaskType.NLP_LOCATION_MATCH + ) + ) + .join( + URLCompressedHTML + ) + .cte("nlp_location_eligible") +) + +NLP_LOCATION_CONTAINER = SubtaskCTEContainer( + cte, +) \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/eligible_counts.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/eligible_counts.py new file mode 100644 index 00000000..96a322cb --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/eligible_counts.py @@ -0,0 +1,25 @@ +from sqlalchemy import select, ColumnElement, Integer, func + +from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.eligible import \ + EligibleContainer +from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType + + +def sum_count(col: ColumnElement[bool], subtask_type: AutoAgencyIDSubtaskType) -> ColumnElement[int]: + return func.coalesce( + func.sum( + col.cast(Integer) + ), + 0, + ).label(subtask_type.value) + +container = EligibleContainer() + +ELIGIBLE_COUNTS_QUERY = ( + select( + sum_count(container.ckan, AutoAgencyIDSubtaskType.CKAN), + sum_count(container.muckrock, AutoAgencyIDSubtaskType.MUCKROCK), + sum_count(container.homepage, AutoAgencyIDSubtaskType.HOMEPAGE_MATCH), + sum_count(container.nlp_location, AutoAgencyIDSubtaskType.NLP_LOCATION_MATCH), + ) +) \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/templates/__init__.py b/src/core/tasks/url/operators/agency_identification/subtasks/templates/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/templates/output.py 
b/src/core/tasks/url/operators/agency_identification/subtasks/templates/output.py new file mode 100644 index 00000000..02ae76a4 --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/templates/output.py @@ -0,0 +1,5 @@ +from pydantic import BaseModel + + +class AgencyIDSubtaskOutputBase(BaseModel): + pass \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/templates/postprocessor.py b/src/core/tasks/url/operators/agency_identification/subtasks/templates/postprocessor.py new file mode 100644 index 00000000..b366747f --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/templates/postprocessor.py @@ -0,0 +1,26 @@ +from abc import ABC, abstractmethod + +from src.core.tasks.url.operators.agency_identification.subtasks.templates.output import AgencyIDSubtaskOutputBase +from src.db.client.async_ import AsyncDatabaseClient + + +class SubtaskPostprocessorBase(ABC): + """ + An optional class which takes + the output of the subtask along with the subtask id + and adds additional information to the database. 
+ """ + + def __init__( + self, + subtask_id: int, + subtask_output: AgencyIDSubtaskOutputBase, + adb_client: AsyncDatabaseClient + ): + self.subtask_id = subtask_id + self.subtask_output = subtask_output + self.adb_client = adb_client + + @abstractmethod + async def run(self) -> None: + raise NotImplementedError \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/templates/subtask.py b/src/core/tasks/url/operators/agency_identification/subtasks/templates/subtask.py new file mode 100644 index 00000000..4085b6dd --- /dev/null +++ b/src/core/tasks/url/operators/agency_identification/subtasks/templates/subtask.py @@ -0,0 +1,82 @@ +import abc +import traceback +from abc import ABC + +from src.core.tasks.url.operators.agency_identification.subtasks.models.run_info import AgencyIDSubtaskRunInfo +from src.core.tasks.url.operators.agency_identification.subtasks.models.subtask import AutoAgencyIDSubtaskData +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.url.error_info.pydantic import URLErrorPydanticInfo +from src.db.models.impl.url.suggestion.agency.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic +from src.db.models.impl.url.suggestion.agency.suggestion.pydantic import AgencyIDSubtaskSuggestionPydantic + + +class AgencyIDSubtaskOperatorBase(ABC): + + def __init__( + self, + adb_client: AsyncDatabaseClient, + task_id: int + ) -> None: + self.adb_client: AsyncDatabaseClient = adb_client + self.task_id: int = task_id + self.linked_urls: list[int] = [] + + async def run(self) -> AgencyIDSubtaskRunInfo: + try: + await self.inner_logic() + except Exception as e: + # Get stack trace + stack_trace: str = traceback.format_exc() + return AgencyIDSubtaskRunInfo( + error=f"{type(e).__name__}: {str(e)}: {stack_trace}", + linked_url_ids=self.linked_urls + ) + return AgencyIDSubtaskRunInfo( + linked_url_ids=self.linked_urls + ) + + @abc.abstractmethod + async def inner_logic(self) -> 
AgencyIDSubtaskRunInfo: + raise NotImplementedError + + async def _upload_subtask_data( + self, + subtask_data_list: list[AutoAgencyIDSubtaskData] + ) -> None: + + subtask_models: list[URLAutoAgencyIDSubtaskPydantic] = [ + subtask_data.pydantic_model + for subtask_data in subtask_data_list + ] + subtask_ids: list[int] = await self.adb_client.bulk_insert( + models=subtask_models, + return_ids=True + ) + suggestions: list[AgencyIDSubtaskSuggestionPydantic] = [] + for subtask_id, subtask_info in zip(subtask_ids, subtask_data_list): + for suggestion in subtask_info.suggestions: + suggestion_pydantic = AgencyIDSubtaskSuggestionPydantic( + subtask_id=subtask_id, + agency_id=suggestion.agency_id, + confidence=suggestion.confidence, + ) + suggestions.append(suggestion_pydantic) + + await self.adb_client.bulk_insert( + models=suggestions, + ) + + error_infos: list[URLErrorPydanticInfo] = [] + for subtask_info in subtask_data_list: + if not subtask_info.has_error: + continue + error_info = URLErrorPydanticInfo( + url_id=subtask_info.url_id, + error=subtask_info.error, + task_id=self.task_id, + ) + error_infos.append(error_info) + + await self.adb_client.bulk_insert( + models=error_infos, + ) diff --git a/src/core/tasks/url/operators/auto_relevant/queries/get_tdos.py b/src/core/tasks/url/operators/auto_relevant/queries/get_tdos.py index b3ba90ec..384cb5c4 100644 --- a/src/core/tasks/url/operators/auto_relevant/queries/get_tdos.py +++ b/src/core/tasks/url/operators/auto_relevant/queries/get_tdos.py @@ -28,7 +28,7 @@ async def run(self, session: AsyncSession) -> list[URLRelevantTDO]: .join(URLCompressedHTML) .outerjoin(AutoRelevantSuggestion) .where( - URL.status == URLStatus.PENDING.value, + URL.status == URLStatus.OK.value, AutoRelevantSuggestion.id.is_(None), ) ) diff --git a/src/core/tasks/url/operators/submit_approved/queries/get.py b/src/core/tasks/url/operators/submit_approved/queries/get.py index 6c22c731..19b32b5d 100644 --- 
a/src/core/tasks/url/operators/submit_approved/queries/get.py +++ b/src/core/tasks/url/operators/submit_approved/queries/get.py @@ -4,6 +4,8 @@ from src.collectors.enums import URLStatus from src.core.tasks.url.operators.submit_approved.tdo import SubmitApprovedURLTDO +from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.url.core.sqlalchemy import URL from src.db.queries.base.builder import QueryBuilderBase from src.db.helpers.session import session_helper as sh @@ -29,7 +31,8 @@ async def _process_results(self, urls): async def _build_query(): query = ( select(URL) - .where(URL.status == URLStatus.VALIDATED.value) + .join(FlagURLValidated, FlagURLValidated.url_id == URL.id) + .where(FlagURLValidated.type == URLValidatedType.DATA_SOURCE) .options( selectinload(URL.optional_data_source_metadata), selectinload(URL.confirmed_agencies), diff --git a/src/core/tasks/url/operators/submit_approved/queries/has_validated.py b/src/core/tasks/url/operators/submit_approved/queries/has_validated.py index abd94d20..5a3ff464 100644 --- a/src/core/tasks/url/operators/submit_approved/queries/has_validated.py +++ b/src/core/tasks/url/operators/submit_approved/queries/has_validated.py @@ -2,6 +2,8 @@ from sqlalchemy.ext.asyncio import AsyncSession from src.collectors.enums import URLStatus +from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.url.core.sqlalchemy import URL from src.db.queries.base.builder import QueryBuilderBase @@ -11,7 +13,13 @@ class HasValidatedURLsQueryBuilder(QueryBuilderBase): async def run(self, session: AsyncSession) -> bool: query = ( select(URL) - .where(URL.status == URLStatus.VALIDATED.value) + .join( + FlagURLValidated, + FlagURLValidated.url_id == URL.id + ) + .where( + FlagURLValidated.type == 
URLValidatedType.DATA_SOURCE + ) ) urls = await session.execute(query) urls = urls.scalars().all() diff --git a/src/core/tasks/url/operators/submit_approved/queries/mark_submitted.py b/src/core/tasks/url/operators/submit_approved/queries/mark_submitted.py index d2563335..4ebfef56 100644 --- a/src/core/tasks/url/operators/submit_approved/queries/mark_submitted.py +++ b/src/core/tasks/url/operators/submit_approved/queries/mark_submitted.py @@ -19,14 +19,6 @@ async def run(self, session: AsyncSession): url_id = info.url_id data_source_id = info.data_source_id - query = ( - update(URL) - .where(URL.id == url_id) - .values( - status=URLStatus.SUBMITTED.value - ) - ) - url_data_source_object = URLDataSource( url_id=url_id, data_source_id=data_source_id @@ -35,4 +27,3 @@ async def run(self, session: AsyncSession): url_data_source_object.created_at = info.submitted_at session.add(url_data_source_object) - await session.execute(query) \ No newline at end of file diff --git a/src/db/client/async_.py b/src/db/client/async_.py index 3b994f86..19cbc3f5 100644 --- a/src/db/client/async_.py +++ b/src/db/client/async_.py @@ -3,7 +3,7 @@ from operator import or_ from typing import Optional, Type, Any, List, Sequence -from sqlalchemy import select, exists, func, case, Select, and_, update, delete, literal, Row +from sqlalchemy import select, exists, func, Select, and_, update, delete, Row from sqlalchemy.dialects.postgresql import insert as pg_insert from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker from sqlalchemy.orm import selectinload, QueryableAttribute @@ -26,21 +26,23 @@ from src.api.endpoints.collector.dtos.manual_batch.post import ManualBatchInputDTO from src.api.endpoints.collector.dtos.manual_batch.response import ManualBatchResponseDTO from src.api.endpoints.collector.manual.query import UploadManualBatchQueryBuilder +from src.api.endpoints.metrics.backlog.query import GetBacklogMetricsQueryBuilder from 
src.api.endpoints.metrics.batches.aggregated.dto import GetMetricsBatchesAggregatedResponseDTO -from src.api.endpoints.metrics.batches.aggregated.query import GetBatchesAggregatedMetricsQueryBuilder +from src.api.endpoints.metrics.batches.aggregated.query.core import GetBatchesAggregatedMetricsQueryBuilder from src.api.endpoints.metrics.batches.breakdown.dto import GetMetricsBatchesBreakdownResponseDTO from src.api.endpoints.metrics.batches.breakdown.query import GetBatchesBreakdownMetricsQueryBuilder -from src.api.endpoints.metrics.dtos.get.backlog import GetMetricsBacklogResponseDTO, GetMetricsBacklogResponseInnerDTO +from src.api.endpoints.metrics.dtos.get.backlog import GetMetricsBacklogResponseDTO from src.api.endpoints.metrics.dtos.get.urls.aggregated.core import GetMetricsURLsAggregatedResponseDTO -from src.api.endpoints.metrics.dtos.get.urls.breakdown.pending import GetMetricsURLsBreakdownPendingResponseDTO, \ - GetMetricsURLsBreakdownPendingResponseInnerDTO +from src.api.endpoints.metrics.dtos.get.urls.breakdown.pending import GetMetricsURLsBreakdownPendingResponseDTO from src.api.endpoints.metrics.dtos.get.urls.breakdown.submitted import GetMetricsURLsBreakdownSubmittedResponseDTO, \ GetMetricsURLsBreakdownSubmittedInnerDTO +from src.api.endpoints.metrics.urls.aggregated.query.core import GetURLsAggregatedMetricsQueryBuilder +from src.api.endpoints.metrics.urls.breakdown.query.core import GetURLsBreakdownPendingMetricsQueryBuilder from src.api.endpoints.review.approve.dto import FinalReviewApprovalInfo from src.api.endpoints.review.approve.query_.core import ApproveURLQueryBuilder from src.api.endpoints.review.enums import RejectionReason +from src.api.endpoints.review.next.core import GetNextURLForFinalReviewQueryBuilder from src.api.endpoints.review.next.dto import GetNextURLForFinalReviewOuterResponse -from src.api.endpoints.review.next.query import GetNextURLForFinalReviewQueryBuilder from src.api.endpoints.review.reject.query import 
RejectURLQueryBuilder from src.api.endpoints.search.dtos.response import SearchURLResponse from src.api.endpoints.task.by_id.dto import TaskInfo @@ -50,19 +52,14 @@ from src.api.endpoints.url.get.query import GetURLsQueryBuilder from src.collectors.enums import URLStatus, CollectorType from src.collectors.queries.insert.urls.query import InsertURLsQueryBuilder -from src.core.enums import BatchStatus, SuggestionType, RecordType, SuggestedStatus +from src.core.enums import BatchStatus, RecordType, SuggestedStatus from src.core.env_var_manager import EnvVarManager -from src.core.tasks.scheduled.impl.huggingface.queries.check.core import CheckValidURLsUpdatedQueryBuilder -from src.core.tasks.scheduled.impl.huggingface.queries.get.core import GetForLoadingToHuggingFaceQueryBuilder -from src.core.tasks.scheduled.impl.huggingface.queries.get.model import GetForLoadingToHuggingFaceOutput from src.core.tasks.scheduled.impl.huggingface.queries.state import SetHuggingFaceUploadStateQueryBuilder from src.core.tasks.scheduled.impl.sync.agency.dtos.parameters import AgencySyncParameters from src.core.tasks.scheduled.impl.sync.agency.queries.get_sync_params import GetAgenciesSyncParametersQueryBuilder from src.core.tasks.scheduled.impl.sync.agency.queries.mark_full_sync import get_mark_full_agencies_sync_query from src.core.tasks.scheduled.impl.sync.agency.queries.update_sync_progress import \ get_update_agencies_sync_progress_query -from src.core.tasks.scheduled.impl.sync.agency.queries.upsert import \ - convert_agencies_sync_response_to_agencies_upsert from src.core.tasks.scheduled.impl.sync.data_sources.params import DataSourcesSyncParameters from src.core.tasks.scheduled.impl.sync.data_sources.queries.get_sync_params import \ GetDataSourcesSyncParametersQueryBuilder @@ -72,11 +69,6 @@ from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.core import \ UpsertURLsFromDataSourcesQueryBuilder from src.core.tasks.url.operators.agency_identification.dtos.suggestion 
import URLAgencySuggestionInfo -from src.core.tasks.url.operators.agency_identification.dtos.tdo import AgencyIdentificationTDO -from src.core.tasks.url.operators.agency_identification.queries.get_pending_urls_without_agency_suggestions import \ - GetPendingURLsWithoutAgencySuggestionsQueryBuilder -from src.core.tasks.url.operators.agency_identification.queries.has_urls_without_agency_suggestions import \ - HasURLsWithoutAgencySuggestionsQueryBuilder from src.core.tasks.url.operators.auto_relevant.models.tdo import URLRelevantTDO from src.core.tasks.url.operators.auto_relevant.queries.get_tdos import GetAutoRelevantTDOsQueryBuilder from src.core.tasks.url.operators.html.queries.get import \ @@ -106,9 +98,10 @@ from src.db.helpers.session import session_helper as sh from src.db.models.impl.agency.sqlalchemy import Agency from src.db.models.impl.backlog_snapshot import BacklogSnapshot -from src.db.models.impl.batch.pydantic import BatchInfo +from src.db.models.impl.batch.pydantic.info import BatchInfo from src.db.models.impl.batch.sqlalchemy import Batch from src.db.models.impl.duplicate.pydantic.info import DuplicateInfo +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.task_url import LinkTaskURL from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.log.pydantic.info import LogInfo @@ -126,7 +119,6 @@ from src.db.models.impl.url.html.content.sqlalchemy import URLHTMLContent from src.db.models.impl.url.optional_data_source_metadata import URLOptionalDataSourceMetadata from src.db.models.impl.url.probed_for_404 import URLProbedFor404 -from src.db.models.impl.url.suggestion.agency.auto import AutomatedUrlAgencySuggestion from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion from src.db.models.impl.url.suggestion.record_type.auto import AutoRecordTypeSuggestion from src.db.models.impl.url.suggestion.record_type.user import 
UserRecordTypeSuggestion @@ -145,7 +137,6 @@ from src.db.templates.markers.bulk.insert import BulkInsertableModel from src.db.templates.markers.bulk.upsert import BulkUpsertableModel from src.db.utils.compression import decompress_html, compress_html -from src.external.pdap.dtos.sync.agencies import AgenciesSyncResponseInnerInfo from src.external.pdap.dtos.sync.data_sources import DataSourcesSyncResponseInnerInfo @@ -546,7 +537,7 @@ async def get_urls_with_html_data_and_without_models( ): statement = (select(URL) .options(selectinload(URL.html_content)) - .where(URL.status == URLStatus.PENDING.value)) + .where(URL.status == URLStatus.OK.value)) statement = self.statement_composer.exclude_urls_with_extant_model( statement=statement, model=model @@ -575,7 +566,7 @@ async def has_urls_with_html_data_and_without_models( ) -> bool: statement = (select(URL) .join(URLCompressedHTML) - .where(URL.status == URLStatus.PENDING.value)) + .where(URL.status == URLStatus.OK.value)) # Exclude URLs with auto suggested record types statement = self.statement_composer.exclude_urls_with_extant_model( statement=statement, @@ -614,9 +605,11 @@ async def get_urls( page: int, errors: bool ) -> GetURLsResponseInfo: - return await self.run_query_builder(GetURLsQueryBuilder( - page=page, errors=errors - )) + return await self.run_query_builder( + GetURLsQueryBuilder( + page=page, errors=errors + ) + ) @session_manager async def initiate_task( @@ -657,7 +650,12 @@ async def get_html_content_info(self, url_id: int) -> list[URLHTMLContentInfo]: return await self.run_query_builder(GetHTMLContentInfoQueryBuilder(url_id)) @session_manager - async def link_urls_to_task(self, session: AsyncSession, task_id: int, url_ids: list[int]): + async def link_urls_to_task( + self, + session: AsyncSession, + task_id: int, + url_ids: list[int] + ) -> None: for url_id in url_ids: link = LinkTaskURL( url_id=url_id, @@ -720,24 +718,19 @@ async def get_tasks( tasks=final_results ) - async def 
has_urls_without_agency_suggestions(self) -> bool: - return await self.run_query_builder(HasURLsWithoutAgencySuggestionsQueryBuilder()) - async def get_urls_without_agency_suggestions( - self - ) -> list[AgencyIdentificationTDO]: - """Retrieve URLs without confirmed or suggested agencies.""" - return await self.run_query_builder(GetPendingURLsWithoutAgencySuggestionsQueryBuilder()) async def get_next_url_agency_for_annotation( self, user_id: int, batch_id: int | None ) -> GetNextURLForAgencyAnnotationResponse: - return await self.run_query_builder(builder=GetNextURLAgencyForAnnotationQueryBuilder( - user_id=user_id, - batch_id=batch_id - )) + return await self.run_query_builder( + builder=GetNextURLAgencyForAnnotationQueryBuilder( + user_id=user_id, + batch_id=batch_id + ) + ) @session_manager async def upsert_new_agencies( @@ -773,20 +766,6 @@ async def add_confirmed_agency_url_links( ) session.add(confirmed_agency) - @session_manager - async def add_agency_auto_suggestions( - self, - session: AsyncSession, - suggestions: list[URLAgencySuggestionInfo] - ): - for suggestion in suggestions: - url_agency_suggestion = AutomatedUrlAgencySuggestion( - url_id=suggestion.url_id, - agency_id=suggestion.pdap_agency_id, - is_unknown=suggestion.suggestion_type == SuggestionType.UNKNOWN - ) - session.add(url_agency_suggestion) - @session_manager async def add_agency_manual_suggestion( self, @@ -842,10 +821,12 @@ async def approve_url( approval_info: FinalReviewApprovalInfo, user_id: int, ) -> None: - await self.run_query_builder(ApproveURLQueryBuilder( - user_id=user_id, - approval_info=approval_info - )) + await self.run_query_builder( + ApproveURLQueryBuilder( + user_id=user_id, + approval_info=approval_info + ) + ) async def reject_url( self, @@ -853,11 +834,13 @@ async def reject_url( user_id: int, rejection_reason: RejectionReason ) -> None: - await self.run_query_builder(RejectURLQueryBuilder( - url_id=url_id, - user_id=user_id, - rejection_reason=rejection_reason - )) + 
await self.run_query_builder( + RejectURLQueryBuilder( + url_id=url_id, + user_id=user_id, + rejection_reason=rejection_reason + ) + ) @session_manager async def get_batch_by_id(self, session, batch_id: int) -> Optional[BatchSummary]: @@ -873,10 +856,12 @@ async def get_batch_by_id(self, session, batch_id: int) -> Optional[BatchSummary async def get_urls_by_batch(self, batch_id: int, page: int = 1) -> list[URLInfo]: """Retrieve all URLs associated with a batch.""" - return await self.run_query_builder(GetURLsByBatchQueryBuilder( - batch_id=batch_id, - page=page - )) + return await self.run_query_builder( + GetURLsByBatchQueryBuilder( + batch_id=batch_id, + page=page + ) + ) @session_manager async def insert_logs( @@ -926,8 +911,6 @@ async def insert_urls( ) return await self.run_query_builder(builder) - - @session_manager async def update_batch_post_collection( self, @@ -960,10 +943,12 @@ async def mark_urls_as_submitted(self, infos: list[SubmittedURLInfo]): await self.run_query_builder(MarkURLsAsSubmittedQueryBuilder(infos)) async def get_duplicates_by_batch_id(self, batch_id: int, page: int) -> list[DuplicateInfo]: - return await self.run_query_builder(GetDuplicatesByBatchIDQueryBuilder( - batch_id=batch_id, - page=page - )) + return await self.run_query_builder( + GetDuplicatesByBatchIDQueryBuilder( + batch_id=batch_id, + page=page + ) + ) @session_manager async def get_batch_summaries( @@ -1048,10 +1033,12 @@ async def upload_manual_batch( user_id: int, dto: ManualBatchInputDTO ) -> ManualBatchResponseDTO: - return await self.run_query_builder(UploadManualBatchQueryBuilder( - user_id=user_id, - dto=dto - )) + return await self.run_query_builder( + UploadManualBatchQueryBuilder( + user_id=user_id, + dto=dto + ) + ) @session_manager async def search_for_url(self, session: AsyncSession, url: str) -> SearchURLResponse: @@ -1114,183 +1101,16 @@ async def get_urls_breakdown_submitted_metrics( entries=final_results ) - @session_manager - async def 
get_urls_aggregated_metrics( - self, - session: AsyncSession - ) -> GetMetricsURLsAggregatedResponseDTO: - sc = StatementComposer + async def get_urls_aggregated_metrics(self) -> GetMetricsURLsAggregatedResponseDTO: + return await self.run_query_builder(GetURLsAggregatedMetricsQueryBuilder()) - oldest_pending_url_query = select( - URL.id, - URL.created_at - ).where( - URL.status == URLStatus.PENDING.value - ).order_by( - URL.created_at.asc() - ).limit(1) - - oldest_pending_url = await session.execute(oldest_pending_url_query) - oldest_pending_url = oldest_pending_url.one_or_none() - if oldest_pending_url is None: - oldest_pending_url_id = None - oldest_pending_created_at = None - else: - oldest_pending_url_id = oldest_pending_url.id - oldest_pending_created_at = oldest_pending_url.created_at - - def case_column(status: URLStatus, label): - return sc.count_distinct( - case( - ( - URL.status == status.value, - URL.id - ) - ), - label=label - ) - - count_query = select( - sc.count_distinct(URL.id, label="count"), - case_column(URLStatus.PENDING, label="count_pending"), - case_column(URLStatus.SUBMITTED, label="count_submitted"), - case_column(URLStatus.VALIDATED, label="count_validated"), - case_column(URLStatus.NOT_RELEVANT, label="count_rejected"), - case_column(URLStatus.ERROR, label="count_error"), - ) - raw_results = await session.execute(count_query) - results = raw_results.all() - - return GetMetricsURLsAggregatedResponseDTO( - count_urls_total=results[0].count, - count_urls_pending=results[0].count_pending, - count_urls_submitted=results[0].count_submitted, - count_urls_validated=results[0].count_validated, - count_urls_rejected=results[0].count_rejected, - count_urls_errors=results[0].count_error, - oldest_pending_url_id=oldest_pending_url_id, - oldest_pending_url_created_at=oldest_pending_created_at, - ) + async def get_urls_breakdown_pending_metrics(self) -> GetMetricsURLsBreakdownPendingResponseDTO: + return await 
self.run_query_builder(GetURLsBreakdownPendingMetricsQueryBuilder()) - @session_manager - async def get_urls_breakdown_pending_metrics( - self, - session: AsyncSession - ) -> GetMetricsURLsBreakdownPendingResponseDTO: - sc = StatementComposer - - flags = ( - select( - URL.id.label("url_id"), - case((UserRecordTypeSuggestion.url_id != None, literal(True)), else_=literal(False)).label( - "has_user_record_type_annotation" - ), - case((UserRelevantSuggestion.url_id != None, literal(True)), else_=literal(False)).label( - "has_user_relevant_annotation" - ), - case((UserUrlAgencySuggestion.url_id != None, literal(True)), else_=literal(False)).label( - "has_user_agency_annotation" - ), - ) - .outerjoin(UserRecordTypeSuggestion, URL.id == UserRecordTypeSuggestion.url_id) - .outerjoin(UserRelevantSuggestion, URL.id == UserRelevantSuggestion.url_id) - .outerjoin(UserUrlAgencySuggestion, URL.id == UserUrlAgencySuggestion.url_id) - ).cte("flags") - - month = func.date_trunc('month', URL.created_at) - - # Build the query - query = ( - select( - month.label('month'), - func.count(URL.id).label('count_total'), - func.count( - case( - (flags.c.has_user_record_type_annotation == True, 1) - ) - ).label('user_record_type_count'), - func.count( - case( - (flags.c.has_user_relevant_annotation == True, 1) - ) - ).label('user_relevant_count'), - func.count( - case( - (flags.c.has_user_agency_annotation == True, 1) - ) - ).label('user_agency_count'), - ) - .outerjoin(flags, flags.c.url_id == URL.id) - .where(URL.status == URLStatus.PENDING.value) - .group_by(month) - .order_by(month.asc()) - ) - - # Execute the query and return the results - results = await session.execute(query) - all_results = results.all() - final_results: list[GetMetricsURLsBreakdownPendingResponseInnerDTO] = [] - - for result in all_results: - dto = GetMetricsURLsBreakdownPendingResponseInnerDTO( - month=result.month.strftime("%B %Y"), - count_pending_total=result.count_total, - 
count_pending_relevant_user=result.user_relevant_count, - count_pending_record_type_user=result.user_record_type_count, - count_pending_agency_user=result.user_agency_count, - ) - final_results.append(dto) - return GetMetricsURLsBreakdownPendingResponseDTO( - entries=final_results, - ) - - @session_manager async def get_backlog_metrics( self, - session: AsyncSession ) -> GetMetricsBacklogResponseDTO: - month = func.date_trunc('month', BacklogSnapshot.created_at) - - # 1. Create a subquery that assigns row_number() partitioned by month - monthly_snapshot_subq = ( - select( - BacklogSnapshot.id, - BacklogSnapshot.created_at, - BacklogSnapshot.count_pending_total, - month.label("month_start"), - func.row_number() - .over( - partition_by=month, - order_by=BacklogSnapshot.created_at.desc() - ) - .label("row_number") - ) - .subquery() - ) - - # 2. Filter for the top (most recent) row in each month - stmt = ( - select( - monthly_snapshot_subq.c.month_start, - monthly_snapshot_subq.c.created_at, - monthly_snapshot_subq.c.count_pending_total - ) - .where(monthly_snapshot_subq.c.row_number == 1) - .order_by(monthly_snapshot_subq.c.month_start) - ) - - raw_result = await session.execute(stmt) - results = raw_result.all() - final_results = [] - for result in results: - final_results.append( - GetMetricsBacklogResponseInnerDTO( - month=result.month_start.strftime("%B %Y"), - count_pending_total=result.count_pending_total, - ) - ) - - return GetMetricsBacklogResponseDTO(entries=final_results) + return await self.run_query_builder(GetBacklogMetricsQueryBuilder()) @session_manager async def populate_backlog_snapshot( @@ -1300,10 +1120,15 @@ async def populate_backlog_snapshot( ): sc = StatementComposer # Get count of pending URLs - query = select( - sc.count_distinct(URL.id, label="count") - ).where( - URL.status == URLStatus.PENDING.value + query = ( + select( + sc.count_distinct(URL.id, label="count") + ) + .outerjoin(FlagURLValidated, URL.id == FlagURLValidated.url_id) + 
.where( + URL.status == URLStatus.OK.value, + FlagURLValidated.url_id.is_(None), + ) ) raw_result = await session.execute(query) @@ -1355,7 +1180,7 @@ async def has_pending_urls_not_recently_probed_for_404(self, session: AsyncSessi URLProbedFor404 ).where( and_( - URL.status == URLStatus.PENDING.value, + URL.status == URLStatus.OK.value, or_( URLProbedFor404.id == None, URLProbedFor404.last_probed_at < month_ago @@ -1378,7 +1203,7 @@ async def get_pending_urls_not_recently_probed_for_404(self, session: AsyncSessi URLProbedFor404 ).where( and_( - URL.status == URLStatus.PENDING.value, + URL.status == URLStatus.OK.value, or_( URLProbedFor404.id == None, URLProbedFor404.last_probed_at < month_ago @@ -1404,14 +1229,6 @@ async def get_data_sources_sync_parameters(self) -> DataSourcesSyncParameters: GetDataSourcesSyncParametersQueryBuilder() ) - async def upsert_agencies( - self, - agencies: list[AgenciesSyncResponseInnerInfo] - ) -> None: - await self.bulk_upsert( - models=convert_agencies_sync_response_to_agencies_upsert(agencies) - ) - async def upsert_urls_from_data_sources( self, data_sources: list[DataSourcesSyncResponseInnerInfo] @@ -1463,21 +1280,11 @@ async def add_raw_html( ) session.add(compressed_html) - async def get_data_sources_raw_for_huggingface(self, page: int) -> list[GetForLoadingToHuggingFaceOutput]: - return await self.run_query_builder( - GetForLoadingToHuggingFaceQueryBuilder(page) - ) - async def set_hugging_face_upload_state(self, dt: datetime) -> None: await self.run_query_builder( SetHuggingFaceUploadStateQueryBuilder(dt=dt) ) - async def check_valid_urls_updated(self) -> bool: - return await self.run_query_builder( - CheckValidURLsUpdatedQueryBuilder() - ) - async def get_current_database_time(self) -> datetime: return await self.scalar(select(func.now())) diff --git a/src/db/client/sync.py b/src/db/client/sync.py index 03a45d3b..04ecc892 100644 --- a/src/db/client/sync.py +++ b/src/db/client/sync.py @@ -1,5 +1,5 @@ from functools import 
wraps -from typing import Optional, List +from typing import List from sqlalchemy import create_engine, update, Select from sqlalchemy.exc import IntegrityError @@ -7,12 +7,12 @@ from src.collectors.enums import URLStatus from src.db.config_manager import ConfigManager -from src.db.models.impl.batch.pydantic import BatchInfo +from src.db.models.impl.batch.pydantic.info import BatchInfo from src.db.models.impl.duplicate.pydantic.insert import DuplicateInsertInfo from src.db.dtos.url.insert import InsertURLsInfo from src.db.models.impl.log.pydantic.info import LogInfo from src.db.dtos.url.mapping import URLMapping -from src.db.models.impl.link.batch_url import LinkBatchURL +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.pydantic.info import URLInfo from src.db.models.templates_.base import Base from src.db.models.impl.duplicate.sqlalchemy import Duplicate @@ -58,6 +58,11 @@ def wrapper(self, *args, **kwargs): return wrapper + @session_manager + def add_all(self, session: Session, objects: list[Base]): + session.add_all(objects) + session.commit() + @session_manager def insert_batch(self, session: Session, batch_info: BatchInfo) -> int: """Insert a new batch into the database and return its ID.""" @@ -221,14 +226,6 @@ def mark_urls_as_submitted( url_id = info.url_id data_source_id = info.data_source_id - query = ( - update(URL) - .where(URL.id == url_id) - .values( - status=URLStatus.SUBMITTED.value - ) - ) - url_data_source_object = URLDataSource( url_id=url_id, data_source_id=data_source_id @@ -237,7 +234,6 @@ def mark_urls_as_submitted( url_data_source_object.created_at = info.submitted_at session.add(url_data_source_object) - session.execute(query) if __name__ == "__main__": client = DatabaseClient() diff --git a/src/db/client/types.py b/src/db/client/types.py index efdfdc72..02c0e39b 100644 --- a/src/db/client/types.py +++ b/src/db/client/types.py @@ -1,9 +1,5 @@ -from 
src.db.models.impl.url.suggestion.agency.auto import AutomatedUrlAgencySuggestion from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion -from src.db.models.impl.url.suggestion.record_type.auto import AutoRecordTypeSuggestion from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.relevant.auto.sqlalchemy import AutoRelevantSuggestion from src.db.models.impl.url.suggestion.relevant.user import UserRelevantSuggestion UserSuggestionModel = UserRelevantSuggestion or UserRecordTypeSuggestion or UserUrlAgencySuggestion -AutoSuggestionModel = AutoRelevantSuggestion or AutoRecordTypeSuggestion or AutomatedUrlAgencySuggestion diff --git a/src/db/constants.py b/src/db/constants.py index 505a6e58..f2cdefb1 100644 --- a/src/db/constants.py +++ b/src/db/constants.py @@ -1,23 +1,11 @@ -from src.db.models.impl.url.suggestion.agency.auto import AutomatedUrlAgencySuggestion from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion -from src.db.models.impl.url.suggestion.record_type.auto import AutoRecordTypeSuggestion from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.relevant.auto.sqlalchemy import AutoRelevantSuggestion from src.db.models.impl.url.suggestion.relevant.user import UserRelevantSuggestion PLACEHOLDER_AGENCY_NAME = "PLACEHOLDER_AGENCY_NAME" STANDARD_ROW_LIMIT = 100 -ALL_ANNOTATION_MODELS = [ - AutoRecordTypeSuggestion, - AutoRelevantSuggestion, - AutomatedUrlAgencySuggestion, - UserRelevantSuggestion, - UserRecordTypeSuggestion, - UserUrlAgencySuggestion -] - USER_ANNOTATION_MODELS = [ UserRelevantSuggestion, UserRecordTypeSuggestion, diff --git a/src/db/dto_converter.py b/src/db/dto_converter.py index 979a3b51..b19b834d 100644 --- a/src/db/dto_converter.py +++ b/src/db/dto_converter.py @@ -1,21 +1,18 @@ -from typing import Optional - from 
src.api.endpoints.annotate.agency.get.dto import GetNextURLForAgencyAgencyInfo from src.api.endpoints.annotate.relevance.get.dto import RelevanceAnnotationResponseInfo from src.api.endpoints.review.next.dto import FinalReviewAnnotationRelevantInfo, FinalReviewAnnotationRecordTypeInfo, \ - FinalReviewAnnotationAgencyAutoInfo, FinalReviewAnnotationAgencyInfo + FinalReviewAnnotationAgencyInfo from src.core.enums import RecordType, SuggestionType from src.core.tasks.url.operators.html.scraper.parser.dtos.response_html import ResponseHTMLInfo from src.core.tasks.url.operators.html.scraper.parser.mapping import ENUM_TO_ATTRIBUTE_MAPPING from src.db.dtos.url.html_content import URLHTMLContentInfo -from src.db.models.impl.url.html.content.enums import HTMLContentType from src.db.dtos.url.with_html import URLWithHTML from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency -from src.db.models.impl.url.suggestion.agency.auto import AutomatedUrlAgencySuggestion -from src.db.models.impl.url.suggestion.record_type.auto import AutoRecordTypeSuggestion -from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion -from src.db.models.impl.url.html.content.sqlalchemy import URLHTMLContent from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.html.content.enums import HTMLContentType +from src.db.models.impl.url.html.content.sqlalchemy import URLHTMLContent +from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion +from src.db.models.impl.url.suggestion.record_type.auto import AutoRecordTypeSuggestion from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion from src.db.models.impl.url.suggestion.relevant.auto.sqlalchemy import AutoRelevantSuggestion from src.db.models.impl.url.suggestion.relevant.user import UserRelevantSuggestion @@ -65,111 +62,6 @@ def final_review_annotation_record_type_info( user=user_value ) - @staticmethod - def 
final_review_annotation_agency_auto_info( - automated_agency_suggestions: list[AutomatedUrlAgencySuggestion] - ) -> FinalReviewAnnotationAgencyAutoInfo: - - if len(automated_agency_suggestions) == 0: - return FinalReviewAnnotationAgencyAutoInfo( - unknown=True, - suggestions=[] - ) - - if len(automated_agency_suggestions) == 1: - suggestion = automated_agency_suggestions[0] - unknown = suggestion.is_unknown - else: - unknown = False - - if unknown: - return FinalReviewAnnotationAgencyAutoInfo( - unknown=True, - suggestions=[ - GetNextURLForAgencyAgencyInfo( - suggestion_type=SuggestionType.UNKNOWN, - ) - ] - ) - - return FinalReviewAnnotationAgencyAutoInfo( - unknown=unknown, - suggestions=[ - GetNextURLForAgencyAgencyInfo( - suggestion_type=SuggestionType.AUTO_SUGGESTION, - pdap_agency_id=suggestion.agency_id, - agency_name=suggestion.agency.name, - state=suggestion.agency.state, - county=suggestion.agency.county, - locality=suggestion.agency.locality - ) for suggestion in automated_agency_suggestions - ] - ) - - @staticmethod - def user_url_agency_suggestion_to_final_review_annotation_agency_user_info( - user_url_agency_suggestion: UserUrlAgencySuggestion - ) -> GetNextURLForAgencyAgencyInfo | None: - suggestion = user_url_agency_suggestion - if suggestion is None: - return None - if suggestion.is_new: - return GetNextURLForAgencyAgencyInfo( - suggestion_type=SuggestionType.NEW_AGENCY, - ) - return GetNextURLForAgencyAgencyInfo( - suggestion_type=SuggestionType.USER_SUGGESTION, - pdap_agency_id=suggestion.agency_id, - agency_name=suggestion.agency.name, - state=suggestion.agency.state, - county=suggestion.agency.county, - locality=suggestion.agency.locality - ) - - - @staticmethod - def confirmed_agencies_to_final_review_annotation_agency_info( - confirmed_agencies: list[LinkURLAgency] - ) -> list[GetNextURLForAgencyAgencyInfo]: - results = [] - for confirmed_agency in confirmed_agencies: - agency = confirmed_agency.agency - agency_info = 
GetNextURLForAgencyAgencyInfo( - suggestion_type=SuggestionType.CONFIRMED, - pdap_agency_id=agency.agency_id, - agency_name=agency.name, - state=agency.state, - county=agency.county, - locality=agency.locality - ) - results.append(agency_info) - return results - - - @staticmethod - def final_review_annotation_agency_info( - automated_agency_suggestions: list[AutomatedUrlAgencySuggestion], - confirmed_agencies: list[LinkURLAgency], - user_agency_suggestion: UserUrlAgencySuggestion - ): - - confirmed_agency_info = DTOConverter.confirmed_agencies_to_final_review_annotation_agency_info( - confirmed_agencies - ) - - agency_auto_info = DTOConverter.final_review_annotation_agency_auto_info( - automated_agency_suggestions - ) - - agency_user_info = DTOConverter.user_url_agency_suggestion_to_final_review_annotation_agency_user_info( - user_agency_suggestion - ) - - return FinalReviewAnnotationAgencyInfo( - confirmed=confirmed_agency_info, - user=agency_user_info, - auto=agency_auto_info - ) @staticmethod diff --git a/src/db/dtos/url/mapping.py b/src/db/dtos/url/mapping.py index 18fc5be2..d48a4649 100644 --- a/src/db/dtos/url/mapping.py +++ b/src/db/dtos/url/mapping.py @@ -1,7 +1,9 @@ -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict class URLMapping(BaseModel): """Mapping between url and url_id.""" + model_config = ConfigDict(frozen=True) # <- makes it immutable & hashable + url: str url_id: int diff --git a/src/db/helpers/session/session_helper.py b/src/db/helpers/session/session_helper.py index a616664f..aebf236f 100644 --- a/src/db/helpers/session/session_helper.py +++ b/src/db/helpers/session/session_helper.py @@ -11,8 +11,8 @@ from sqlalchemy.ext.asyncio import AsyncSession from src.db.helpers.session.parser import BulkActionParser -from src.db.models.templates_.with_id import WithIDBase from src.db.models.templates_.base import Base +from src.db.models.templates_.with_id import WithIDBase from src.db.templates.markers.bulk.delete import 
BulkDeletableModel from src.db.templates.markers.bulk.insert import BulkInsertableModel from src.db.templates.markers.bulk.update import BulkUpdatableModel @@ -51,21 +51,27 @@ async def has_results(session: AsyncSession, query: sa.Select) -> bool: async def bulk_upsert( session: AsyncSession, models: list[BulkUpsertableModel], -): +) -> None: if len(models) == 0: return + # Parse models to get sa_model and id_field parser = BulkActionParser(models) + # Create base insert query query = pg_insert(parser.sa_model) - upsert_mappings = [upsert_model.model_dump() for upsert_model in models] + upsert_mappings: list[dict[str, Any]] = [ + upsert_model.model_dump() for upsert_model in models + ] + # Set all non-id fields to the values in the upsert mapping set_ = {} for k, v in upsert_mappings[0].items(): if k == parser.id_field: continue set_[k] = getattr(query.excluded, k) + # Add upsert logic to update on conflict query = query.on_conflict_do_update( index_elements=[parser.id_field], set_=set_ @@ -216,4 +222,3 @@ async def bulk_update( ) await session.execute(stmt) - diff --git a/src/db/models/exceptions.py b/src/db/models/exceptions.py new file mode 100644 index 00000000..491aa9a4 --- /dev/null +++ b/src/db/models/exceptions.py @@ -0,0 +1,4 @@ + + +class WriteToViewError(Exception): + pass \ No newline at end of file diff --git a/src/db/models/impl/agency/sqlalchemy.py b/src/db/models/impl/agency/sqlalchemy.py index 556bde88..032dc397 100644 --- a/src/db/models/impl/agency/sqlalchemy.py +++ b/src/db/models/impl/agency/sqlalchemy.py @@ -25,6 +25,6 @@ class Agency( locality = Column(String, nullable=True) # Relationships - automated_suggestions = relationship("AutomatedUrlAgencySuggestion", back_populates="agency") + automated_suggestions = relationship("AgencyIDSubtaskSuggestion") user_suggestions = relationship("UserUrlAgencySuggestion", back_populates="agency") confirmed_urls = relationship("LinkURLAgency", back_populates="agency") diff --git 
a/src/db/models/impl/batch/pydantic/__init__.py b/src/db/models/impl/batch/pydantic/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/batch/pydantic.py b/src/db/models/impl/batch/pydantic/info.py similarity index 100% rename from src/db/models/impl/batch/pydantic.py rename to src/db/models/impl/batch/pydantic/info.py diff --git a/src/db/models/impl/batch/pydantic/insert.py b/src/db/models/impl/batch/pydantic/insert.py new file mode 100644 index 00000000..882ab371 --- /dev/null +++ b/src/db/models/impl/batch/pydantic/insert.py @@ -0,0 +1,17 @@ +from datetime import datetime + +from src.core.enums import BatchStatus +from src.db.models.impl.batch.sqlalchemy import Batch +from src.db.templates.markers.bulk.insert import BulkInsertableModel + + +class BatchInsertModel(BulkInsertableModel): + strategy: str + status: BatchStatus + parameters: dict + user_id: int + date_generated: datetime + + @classmethod + def sa_model(cls) -> type[Batch]: + return Batch \ No newline at end of file diff --git a/src/db/models/impl/flag/url_validated/__init__.py b/src/db/models/impl/flag/url_validated/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/flag/url_validated/enums.py b/src/db/models/impl/flag/url_validated/enums.py new file mode 100644 index 00000000..fe74b84c --- /dev/null +++ b/src/db/models/impl/flag/url_validated/enums.py @@ -0,0 +1,8 @@ +from enum import Enum + + +class URLValidatedType(Enum): + DATA_SOURCE = "data source" + META_URL = "meta url" + NOT_RELEVANT = "not relevant" + INDIVIDUAL_RECORD = "individual record" \ No newline at end of file diff --git a/src/db/models/impl/flag/url_validated/pydantic.py b/src/db/models/impl/flag/url_validated/pydantic.py new file mode 100644 index 00000000..197c05a0 --- /dev/null +++ b/src/db/models/impl/flag/url_validated/pydantic.py @@ -0,0 +1,22 @@ +from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from 
src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.templates.markers.bulk.insert import BulkInsertableModel +from src.db.templates.markers.bulk.upsert import BulkUpsertableModel + +type_ = type + +class FlagURLValidatedPydantic( + BulkInsertableModel, + BulkUpsertableModel +): + + url_id: int + type: URLValidatedType + + @classmethod + def sa_model(cls) -> type_[FlagURLValidated]: + return FlagURLValidated + + @classmethod + def id_field(cls) -> str: + return "url_id" \ No newline at end of file diff --git a/src/db/models/impl/flag/url_validated/sqlalchemy.py b/src/db/models/impl/flag/url_validated/sqlalchemy.py new file mode 100644 index 00000000..f6d4e770 --- /dev/null +++ b/src/db/models/impl/flag/url_validated/sqlalchemy.py @@ -0,0 +1,25 @@ +from sqlalchemy import PrimaryKeyConstraint + +from src.db.models.helpers import enum_column +from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.mixins import URLDependentMixin, CreatedAtMixin, UpdatedAtMixin +from src.db.models.templates_.base import Base + + +class FlagURLValidated( + URLDependentMixin, + CreatedAtMixin, + UpdatedAtMixin, + Base, +): + __tablename__ = "flag_url_validated" + __table_args__ = ( + PrimaryKeyConstraint( + 'url_id', + ), + ) + + type = enum_column( + enum_type=URLValidatedType, + name="validated_url_type", + ) diff --git a/src/db/models/impl/link/batch_url/__init__.py b/src/db/models/impl/link/batch_url/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/link/batch_url/pydantic.py b/src/db/models/impl/link/batch_url/pydantic.py new file mode 100644 index 00000000..143c57ce --- /dev/null +++ b/src/db/models/impl/link/batch_url/pydantic.py @@ -0,0 +1,11 @@ +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.templates.markers.bulk.insert import BulkInsertableModel + + +class LinkBatchURLPydantic(BulkInsertableModel): + batch_id: int + url_id: int + + 
@classmethod + def sa_model(cls) -> type[LinkBatchURL]: + return LinkBatchURL \ No newline at end of file diff --git a/src/db/models/impl/link/batch_url.py b/src/db/models/impl/link/batch_url/sqlalchemy.py similarity index 79% rename from src/db/models/impl/link/batch_url.py rename to src/db/models/impl/link/batch_url/sqlalchemy.py index 8fb8f42e..951ac539 100644 --- a/src/db/models/impl/link/batch_url.py +++ b/src/db/models/impl/link/batch_url/sqlalchemy.py @@ -13,5 +13,3 @@ class LinkBatchURL( ): __tablename__ = "link_batch_urls" - url = relationship('URL', overlaps="batch") - batch = relationship('Batch', overlaps="url") \ No newline at end of file diff --git a/src/db/models/impl/link/url_agency/pydantic.py b/src/db/models/impl/link/url_agency/pydantic.py index 77522a64..fe9194de 100644 --- a/src/db/models/impl/link/url_agency/pydantic.py +++ b/src/db/models/impl/link/url_agency/pydantic.py @@ -1,3 +1,5 @@ +from pydantic import ConfigDict + from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.templates.markers.bulk.delete import BulkDeletableModel from src.db.templates.markers.bulk.insert import BulkInsertableModel @@ -7,6 +9,8 @@ class LinkURLAgencyPydantic( BulkDeletableModel, BulkInsertableModel ): + model_config = ConfigDict(frozen=True) + url_id: int agency_id: int diff --git a/src/db/models/impl/link/url_agency/sqlalchemy.py b/src/db/models/impl/link/url_agency/sqlalchemy.py index f8d72065..875fa25f 100644 --- a/src/db/models/impl/link/url_agency/sqlalchemy.py +++ b/src/db/models/impl/link/url_agency/sqlalchemy.py @@ -7,7 +7,7 @@ class LinkURLAgency(URLDependentMixin, WithIDBase): - __tablename__ = "link_urls_agencies" + __tablename__ = "link_urls_agency" agency_id: Mapped[int] = get_agency_id_foreign_column() diff --git a/src/db/models/impl/url/core/pydantic/info.py b/src/db/models/impl/url/core/pydantic/info.py index 07df21fe..0985b3fc 100644 --- a/src/db/models/impl/url/core/pydantic/info.py +++ 
b/src/db/models/impl/url/core/pydantic/info.py @@ -12,7 +12,7 @@ class URLInfo(BaseModel): batch_id: int | None= None url: str collector_metadata: dict | None = None - status: URLStatus = URLStatus.PENDING + status: URLStatus = URLStatus.OK updated_at: datetime.datetime | None = None created_at: datetime.datetime | None = None name: str | None = None diff --git a/src/db/models/impl/url/core/pydantic/insert.py b/src/db/models/impl/url/core/pydantic/insert.py index b893e9fa..18743f1b 100644 --- a/src/db/models/impl/url/core/pydantic/insert.py +++ b/src/db/models/impl/url/core/pydantic/insert.py @@ -16,6 +16,6 @@ def sa_model(cls) -> type[Base]: url: str collector_metadata: dict | None = None name: str | None = None - status: URLStatus = URLStatus.PENDING + status: URLStatus = URLStatus.OK record_type: RecordType | None = None source: URLSource \ No newline at end of file diff --git a/src/db/models/impl/url/core/sqlalchemy.py b/src/db/models/impl/url/core/sqlalchemy.py index b9c38732..2001f9ed 100644 --- a/src/db/models/impl/url/core/sqlalchemy.py +++ b/src/db/models/impl/url/core/sqlalchemy.py @@ -40,7 +40,7 @@ class URL(UpdatedAtMixin, CreatedAtMixin, WithIDBase): "Batch", secondary="link_batch_urls", back_populates="urls", - uselist=False + uselist=False, ) duplicates = relationship("Duplicate", back_populates="original_url") html_content = relationship("URLHTMLContent", back_populates="url", cascade="all, delete-orphan") @@ -50,8 +50,9 @@ class URL(UpdatedAtMixin, CreatedAtMixin, WithIDBase): secondary="link_task_urls", back_populates="urls", ) - automated_agency_suggestions = relationship( - "AutomatedUrlAgencySuggestion", back_populates="url") + auto_agency_subtasks = relationship( + "URLAutoAgencyIDSubtask" + ) user_agency_suggestion = relationship( "UserUrlAgencySuggestion", uselist=False, back_populates="url") auto_record_type_suggestion = relationship( diff --git a/src/db/models/impl/url/suggestion/agency/auto.py 
b/src/db/models/impl/url/suggestion/agency/auto.py deleted file mode 100644 index 5ecfdf0a..00000000 --- a/src/db/models/impl/url/suggestion/agency/auto.py +++ /dev/null @@ -1,20 +0,0 @@ -from sqlalchemy import Column, Boolean, UniqueConstraint -from sqlalchemy.orm import relationship - -from src.db.models.helpers import get_agency_id_foreign_column -from src.db.models.mixins import URLDependentMixin -from src.db.models.templates_.with_id import WithIDBase - - -class AutomatedUrlAgencySuggestion(URLDependentMixin, WithIDBase): - __tablename__ = "automated_url_agency_suggestions" - - agency_id = get_agency_id_foreign_column(nullable=True) - is_unknown = Column(Boolean, nullable=True) - - agency = relationship("Agency", back_populates="automated_suggestions") - url = relationship("URL", back_populates="automated_agency_suggestions") - - __table_args__ = ( - UniqueConstraint("agency_id", "url_id", name="uq_automated_url_agency_suggestions"), - ) diff --git a/src/db/models/impl/url/suggestion/agency/subtask/__init__.py b/src/db/models/impl/url/suggestion/agency/subtask/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/url/suggestion/agency/subtask/enum.py b/src/db/models/impl/url/suggestion/agency/subtask/enum.py new file mode 100644 index 00000000..f3ee7c3f --- /dev/null +++ b/src/db/models/impl/url/suggestion/agency/subtask/enum.py @@ -0,0 +1,14 @@ +from enum import Enum + + +class AutoAgencyIDSubtaskType(Enum): + HOMEPAGE_MATCH = "homepage_match" + NLP_LOCATION_MATCH = "nlp_location_match" + MUCKROCK = "muckrock_match" + CKAN = "ckan_match" + +class SubtaskDetailCode(Enum): + NO_DETAILS = "no details" + RETRIEVAL_ERROR = "retrieval error" + HOMEPAGE_SINGLE_AGENCY = "homepage-single agency" + HOMEPAGE_MULTI_AGENCY = "homepage-multi agency" \ No newline at end of file diff --git a/src/db/models/impl/url/suggestion/agency/subtask/pydantic.py b/src/db/models/impl/url/suggestion/agency/subtask/pydantic.py new file mode 100644 index 
00000000..f2e9be57 --- /dev/null +++ b/src/db/models/impl/url/suggestion/agency/subtask/pydantic.py @@ -0,0 +1,17 @@ +from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType, SubtaskDetailCode +from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask +from src.db.models.templates_.base import Base +from src.db.templates.markers.bulk.insert import BulkInsertableModel + +type_alias = type + +class URLAutoAgencyIDSubtaskPydantic(BulkInsertableModel): + task_id: int + url_id: int + type: AutoAgencyIDSubtaskType + agencies_found: bool + detail: SubtaskDetailCode = SubtaskDetailCode.NO_DETAILS + + @classmethod + def sa_model(cls) -> type_alias[Base]: + return URLAutoAgencyIDSubtask \ No newline at end of file diff --git a/src/db/models/impl/url/suggestion/agency/subtask/sqlalchemy.py b/src/db/models/impl/url/suggestion/agency/subtask/sqlalchemy.py new file mode 100644 index 00000000..89371498 --- /dev/null +++ b/src/db/models/impl/url/suggestion/agency/subtask/sqlalchemy.py @@ -0,0 +1,35 @@ +from sqlalchemy.orm import relationship + +from src.db.models.helpers import enum_column +from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType, SubtaskDetailCode +from src.db.models.mixins import URLDependentMixin, CreatedAtMixin, TaskDependentMixin +from src.db.models.templates_.with_id import WithIDBase + +import sqlalchemy as sa + +class URLAutoAgencyIDSubtask( + WithIDBase, + URLDependentMixin, + TaskDependentMixin, + CreatedAtMixin +): + + __tablename__ = "url_auto_agency_id_subtasks" + + type = enum_column( + AutoAgencyIDSubtaskType, + name="agency_auto_suggestion_method" + ) + agencies_found = sa.Column( + sa.Boolean(), + nullable=False + ) + detail = enum_column( + SubtaskDetailCode, + name="agency_id_subtask_detail_code", + ) + + suggestions = relationship( + "AgencyIDSubtaskSuggestion", + cascade="all, delete-orphan" + ) \ No newline at end of file diff --git 
a/src/db/models/impl/url/suggestion/agency/suggestion/__init__.py b/src/db/models/impl/url/suggestion/agency/suggestion/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/url/suggestion/agency/suggestion/pydantic.py b/src/db/models/impl/url/suggestion/agency/suggestion/pydantic.py new file mode 100644 index 00000000..5a0fd2b8 --- /dev/null +++ b/src/db/models/impl/url/suggestion/agency/suggestion/pydantic.py @@ -0,0 +1,16 @@ +from src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion +from src.db.models.templates_.base import Base +from src.db.templates.markers.bulk.insert import BulkInsertableModel + + +class AgencyIDSubtaskSuggestionPydantic( + BulkInsertableModel, +): + subtask_id: int + agency_id: int + confidence: int + + @classmethod + def sa_model(cls) -> type[Base]: + """Defines the SQLAlchemy model.""" + return AgencyIDSubtaskSuggestion \ No newline at end of file diff --git a/src/db/models/impl/url/suggestion/agency/suggestion/sqlalchemy.py b/src/db/models/impl/url/suggestion/agency/suggestion/sqlalchemy.py new file mode 100644 index 00000000..de6ee029 --- /dev/null +++ b/src/db/models/impl/url/suggestion/agency/suggestion/sqlalchemy.py @@ -0,0 +1,28 @@ +import sqlalchemy as sa +from sqlalchemy.orm import relationship + +from src.db.models.mixins import CreatedAtMixin, AgencyDependentMixin +from src.db.models.templates_.with_id import WithIDBase + + +class AgencyIDSubtaskSuggestion( + WithIDBase, + CreatedAtMixin, + AgencyDependentMixin, +): + __tablename__ = "agency_id_subtask_suggestions" + + subtask_id = sa.Column( + sa.Integer, + sa.ForeignKey("url_auto_agency_id_subtasks.id"), + nullable=False + ) + confidence = sa.Column( + sa.Integer, + sa.CheckConstraint( + "confidence BETWEEN 0 and 100" + ), + nullable=False, + ) + + agency = relationship("Agency", viewonly=True) \ No newline at end of file diff --git a/src/db/models/mixins.py b/src/db/models/mixins.py index 
541e5d09..d0dbbcab 100644 --- a/src/db/models/mixins.py +++ b/src/db/models/mixins.py @@ -1,5 +1,8 @@ -from sqlalchemy import Column, Integer, ForeignKey, TIMESTAMP +from typing import ClassVar +from sqlalchemy import Column, Integer, ForeignKey, TIMESTAMP, event + +from src.db.models.exceptions import WriteToViewError from src.db.models.helpers import get_created_at_column, CURRENT_TIME_SERVER_DEFAULT @@ -58,3 +61,17 @@ class UpdatedAtMixin: server_default=CURRENT_TIME_SERVER_DEFAULT, onupdate=CURRENT_TIME_SERVER_DEFAULT ) + +class ViewMixin: + """Attach to any mapped class that represents a DB view.""" + __is_view__: ClassVar[bool] = True + + @classmethod + def __declare_last__(cls) -> None: + # Block writes on this mapped class + for evt in ("before_insert", "before_update", "before_delete"): + event.listen(cls, evt, cls._block_write) + + @staticmethod + def _block_write(mapper, connection, target): + raise WriteToViewError(f"{type(target).__name__} is a read-only view.") diff --git a/src/db/models/views/__init__.py b/src/db/models/views/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/views/meta_url.py b/src/db/models/views/meta_url.py new file mode 100644 index 00000000..bc963e11 --- /dev/null +++ b/src/db/models/views/meta_url.py @@ -0,0 +1,26 @@ +""" + CREATE OR REPLACE VIEW meta_url_view AS + SELECT + urls.id + FROM urls + INNER JOIN flag_url_validated fuv on fuv.url_id = urls.id + where fuv.type = 'meta url' +""" + +from sqlalchemy import PrimaryKeyConstraint + +from src.db.models.mixins import ViewMixin, URLDependentMixin +from src.db.models.templates_.base import Base + + +class MetaURL( + Base, + ViewMixin, + URLDependentMixin, +): + + __tablename__ = "meta_url_view" + __table_args__ = ( + PrimaryKeyConstraint("url_id"), + {"info": "view"} + ) \ No newline at end of file diff --git a/src/db/models/views/unvalidated_url.py b/src/db/models/views/unvalidated_url.py new file mode 100644 index 00000000..767ee960 --- 
/dev/null +++ b/src/db/models/views/unvalidated_url.py @@ -0,0 +1,27 @@ +""" +select + u.id as url_id +from + urls u + left join flag_url_validated fuv + on fuv.url_id = u.id +where + fuv.type is null +""" +from sqlalchemy import PrimaryKeyConstraint + +from src.db.models.mixins import ViewMixin, URLDependentMixin +from src.db.models.templates_.base import Base + + +class UnvalidatedURL( + Base, + ViewMixin, + URLDependentMixin, +): + + __tablename__ = "unvalidated_url_view" + __table_args__ = ( + PrimaryKeyConstraint("url_id"), + {"info": "view"} + ) \ No newline at end of file diff --git a/src/db/models/views/url_annotations_flags.py b/src/db/models/views/url_annotations_flags.py new file mode 100644 index 00000000..7289020f --- /dev/null +++ b/src/db/models/views/url_annotations_flags.py @@ -0,0 +1,49 @@ +""" +CREATE OR REPLACE VIEW url_annotation_flags AS +( +SELECT u.id, + CASE WHEN arts.url_id IS NOT NULL THEN TRUE ELSE FALSE END AS has_auto_record_type_suggestion, + CASE WHEN ars.url_id IS NOT NULL THEN TRUE ELSE FALSE END AS has_auto_relevant_suggestion, + CASE WHEN auas.url_id IS NOT NULL THEN TRUE ELSE FALSE END AS has_auto_agency_suggestion, + CASE WHEN urts.url_id IS NOT NULL THEN TRUE ELSE FALSE END AS has_user_record_type_suggestion, + CASE WHEN urs.url_id IS NOT NULL THEN TRUE ELSE FALSE END AS has_user_relevant_suggestion, + CASE WHEN uuas.url_id IS NOT NULL THEN TRUE ELSE FALSE END AS has_user_agency_suggestion, + CASE WHEN cua.url_id IS NOT NULL THEN TRUE ELSE FALSE END AS has_confirmed_agency, + CASE WHEN ruu.url_id IS NOT NULL THEN TRUE ELSE FALSE END AS was_reviewed +FROM urls u + LEFT JOIN public.auto_record_type_suggestions arts ON u.id = arts.url_id + LEFT JOIN public.auto_relevant_suggestions ars ON u.id = ars.url_id + LEFT JOIN public.{URL_AUTO_AGENCY_SUGGESTIONS_TABLE_NAME} auas ON u.id = auas.url_id + LEFT JOIN public.user_record_type_suggestions urts ON u.id = urts.url_id + LEFT JOIN public.user_relevant_suggestions urs ON u.id = 
urs.url_id + LEFT JOIN public.user_url_agency_suggestions uuas ON u.id = uuas.url_id + LEFT JOIN public.reviewing_user_url ruu ON u.id = ruu.url_id + LEFT JOIN public.link_urls_agency cua on u.id = cua.url_id + ) +""" + +from sqlalchemy import PrimaryKeyConstraint, Column, Boolean + +from src.db.models.mixins import ViewMixin, URLDependentMixin +from src.db.models.templates_.base import Base + + +class URLAnnotationFlagsView( + Base, + ViewMixin, + URLDependentMixin +): + __tablename__ = "url_annotation_flags" + __table_args__ = ( + PrimaryKeyConstraint("url_id"), + {"info": "view"} + ) + + has_auto_record_type_suggestion = Column(Boolean, nullable=False) + has_auto_relevant_suggestion = Column(Boolean, nullable=False) + has_auto_agency_suggestion = Column(Boolean, nullable=False) + has_user_record_type_suggestion = Column(Boolean, nullable=False) + has_user_relevant_suggestion = Column(Boolean, nullable=False) + has_user_agency_suggestion = Column(Boolean, nullable=False) + has_confirmed_agency = Column(Boolean, nullable=False) + was_reviewed = Column(Boolean, nullable=False) \ No newline at end of file diff --git a/src/db/queries/implementations/core/common/annotation_exists_/__init__.py b/src/db/queries/implementations/core/common/annotation_exists_/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/queries/implementations/core/common/annotation_exists_/constants.py b/src/db/queries/implementations/core/common/annotation_exists_/constants.py new file mode 100644 index 00000000..ead32bc0 --- /dev/null +++ b/src/db/queries/implementations/core/common/annotation_exists_/constants.py @@ -0,0 +1,15 @@ +from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask +from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion +from src.db.models.impl.url.suggestion.record_type.auto import AutoRecordTypeSuggestion +from src.db.models.impl.url.suggestion.record_type.user import 
UserRecordTypeSuggestion +from src.db.models.impl.url.suggestion.relevant.auto.sqlalchemy import AutoRelevantSuggestion +from src.db.models.impl.url.suggestion.relevant.user import UserRelevantSuggestion + +ALL_ANNOTATION_MODELS = [ + AutoRecordTypeSuggestion, + AutoRelevantSuggestion, + URLAutoAgencyIDSubtask, + UserRelevantSuggestion, + UserRecordTypeSuggestion, + UserUrlAgencySuggestion +] diff --git a/src/db/queries/implementations/core/common/annotation_exists.py b/src/db/queries/implementations/core/common/annotation_exists_/core.py similarity index 79% rename from src/db/queries/implementations/core/common/annotation_exists.py rename to src/db/queries/implementations/core/common/annotation_exists_/core.py index f8dfa654..53e8bcf6 100644 --- a/src/db/queries/implementations/core/common/annotation_exists.py +++ b/src/db/queries/implementations/core/common/annotation_exists_/core.py @@ -17,7 +17,8 @@ from sqlalchemy import case, func, Select, select from src.collectors.enums import URLStatus -from src.db.constants import ALL_ANNOTATION_MODELS +from src.db.queries.implementations.core.common.annotation_exists_.constants import ALL_ANNOTATION_MODELS +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.mixins import URLDependentMixin from src.db.queries.base.builder import QueryBuilderBase @@ -29,7 +30,7 @@ class AnnotationExistsCTEQueryBuilder(QueryBuilderBase): def url_id(self): return self.query.c.url_id - def get_exists_label(self, model: Type[URLDependentMixin]): + def get_exists_label(self, model: Type[URLDependentMixin]) -> str: return f"{model.__name__}_exists" def get_all(self) -> list[Any]: @@ -67,6 +68,13 @@ async def build(self) -> Any: *annotation_exists_cases_all ) anno_exists_query = await self._outer_join_models(anno_exists_query) - anno_exists_query = anno_exists_query.where(URL.status == URLStatus.PENDING.value) + anno_exists_query = 
anno_exists_query.outerjoin( + FlagURLValidated, + FlagURLValidated.url_id == URL.id + ) + anno_exists_query = anno_exists_query.where( + URL.status == URLStatus.OK.value, + FlagURLValidated.url_id.is_(None) + ) anno_exists_query = anno_exists_query.group_by(URL.id).cte("annotations_exist") self.query = anno_exists_query diff --git a/src/db/queries/implementations/core/get/recent_batch_summaries/builder.py b/src/db/queries/implementations/core/get/recent_batch_summaries/builder.py index f9bb2ef8..86983b5c 100644 --- a/src/db/queries/implementations/core/get/recent_batch_summaries/builder.py +++ b/src/db/queries/implementations/core/get/recent_batch_summaries/builder.py @@ -9,6 +9,7 @@ from src.core.enums import BatchStatus from src.db.models.impl.batch.sqlalchemy import Batch from src.db.queries.base.builder import QueryBuilderBase +from src.db.queries.implementations.core.get.recent_batch_summaries.pending_url.cte import PENDING_URL_CTE from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.builder import URLCountsCTEQueryBuilder from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.labels import URLCountsLabels @@ -24,9 +25,9 @@ def __init__( batch_id: int | None = None, ): super().__init__() + self.has_pending_urls = has_pending_urls self.url_counts_cte = URLCountsCTEQueryBuilder( page=page, - has_pending_urls=has_pending_urls, collector_type=collector_type, status=status, batch_id=batch_id, @@ -49,6 +50,14 @@ async def run(self, session: AsyncSession) -> list[BatchSummary]: builder.query, builder.get(count_labels.batch_id) == Batch.id, ) + if self.has_pending_urls is not None: + query = query.join( + PENDING_URL_CTE, + PENDING_URL_CTE.c.batch_id == Batch.id, + ).where( + PENDING_URL_CTE.c.has_pending_urls == self.has_pending_urls + ) + raw_results = await session.execute(query) summaries: list[BatchSummary] = [] diff --git a/src/db/queries/implementations/core/get/recent_batch_summaries/pending_url/__init__.py 
b/src/db/queries/implementations/core/get/recent_batch_summaries/pending_url/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/queries/implementations/core/get/recent_batch_summaries/pending_url/cte.py b/src/db/queries/implementations/core/get/recent_batch_summaries/pending_url/cte.py new file mode 100644 index 00000000..a0722229 --- /dev/null +++ b/src/db/queries/implementations/core/get/recent_batch_summaries/pending_url/cte.py @@ -0,0 +1,30 @@ +from sqlalchemy import select, func, case, and_ + +from src.db.models.impl.batch.sqlalchemy import Batch +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL + +PENDING_URL_CTE = ( + select( + Batch.id.label("batch_id"), + case( + ( + and_( + func.count(LinkBatchURL.url_id) > func.count(FlagURLValidated.url_id), + ) + , True), + else_=False + ).label("has_pending_urls") + ) + .outerjoin( + LinkBatchURL, + LinkBatchURL.batch_id == Batch.id, + ) + .outerjoin( + FlagURLValidated, + FlagURLValidated.url_id == LinkBatchURL.url_id, + ) + .group_by( + Batch.id + ).cte("has_pending_urls") +) \ No newline at end of file diff --git a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/builder.py b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/builder.py index 72a33336..634cf419 100644 --- a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/builder.py +++ b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/builder.py @@ -1,15 +1,22 @@ -from typing import Optional - from sqlalchemy import Select, case, Label, and_, exists -from sqlalchemy.sql.functions import count, coalesce +from sqlalchemy.sql.functions import count, coalesce, func from src.collectors.enums import URLStatus, CollectorType from src.core.enums import BatchStatus -from src.db.models.impl.link.batch_url import LinkBatchURL +from 
src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.batch.sqlalchemy import Batch +from src.db.models.impl.url.data_source.sqlalchemy import URLDataSource from src.db.queries.base.builder import QueryBuilderBase from src.db.queries.helpers import add_page_offset +from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.cte.all import ALL_CTE +from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.cte.duplicate import DUPLICATE_CTE +from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.cte.error import ERROR_CTE +from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.cte.not_relevant import NOT_RELEVANT_CTE +from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.cte.pending import PENDING_CTE +from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.cte.submitted import SUBMITTED_CTE from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.labels import URLCountsLabels @@ -18,14 +25,12 @@ class URLCountsCTEQueryBuilder(QueryBuilderBase): def __init__( self, page: int = 1, - has_pending_urls: bool | None = None, collector_type: CollectorType | None = None, status: BatchStatus | None = None, batch_id: int | None = None ): super().__init__(URLCountsLabels()) self.page = page - self.has_pending_urls = has_pending_urls self.collector_type = collector_type self.status = status self.batch_id = batch_id @@ -33,31 +38,31 @@ def __init__( def get_core_query(self): labels: URLCountsLabels = self.labels - return ( + query = ( Select( Batch.id.label(labels.batch_id), - coalesce(count(URL.id), 0).label(labels.total), - self.count_case_url_status(URLStatus.PENDING, 
labels.pending), - self.count_case_url_status(URLStatus.SUBMITTED, labels.submitted), - self.count_case_url_status(URLStatus.NOT_RELEVANT, labels.not_relevant), - self.count_case_url_status(URLStatus.ERROR, labels.error), - self.count_case_url_status(URLStatus.DUPLICATE, labels.duplicate), + func.coalesce(DUPLICATE_CTE.count, 0).label(labels.duplicate), + func.coalesce(SUBMITTED_CTE.count, 0).label(labels.submitted), + func.coalesce(PENDING_CTE.count, 0).label(labels.pending), + func.coalesce(ALL_CTE.count, 0).label(labels.total), + func.coalesce(NOT_RELEVANT_CTE.count, 0).label(labels.not_relevant), + func.coalesce(ERROR_CTE.count, 0).label(labels.error), ) .select_from(Batch) - .outerjoin(LinkBatchURL) - .outerjoin( - URL - ) ) + for cte in [DUPLICATE_CTE, SUBMITTED_CTE, PENDING_CTE, ALL_CTE, NOT_RELEVANT_CTE, ERROR_CTE]: + query = query.outerjoin( + cte.cte, + Batch.id == cte.batch_id + ) + return query def build(self): query = self.get_core_query() - query = self.apply_pending_urls_filter(query) query = self.apply_collector_type_filter(query) query = self.apply_status_filter(query) query = self.apply_batch_id_filter(query) - query = query.group_by(Batch.id) query = add_page_offset(query, page=self.page) query = query.order_by(Batch.id) self.query = query.cte("url_counts") @@ -67,23 +72,6 @@ def apply_batch_id_filter(self, query: Select): return query return query.where(Batch.id == self.batch_id) - def apply_pending_urls_filter(self, query: Select): - if self.has_pending_urls is None: - return query - pending_url_subquery = ( - exists( - Select(URL).join(LinkBatchURL).where( - and_( - LinkBatchURL.batch_id == Batch.id, - URL.status == URLStatus.PENDING.value - ) - ) - ) - ).correlate(Batch) - if self.has_pending_urls: - return query.where(pending_url_subquery) - return query.where(~pending_url_subquery) - def apply_collector_type_filter(self, query: Select): if self.collector_type is None: return query @@ -93,18 +81,3 @@ def apply_status_filter(self, query: 
Select): if self.status is None: return query return query.where(Batch.status == self.status.value) - - @staticmethod - def count_case_url_status( - url_status: URLStatus, - label: str - ) -> Label: - return ( - coalesce( - count( - case( - (URL.status == url_status.value, 1) - ) - ) - , 0).label(label) - ) diff --git a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/__init__.py b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/all.py b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/all.py new file mode 100644 index 00000000..5cab51cf --- /dev/null +++ b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/all.py @@ -0,0 +1,20 @@ +from sqlalchemy import select, func + +from src.db.models.impl.batch.sqlalchemy import Batch +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.cte_container import \ + URLCountsCTEContainer + +ALL_CTE = URLCountsCTEContainer( + select( + Batch.id, + func.count(LinkBatchURL.url_id).label("total_count") + ) + .join( + LinkBatchURL, + LinkBatchURL.batch_id == Batch.id, + ) + .group_by( + Batch.id + ).cte("total_count") +) \ No newline at end of file diff --git a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/duplicate.py b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/duplicate.py new file mode 100644 index 00000000..906dd49c --- /dev/null +++ b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/duplicate.py @@ -0,0 +1,29 @@ +from sqlalchemy import select, func + +from src.collectors.enums import URLStatus +from src.db.models.impl.batch.sqlalchemy import Batch +from 
src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.cte_container import \ + URLCountsCTEContainer + +DUPLICATE_CTE = URLCountsCTEContainer( + select( + Batch.id, + func.count(URL.id).label("duplicate_count") + ) + .join( + LinkBatchURL, + LinkBatchURL.batch_id == Batch.id, + ) + .join( + URL, + URL.id == LinkBatchURL.url_id, + ) + .where( + URL.status == URLStatus.DUPLICATE + ) + .group_by( + Batch.id + ).cte("duplicate_count") +) \ No newline at end of file diff --git a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/error.py b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/error.py new file mode 100644 index 00000000..b74020c4 --- /dev/null +++ b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/error.py @@ -0,0 +1,29 @@ +from sqlalchemy import select, func + +from src.collectors.enums import URLStatus +from src.db.models.impl.batch.sqlalchemy import Batch +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.cte_container import \ + URLCountsCTEContainer + +ERROR_CTE = URLCountsCTEContainer( + select( + Batch.id, + func.count(URL.id).label("error_count") + ) + .join( + LinkBatchURL, + LinkBatchURL.batch_id == Batch.id, + ) + .join( + URL, + URL.id == LinkBatchURL.url_id, + ) + .where( + URL.status == URLStatus.ERROR + ) + .group_by( + Batch.id + ).cte("error_count") +) \ No newline at end of file diff --git a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/not_relevant.py b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/not_relevant.py new file mode 100644 index 00000000..e84f597b --- /dev/null +++ 
b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/not_relevant.py @@ -0,0 +1,34 @@ +from sqlalchemy import select, func + +from src.db.models.impl.batch.sqlalchemy import Batch +from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.cte_container import \ + URLCountsCTEContainer + +NOT_RELEVANT_CTE = URLCountsCTEContainer( + select( + Batch.id, + func.count(URL.id).label("not_relevant_count") + ) + .join( + LinkBatchURL, + LinkBatchURL.batch_id == Batch.id, + ) + .join( + URL, + URL.id == LinkBatchURL.url_id, + ) + .join( + FlagURLValidated, + FlagURLValidated.url_id == URL.id, + ) + .where( + FlagURLValidated.type == URLValidatedType.NOT_RELEVANT + ) + .group_by( + Batch.id + ).cte("not_relevant_count") +) \ No newline at end of file diff --git a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/pending.py b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/pending.py new file mode 100644 index 00000000..b7e4594c --- /dev/null +++ b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/pending.py @@ -0,0 +1,33 @@ +from sqlalchemy import select, func + +from src.db.models.impl.batch.sqlalchemy import Batch +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.cte_container import \ + URLCountsCTEContainer + +PENDING_CTE = URLCountsCTEContainer( + select( + Batch.id, + func.count(URL.id).label("pending_count") + ) + .join( + 
LinkBatchURL, + LinkBatchURL.batch_id == Batch.id, + ) + .join( + URL, + URL.id == LinkBatchURL.url_id, + ) + .outerjoin( + FlagURLValidated, + FlagURLValidated.url_id == URL.id, + ) + .where( + FlagURLValidated.type.is_(None) + ) + .group_by( + Batch.id + ).cte("pending_count") +) \ No newline at end of file diff --git a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/submitted.py b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/submitted.py new file mode 100644 index 00000000..5ab305cc --- /dev/null +++ b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/submitted.py @@ -0,0 +1,32 @@ + + +from sqlalchemy import select, func + +from src.db.models.impl.batch.sqlalchemy import Batch +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.data_source.sqlalchemy import URLDataSource +from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.cte_container import \ + URLCountsCTEContainer + +SUBMITTED_CTE = URLCountsCTEContainer( + select( + Batch.id, + func.count(URL.id).label("submitted_count") + ) + .join( + LinkBatchURL, + LinkBatchURL.batch_id == Batch.id, + ) + .join( + URL, + URL.id == LinkBatchURL.url_id, + ) + .join( + URLDataSource, + URLDataSource.url_id == URL.id, + ) + .group_by( + Batch.id + ).cte("submitted_count") +) \ No newline at end of file diff --git a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte_container.py b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte_container.py new file mode 100644 index 00000000..7f769c76 --- /dev/null +++ b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte_container.py @@ -0,0 +1,18 @@ +from sqlalchemy import CTE, Column + + +class URLCountsCTEContainer: + + def __init__( + self, + cte: CTE + ): + self.cte = cte + + @property 
+ def batch_id(self) -> Column: + return self.cte.columns[0] + + @property + def count(self) -> Column: + return self.cte.columns[1] diff --git a/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py b/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py index 269dfced..5d69be2a 100644 --- a/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py +++ b/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py @@ -11,7 +11,7 @@ from src.db.models.impl.url.suggestion.relevant.user import UserRelevantSuggestion from src.db.models.mixins import URLDependentMixin from src.db.queries.base.builder import QueryBuilderBase -from src.db.queries.implementations.core.common.annotation_exists import AnnotationExistsCTEQueryBuilder +from src.db.queries.implementations.core.common.annotation_exists_.core import AnnotationExistsCTEQueryBuilder class PendingAnnotationExistsCTEQueryBuilder(AnnotationExistsCTEQueryBuilder): @@ -44,7 +44,7 @@ async def build(self) -> Any: URL.id == self.url_id ) .where( - URL.status == URLStatus.PENDING.value + URL.status == URLStatus.OK.value ).cte("pending") ) diff --git a/src/db/statement_composer.py b/src/db/statement_composer.py index 45a281de..8e172733 100644 --- a/src/db/statement_composer.py +++ b/src/db/statement_composer.py @@ -2,22 +2,19 @@ from typing import Any from sqlalchemy import Select, select, exists, func, Subquery, and_, not_, ColumnElement -from sqlalchemy.orm import aliased, selectinload +from sqlalchemy.orm import selectinload from src.collectors.enums import URLStatus from src.core.enums import BatchStatus from src.db.constants import STANDARD_ROW_LIMIT from src.db.enums import TaskType -from src.db.models.impl.link.batch_url import LinkBatchURL +from src.db.models.impl.batch.sqlalchemy import Batch +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.link.task_url import LinkTaskURL -from 
src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.task.core import Task -from src.db.models.impl.url.html.content.sqlalchemy import URLHTMLContent -from src.db.models.impl.url.optional_data_source_metadata import URLOptionalDataSourceMetadata from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.batch.sqlalchemy import Batch +from src.db.models.impl.url.optional_data_source_metadata import URLOptionalDataSourceMetadata from src.db.models.impl.url.scrape_info.sqlalchemy import URLScrapeInfo -from src.db.models.impl.url.suggestion.agency.auto import AutomatedUrlAgencySuggestion from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata from src.db.types import UserSuggestionType @@ -75,28 +72,11 @@ def simple_count_subquery(model, attribute: str, label: str) -> Subquery: func.count(attr_value).label(label) ).group_by(attr_value).subquery() - @staticmethod - def exclude_urls_with_agency_suggestions( - statement: Select - ): - # Aliases for clarity - AutomatedSuggestion = aliased(AutomatedUrlAgencySuggestion) - - # Exclude if automated suggestions exist - statement = statement.where( - ~exists().where(AutomatedSuggestion.url_id == URL.id) - ) - # Exclude if confirmed agencies exist - statement = statement.where( - ~exists().where(LinkURLAgency.url_id == URL.id) - ) - return statement - @staticmethod def pending_urls_missing_miscellaneous_metadata_query() -> Select: query = select(URL).where( and_( - URL.status == URLStatus.PENDING.value, + URL.status == URLStatus.OK.value, URL.name == None, URL.description == None, URLOptionalDataSourceMetadata.url_id == None diff --git a/src/db/templates/requester.py b/src/db/templates/requester.py new file mode 100644 index 00000000..b56af87f --- /dev/null +++ b/src/db/templates/requester.py @@ -0,0 +1,20 @@ +""" +A requester is a class that contains a session and provides methods for +performing database operations. 
+""" +from abc import ABC + +from sqlalchemy.ext.asyncio import AsyncSession + +import src.db.helpers.session.session_helper as sh +from src.db.queries.base.builder import QueryBuilderBase + + +class RequesterBase(ABC): + + def __init__(self, session: AsyncSession): + self.session = session + self.session_helper = sh + + async def run_query_builder(self, query_builder: QueryBuilderBase): + return await query_builder.run(session=self.session) \ No newline at end of file diff --git a/src/external/pdap/client.py b/src/external/pdap/client.py index ee357ad4..0e0d5a39 100644 --- a/src/external/pdap/client.py +++ b/src/external/pdap/client.py @@ -1,10 +1,14 @@ -from typing import Optional +from datetime import date +from typing import Any -from pdap_access_manager import AccessManager, DataSourcesNamespaces, RequestInfo, RequestType +from pdap_access_manager import AccessManager, DataSourcesNamespaces, RequestInfo, RequestType, ResponseInfo from src.core.tasks.scheduled.impl.sync.agency.dtos.parameters import AgencySyncParameters from src.core.tasks.scheduled.impl.sync.data_sources.params import DataSourcesSyncParameters from src.core.tasks.url.operators.submit_approved.tdo import SubmitApprovedURLTDO, SubmittedURLInfo +from src.external.pdap.dtos.search_agency_by_location.params import SearchAgencyByLocationParams +from src.external.pdap.dtos.search_agency_by_location.response import SearchAgencyByLocationResponse, \ + SearchAgencyByLocationOuterResponse from src.external.pdap.dtos.sync.agencies import AgenciesSyncResponseInnerInfo, AgenciesSyncResponseInfo from src.external.pdap.dtos.match_agency.post import MatchAgencyInfo from src.external.pdap.dtos.match_agency.response import MatchAgencyResponse @@ -21,6 +25,38 @@ def __init__( ): self.access_manager = access_manager + async def search_agency_by_location( + self, + params: list[SearchAgencyByLocationParams] + ) -> list[SearchAgencyByLocationResponse]: + request_url: str = self.access_manager.build_url( + 
namespace=DataSourcesNamespaces.SOURCE_COLLECTOR, + subdomains=["agencies", "search", "location"] + ) + headers: dict[str, str] = await self.access_manager.jwt_header() + headers['Content-Type']: str = "application/json" + + json_params: list[dict[str, Any]] = [ + param.model_dump(mode='json') + for param in params + ] + + request_info = RequestInfo( + type_=RequestType.POST, + url=request_url, + headers=headers, + json_={ + "requests": json_params + } + ) + response_info: ResponseInfo = await self.access_manager.make_request(request_info) + + outer_response = SearchAgencyByLocationOuterResponse( + **response_info.data + ) + + return outer_response.responses + async def match_agency( self, name: str, @@ -31,13 +67,13 @@ async def match_agency( """ Returns agencies, if any, that match or partially match the search criteria """ - url = self.access_manager.build_url( + url: str = self.access_manager.build_url( namespace=DataSourcesNamespaces.MATCH, subdomains=["agency"] ) - headers = await self.access_manager.jwt_header() - headers['Content-Type'] = "application/json" + headers: dict[str, str] = await self.access_manager.jwt_header() + headers['Content-Type']: str = "application/json" request_info = RequestInfo( type_=RequestType.POST, url=url, @@ -49,15 +85,15 @@ async def match_agency( "locality": locality } ) - response_info = await self.access_manager.make_request(request_info) - matches = [] + response_info: ResponseInfo = await self.access_manager.make_request(request_info) + matches: list[MatchAgencyInfo] = [] for agency in response_info.data["agencies"]: mai = MatchAgencyInfo( id=agency['id'], submitted_name=agency['name'] ) if len(agency['locations']) > 0: - first_location = agency['locations'][0] + first_location: dict[str, Any] = agency['locations'][0] mai.state = first_location['state'] mai.county = first_location['county'] mai.locality = first_location['locality'] @@ -75,7 +111,7 @@ async def is_url_duplicate( """ Check if a URL is unique. 
Returns duplicate info otherwise """ - url = self.access_manager.build_url( + url: str = self.access_manager.build_url( namespace=DataSourcesNamespaces.CHECK, subdomains=["unique-url"] ) @@ -86,9 +122,11 @@ async def is_url_duplicate( "url": url_to_check } ) - response_info = await self.access_manager.make_request(request_info) - duplicates = [UniqueURLDuplicateInfo(**entry) for entry in response_info.data["duplicates"]] - is_duplicate = (len(duplicates) != 0) + response_info: ResponseInfo = await self.access_manager.make_request(request_info) + duplicates: list[UniqueURLDuplicateInfo] = [ + UniqueURLDuplicateInfo(**entry) for entry in response_info.data["duplicates"] + ] + is_duplicate: bool = (len(duplicates) != 0) return is_duplicate async def submit_urls( @@ -105,11 +143,11 @@ async def submit_urls( ) # Build url-id dictionary - url_id_dict = {} + url_id_dict: dict[str, int] = {} for tdo in tdos: url_id_dict[tdo.url] = tdo.url_id - data_sources_json = [] + data_sources_json: list[dict[str, Any]] = [] for tdo in tdos: data_sources_json.append( { @@ -125,7 +163,7 @@ async def submit_urls( } ) - headers = await self.access_manager.jwt_header() + headers: dict[str, str] = await self.access_manager.jwt_header() request_info = RequestInfo( type_=RequestType.POST, url=request_url, @@ -134,12 +172,12 @@ async def submit_urls( "data_sources": data_sources_json } ) - response_info = await self.access_manager.make_request(request_info) - data_sources_response_json = response_info.data["data_sources"] + response_info: ResponseInfo = await self.access_manager.make_request(request_info) + data_sources_response_json: list[dict[str, Any]] = response_info.data["data_sources"] - results = [] + results: list[SubmittedURLInfo] = [] for data_source in data_sources_response_json: - url = data_source["url"] + url: str = data_source["url"] response_object = SubmittedURLInfo( url_id=url_id_dict[url], data_source_id=data_source["data_source_id"], @@ -153,25 +191,28 @@ async def 
sync_agencies( self, params: AgencySyncParameters ) -> AgenciesSyncResponseInfo: - url =self.access_manager.build_url( + url: str = self.access_manager.build_url( namespace=DataSourcesNamespaces.SOURCE_COLLECTOR, subdomains=[ "agencies", "sync" ] ) - headers = await self.access_manager.jwt_header() - headers['Content-Type'] = "application/json" + headers: dict[str, str] = await self.access_manager.jwt_header() + headers['Content-Type']: str = "application/json" + request_params: dict[str, Any] = { + "page": params.page + } + if params.cutoff_date is not None: + params["updated_at"]: date = params.cutoff_date + request_info = RequestInfo( type_=RequestType.GET, url=url, headers=headers, - params={ - "page": params.page, - "updated_at": params.cutoff_date - } + params=request_params ) - response_info = await self.access_manager.make_request(request_info) + response_info: ResponseInfo = await self.access_manager.make_request(request_info) return AgenciesSyncResponseInfo( agencies=[ AgenciesSyncResponseInnerInfo(**entry) @@ -183,18 +224,18 @@ async def sync_data_sources( self, params: DataSourcesSyncParameters ) -> DataSourcesSyncResponseInfo: - url = self.access_manager.build_url( + url: str = self.access_manager.build_url( namespace=DataSourcesNamespaces.SOURCE_COLLECTOR, subdomains=[ "data-sources", "sync" ] ) - headers = await self.access_manager.jwt_header() - headers['Content-Type'] = "application/json" - params_dict = {"page": params.page} + headers: dict[str, str] = await self.access_manager.jwt_header() + headers['Content-Type']: str = "application/json" + params_dict: dict[str, Any] = {"page": params.page} if params.cutoff_date is not None: - params_dict["updated_at"] = params.cutoff_date + params_dict["updated_at"]: date = params.cutoff_date request_info = RequestInfo( type_=RequestType.GET, @@ -202,10 +243,10 @@ async def sync_data_sources( headers=headers, params=params_dict ) - response_info = await self.access_manager.make_request(request_info) + 
response_info: ResponseInfo = await self.access_manager.make_request(request_info) return DataSourcesSyncResponseInfo( data_sources=[ DataSourcesSyncResponseInnerInfo(**entry) for entry in response_info.data["data_sources"] ] - ) \ No newline at end of file + ) diff --git a/src/external/pdap/dtos/search_agency_by_location/__init__.py b/src/external/pdap/dtos/search_agency_by_location/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/external/pdap/dtos/search_agency_by_location/params.py b/src/external/pdap/dtos/search_agency_by_location/params.py new file mode 100644 index 00000000..ca5a6213 --- /dev/null +++ b/src/external/pdap/dtos/search_agency_by_location/params.py @@ -0,0 +1,11 @@ +from pydantic import BaseModel, Field + + +class SearchAgencyByLocationParams(BaseModel): + request_id: int + query: str + iso: str = Field( + description="US State ISO Code", + max_length=2, + + ) \ No newline at end of file diff --git a/src/external/pdap/dtos/search_agency_by_location/response.py b/src/external/pdap/dtos/search_agency_by_location/response.py new file mode 100644 index 00000000..92242b5a --- /dev/null +++ b/src/external/pdap/dtos/search_agency_by_location/response.py @@ -0,0 +1,12 @@ +from pydantic import BaseModel, Field + +class SearchAgencyByLocationAgencyInfo(BaseModel): + agency_id: int + similarity: float = Field(ge=0, le=1) + +class SearchAgencyByLocationResponse(BaseModel): + request_id: int + results: list[SearchAgencyByLocationAgencyInfo] = Field(min_length=1) + +class SearchAgencyByLocationOuterResponse(BaseModel): + responses: list[SearchAgencyByLocationResponse] \ No newline at end of file diff --git a/src/external/pdap/dtos/sync/agencies.py b/src/external/pdap/dtos/sync/agencies.py index 99483107..7e569a81 100644 --- a/src/external/pdap/dtos/sync/agencies.py +++ b/src/external/pdap/dtos/sync/agencies.py @@ -3,6 +3,8 @@ from pydantic import BaseModel + + class AgenciesSyncResponseInnerInfo(BaseModel): display_name: str 
agency_id: int @@ -10,6 +12,7 @@ class AgenciesSyncResponseInnerInfo(BaseModel): county_name: str | None locality_name: str | None updated_at: datetime.datetime + meta_urls: list[str] = [] class AgenciesSyncResponseInfo(BaseModel): agencies: list[AgenciesSyncResponseInnerInfo] diff --git a/src/util/alembic_helpers.py b/src/util/alembic_helpers.py index 47a24cac..9df2be52 100644 --- a/src/util/alembic_helpers.py +++ b/src/util/alembic_helpers.py @@ -8,6 +8,7 @@ def switch_enum_type( new_enum_values, drop_old_enum=True, check_constraints_to_drop: list[str] = None, + conversion_mappings: dict[str, str] = None ): """ Switches an ENUM type in a PostgreSQL column by: @@ -21,6 +22,8 @@ def switch_enum_type( :param enum_name: Name of the ENUM type in PostgreSQL. :param new_enum_values: List of new ENUM values. :param drop_old_enum: Whether to drop the old ENUM type. + :param check_constraints_to_drop: List of check constraints to drop before switching the ENUM type. + :param conversion_mappings: Dictionary of old values to new values for the ENUM type. """ # 1. 
Drop check constraints that reference the enum @@ -38,7 +41,21 @@ def switch_enum_type( new_enum_type.create(op.get_bind()) # Alter the column type to use the new enum type - op.execute(f'ALTER TABLE "{table_name}" ALTER COLUMN "{column_name}" TYPE "{enum_name}" USING "{column_name}"::text::{enum_name}') + if conversion_mappings is None: + op.execute(f'ALTER TABLE "{table_name}" ALTER COLUMN "{column_name}" TYPE "{enum_name}" USING "{column_name}"::text::{enum_name}') + if conversion_mappings is not None: + case_when: str = "" + for old_value, new_value in conversion_mappings.items(): + case_when += f"WHEN '{old_value}' THEN '{new_value}'\n" + + op.execute(f""" + ALTER TABLE "{table_name}" + ALTER COLUMN "{column_name}" TYPE "{enum_name}" + USING CASE {column_name}::text + {case_when} + ELSE "{column_name}"::text + END::{enum_name}; + """) # Drop the old enum type if drop_old_enum: @@ -86,6 +103,18 @@ def updated_at_column() -> sa.Column: comment='The last time the row was updated.' ) +def task_id_column() -> sa.Column: + return sa.Column( + 'task_id', + sa.Integer(), + sa.ForeignKey( + 'tasks.id', + ondelete='CASCADE' + ), + nullable=False, + comment='A foreign key to the `tasks` table.' + ) + def url_id_column(name: str = 'url_id') -> sa.Column: return sa.Column( name, @@ -108,4 +137,16 @@ def batch_id_column(nullable=False) -> sa.Column: ), nullable=nullable, comment='A foreign key to the `batches` table.' + ) + +def agency_id_column(nullable=False) -> sa.Column: + return sa.Column( + 'agency_id', + sa.Integer(), + sa.ForeignKey( + 'agencies.agency_id', + ondelete='CASCADE' + ), + nullable=nullable, + comment='A foreign key to the `agencies` table.' 
) \ No newline at end of file diff --git a/tests/automated/integration/api/annotate/__init__.py b/tests/automated/integration/api/annotate/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/api/annotate/agency/__init__.py b/tests/automated/integration/api/annotate/agency/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/api/annotate/agency/test_multiple_auto_suggestions.py b/tests/automated/integration/api/annotate/agency/test_multiple_auto_suggestions.py new file mode 100644 index 00000000..65b20b0c --- /dev/null +++ b/tests/automated/integration/api/annotate/agency/test_multiple_auto_suggestions.py @@ -0,0 +1,46 @@ +import pytest + +from src.core.enums import SuggestionType +from tests.helpers.data_creator.models.creation_info.batch.v1 import BatchURLCreationInfo + + +@pytest.mark.asyncio +async def test_annotate_agency_multiple_auto_suggestions(api_test_helper): + """ + Test Scenario: Multiple Auto Suggestions + A URL has multiple Agency Auto Suggestion and has not been annotated by the User + The user should receive all of the auto suggestions with full detail + """ + ath = api_test_helper + buci: BatchURLCreationInfo = await ath.db_data_creator.batch_and_urls( + url_count=1, + with_html_content=True + ) + await ath.db_data_creator.auto_suggestions( + url_ids=buci.url_ids, + num_suggestions=2, + suggestion_type=SuggestionType.AUTO_SUGGESTION + ) + + # User requests next annotation + response = await ath.request_validator.get_next_agency_annotation() + + assert response.next_annotation + next_annotation = response.next_annotation + # Check that url_id matches the one we inserted + assert next_annotation.url_info.url_id == buci.url_ids[0] + + # Check that html data is present + assert next_annotation.html_info.description != "" + assert next_annotation.html_info.title != "" + + # Check that two agency_suggestions exist + assert len(next_annotation.agency_suggestions) 
== 2 + + for agency_suggestion in next_annotation.agency_suggestions: + assert agency_suggestion.suggestion_type == SuggestionType.AUTO_SUGGESTION + assert agency_suggestion.pdap_agency_id is not None + assert agency_suggestion.agency_name is not None + assert agency_suggestion.state is not None + assert agency_suggestion.county is not None + assert agency_suggestion.locality is not None diff --git a/tests/automated/integration/api/annotate/agency/test_multiple_auto_suggestions_no_html.py b/tests/automated/integration/api/annotate/agency/test_multiple_auto_suggestions_no_html.py new file mode 100644 index 00000000..5bcb4569 --- /dev/null +++ b/tests/automated/integration/api/annotate/agency/test_multiple_auto_suggestions_no_html.py @@ -0,0 +1,35 @@ +import pytest + +from src.core.enums import SuggestionType +from tests.helpers.data_creator.models.creation_info.batch.v1 import BatchURLCreationInfo + + +@pytest.mark.asyncio +async def test_annotate_agency_multiple_auto_suggestions_no_html(api_test_helper): + """ + Test Scenario: Multiple Auto Suggestions + A URL has multiple Agency Auto Suggestion and has not been annotated by the User + The user should receive all of the auto suggestions with full detail + """ + ath = api_test_helper + buci: BatchURLCreationInfo = await ath.db_data_creator.batch_and_urls( + url_count=1, + with_html_content=False + ) + await ath.db_data_creator.auto_suggestions( + url_ids=buci.url_ids, + num_suggestions=2, + suggestion_type=SuggestionType.AUTO_SUGGESTION + ) + + # User requests next annotation + response = await ath.request_validator.get_next_agency_annotation() + + assert response.next_annotation + next_annotation = response.next_annotation + # Check that url_id matches the one we inserted + assert next_annotation.url_info.url_id == buci.url_ids[0] + + # Check that html data is not present + assert next_annotation.html_info.description == "" + assert next_annotation.html_info.title == "" diff --git 
a/tests/automated/integration/api/annotate/agency/test_other_user_annotation.py b/tests/automated/integration/api/annotate/agency/test_other_user_annotation.py new file mode 100644 index 00000000..a3ecae79 --- /dev/null +++ b/tests/automated/integration/api/annotate/agency/test_other_user_annotation.py @@ -0,0 +1,44 @@ +import pytest + +from tests.automated.integration.api.conftest import MOCK_USER_ID +from tests.helpers.setup.annotate_agency.core import setup_for_annotate_agency +from tests.helpers.setup.annotate_agency.model import AnnotateAgencySetupInfo + + +@pytest.mark.asyncio +async def test_annotate_agency_other_user_annotation(api_test_helper): + """ + Test Scenario: Other User Annotation + A URL has been annotated by another User + Our user should still receive this URL to annotate + """ + ath = api_test_helper + setup_info: AnnotateAgencySetupInfo = await setup_for_annotate_agency( + db_data_creator=ath.db_data_creator, + url_count=1 + ) + url_ids = setup_info.url_ids + + response = await ath.request_validator.get_next_agency_annotation() + + assert response.next_annotation + next_annotation = response.next_annotation + # Check that url_id matches the one we inserted + assert next_annotation.url_info.url_id == url_ids[0] + + # Check that html data is present + assert next_annotation.html_info.description != "" + assert next_annotation.html_info.title != "" + + # Check that one agency_suggestion exists + assert len(next_annotation.agency_suggestions) == 1 + + # Test that another user can insert a suggestion + await ath.db_data_creator.manual_suggestion( + user_id=MOCK_USER_ID + 1, + url_id=url_ids[0], + ) + + # After this, text that our user does not receive this URL + response = await ath.request_validator.get_next_agency_annotation() + assert response.next_annotation is None diff --git a/tests/automated/integration/api/annotate/agency/test_single_confirmed_agency.py b/tests/automated/integration/api/annotate/agency/test_single_confirmed_agency.py new 
file mode 100644 index 00000000..e38421e1 --- /dev/null +++ b/tests/automated/integration/api/annotate/agency/test_single_confirmed_agency.py @@ -0,0 +1,22 @@ +import pytest + +from tests.helpers.data_creator.models.creation_info.batch.v1 import BatchURLCreationInfo + + +@pytest.mark.asyncio +async def test_annotate_agency_single_confirmed_agency(api_test_helper): + """ + Test Scenario: Single Confirmed Agency + A URL has a single Confirmed Agency and has not been annotated by the User + The user should not receive this URL to annotate + """ + ath = api_test_helper + buci: BatchURLCreationInfo = await ath.db_data_creator.batch_and_urls( + url_count=1, + with_html_content=True + ) + await ath.db_data_creator.confirmed_suggestions( + url_ids=buci.url_ids, + ) + response = await ath.request_validator.get_next_agency_annotation() + assert response.next_annotation is None diff --git a/tests/automated/integration/api/annotate/agency/test_single_unknown_auto_suggestions.py b/tests/automated/integration/api/annotate/agency/test_single_unknown_auto_suggestions.py new file mode 100644 index 00000000..f911bba5 --- /dev/null +++ b/tests/automated/integration/api/annotate/agency/test_single_unknown_auto_suggestions.py @@ -0,0 +1,45 @@ +import pytest + +from src.core.enums import SuggestionType +from tests.helpers.data_creator.models.creation_info.batch.v1 import BatchURLCreationInfo + + +@pytest.mark.asyncio +async def test_annotate_agency_single_unknown_auto_suggestion(api_test_helper): + """ + Test Scenario: Single Unknown Auto Suggestion + A URL has a single Unknown Agency Auto Suggestion and has not been annotated by the User + The user should receive a single Unknown Auto Suggestion lacking other detail + """ + ath = api_test_helper + buci: BatchURLCreationInfo = await ath.db_data_creator.batch_and_urls( + url_count=1, + with_html_content=True + ) + await ath.db_data_creator.auto_suggestions( + url_ids=buci.url_ids, + num_suggestions=1, + 
suggestion_type=SuggestionType.UNKNOWN + ) + response = await ath.request_validator.get_next_agency_annotation() + + assert response.next_annotation + next_annotation = response.next_annotation + # Check that url_id matches the one we inserted + assert next_annotation.url_info.url_id == buci.url_ids[0] + + # Check that html data is present + assert next_annotation.html_info.description != "" + assert next_annotation.html_info.title != "" + + # Check that one agency_suggestion exists + assert len(next_annotation.agency_suggestions) == 1 + + agency_suggestion = next_annotation.agency_suggestions[0] + + assert agency_suggestion.suggestion_type == SuggestionType.UNKNOWN + assert agency_suggestion.pdap_agency_id is None + assert agency_suggestion.agency_name is None + assert agency_suggestion.state is None + assert agency_suggestion.county is None + assert agency_suggestion.locality is None diff --git a/tests/automated/integration/api/annotate/agency/test_submit_and_get_next.py b/tests/automated/integration/api/annotate/agency/test_submit_and_get_next.py new file mode 100644 index 00000000..91049daa --- /dev/null +++ b/tests/automated/integration/api/annotate/agency/test_submit_and_get_next.py @@ -0,0 +1,42 @@ +import pytest + +from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo +from tests.helpers.setup.annotate_agency.core import setup_for_annotate_agency +from tests.helpers.setup.annotate_agency.model import AnnotateAgencySetupInfo + + +@pytest.mark.asyncio +async def test_annotate_agency_submit_and_get_next(api_test_helper): + """ + Test Scenario: Submit and Get Next (no other URL available) + A URL has been annotated by our User, and no other valid URLs have not been annotated + Our user should not receive another URL to annotate + Until another relevant URL is added + """ + ath = api_test_helper + setup_info: AnnotateAgencySetupInfo = await setup_for_annotate_agency( + db_data_creator=ath.db_data_creator, + url_count=2 + ) + url_ids 
= setup_info.url_ids + + # User should submit an annotation and receive the next + response = await ath.request_validator.post_agency_annotation_and_get_next( + url_id=url_ids[0], + agency_annotation_post_info=URLAgencyAnnotationPostInfo( + suggested_agency=await ath.db_data_creator.agency(), + is_new=False + ) + + ) + assert response.next_annotation is not None + + # User should submit this annotation and receive none for the next + response = await ath.request_validator.post_agency_annotation_and_get_next( + url_id=url_ids[1], + agency_annotation_post_info=URLAgencyAnnotationPostInfo( + suggested_agency=await ath.db_data_creator.agency(), + is_new=False + ) + ) + assert response.next_annotation is None diff --git a/tests/automated/integration/api/annotate/agency/test_submit_new.py b/tests/automated/integration/api/annotate/agency/test_submit_new.py new file mode 100644 index 00000000..e82c767f --- /dev/null +++ b/tests/automated/integration/api/annotate/agency/test_submit_new.py @@ -0,0 +1,38 @@ +import pytest + +from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo +from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion +from tests.helpers.setup.annotate_agency.core import setup_for_annotate_agency +from tests.helpers.setup.annotate_agency.model import AnnotateAgencySetupInfo + + +@pytest.mark.asyncio +async def test_annotate_agency_submit_new(api_test_helper): + """ + Test Scenario: Submit New + Our user receives an annotation and marks it as `NEW` + This should complete successfully + And within the database the annotation should be marked as `NEW` + """ + ath = api_test_helper + adb_client = ath.adb_client() + setup_info: AnnotateAgencySetupInfo = await setup_for_annotate_agency( + db_data_creator=ath.db_data_creator, + url_count=1 + ) + url_ids = setup_info.url_ids + + # User should submit an annotation and mark it as New + response = await ath.request_validator.post_agency_annotation_and_get_next( + 
url_id=url_ids[0], + agency_annotation_post_info=URLAgencyAnnotationPostInfo( + suggested_agency=await ath.db_data_creator.agency(), + is_new=True + ) + ) + assert response.next_annotation is None + + # Within database, the annotation should be marked as `NEW` + all_manual_suggestions = await adb_client.get_all(UserUrlAgencySuggestion) + assert len(all_manual_suggestions) == 1 + assert all_manual_suggestions[0].is_new diff --git a/tests/automated/integration/api/annotate/all/__init__.py b/tests/automated/integration/api/annotate/all/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/api/annotate/all/test_happy_path.py b/tests/automated/integration/api/annotate/all/test_happy_path.py new file mode 100644 index 00000000..5003f08f --- /dev/null +++ b/tests/automated/integration/api/annotate/all/test_happy_path.py @@ -0,0 +1,88 @@ +import pytest + +from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo +from src.api.endpoints.annotate.all.post.dto import AllAnnotationPostInfo +from src.core.enums import SuggestedStatus, RecordType +from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion +from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion +from src.db.models.impl.url.suggestion.relevant.user import UserRelevantSuggestion +from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review + + +@pytest.mark.asyncio +async def test_annotate_all(api_test_helper): + """ + Test the happy path workflow for the all-annotations endpoint + The user should be able to get a valid URL (filtering on batch id if needed), + submit a full annotation, and receive another URL + """ + ath = api_test_helper + adb_client = ath.adb_client() + setup_info_1 = await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, include_user_annotations=False + ) + url_mapping_1 = setup_info_1.url_mapping + setup_info_2 
= await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, include_user_annotations=False + ) + url_mapping_2 = setup_info_2.url_mapping + + # First, get a valid URL to annotate + get_response_1 = await ath.request_validator.get_next_url_for_all_annotations() + + # Apply the second batch id as a filter and see that a different URL is returned + get_response_2 = await ath.request_validator.get_next_url_for_all_annotations( + batch_id=setup_info_2.batch_id + ) + + assert get_response_1.next_annotation.url_info.url_id != get_response_2.next_annotation.url_info.url_id + + # Annotate the first and submit + agency_id = await ath.db_data_creator.agency() + post_response_1 = await ath.request_validator.post_all_annotations_and_get_next( + url_id=url_mapping_1.url_id, + all_annotations_post_info=AllAnnotationPostInfo( + suggested_status=SuggestedStatus.RELEVANT, + record_type=RecordType.ACCIDENT_REPORTS, + agency=URLAgencyAnnotationPostInfo( + is_new=False, + suggested_agency=agency_id + ) + ) + ) + assert post_response_1.next_annotation is not None + + # Confirm the second is received + assert post_response_1.next_annotation.url_info.url_id == url_mapping_2.url_id + + # Upon submitting the second, confirm that no more URLs are returned through either POST or GET + post_response_2 = await ath.request_validator.post_all_annotations_and_get_next( + url_id=url_mapping_2.url_id, + all_annotations_post_info=AllAnnotationPostInfo( + suggested_status=SuggestedStatus.NOT_RELEVANT, + ) + ) + assert post_response_2.next_annotation is None + + get_response_3 = await ath.request_validator.get_next_url_for_all_annotations() + assert get_response_3.next_annotation is None + + + # Check that all annotations are present in the database + + # Should be two relevance annotations, one True and one False + all_relevance_suggestions: list[UserRelevantSuggestion] = await adb_client.get_all(UserRelevantSuggestion) + assert len(all_relevance_suggestions) == 2 + assert 
all_relevance_suggestions[0].suggested_status == SuggestedStatus.RELEVANT.value + assert all_relevance_suggestions[1].suggested_status == SuggestedStatus.NOT_RELEVANT.value + + # Should be one agency + all_agency_suggestions = await adb_client.get_all(UserUrlAgencySuggestion) + assert len(all_agency_suggestions) == 1 + assert all_agency_suggestions[0].is_new == False + assert all_agency_suggestions[0].agency_id == agency_id + + # Should be one record type + all_record_type_suggestions = await adb_client.get_all(UserRecordTypeSuggestion) + assert len(all_record_type_suggestions) == 1 + assert all_record_type_suggestions[0].record_type == RecordType.ACCIDENT_REPORTS.value diff --git a/tests/automated/integration/api/annotate/all/test_post_batch_filtering.py b/tests/automated/integration/api/annotate/all/test_post_batch_filtering.py new file mode 100644 index 00000000..a11c43a3 --- /dev/null +++ b/tests/automated/integration/api/annotate/all/test_post_batch_filtering.py @@ -0,0 +1,41 @@ +import pytest + +from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo +from src.api.endpoints.annotate.all.post.dto import AllAnnotationPostInfo +from src.core.enums import SuggestedStatus, RecordType +from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review + + +@pytest.mark.asyncio +async def test_annotate_all_post_batch_filtering(api_test_helper): + """ + Batch filtering should also work when posting annotations + """ + ath = api_test_helper + adb_client = ath.adb_client() + setup_info_1 = await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, include_user_annotations=False + ) + url_mapping_1 = setup_info_1.url_mapping + setup_info_2 = await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, include_user_annotations=False + ) + setup_info_3 = await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, include_user_annotations=False + 
) + url_mapping_3 = setup_info_3.url_mapping + + # Submit the first annotation, using the third batch id, and receive the third URL + post_response_1 = await ath.request_validator.post_all_annotations_and_get_next( + url_id=url_mapping_1.url_id, + batch_id=setup_info_3.batch_id, + all_annotations_post_info=AllAnnotationPostInfo( + suggested_status=SuggestedStatus.RELEVANT, + record_type=RecordType.ACCIDENT_REPORTS, + agency=URLAgencyAnnotationPostInfo( + is_new=True + ) + ) + ) + + assert post_response_1.next_annotation.url_info.url_id == url_mapping_3.url_id diff --git a/tests/automated/integration/api/annotate/all/test_validation_error.py b/tests/automated/integration/api/annotate/all/test_validation_error.py new file mode 100644 index 00000000..b805a435 --- /dev/null +++ b/tests/automated/integration/api/annotate/all/test_validation_error.py @@ -0,0 +1,27 @@ +import pytest + +from src.api.endpoints.annotate.all.post.dto import AllAnnotationPostInfo +from src.core.enums import SuggestedStatus, RecordType +from src.core.exceptions import FailedValidationException +from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review + + +@pytest.mark.asyncio +async def test_annotate_all_validation_error(api_test_helper): + """ + Validation errors in the PostInfo DTO should result in a 400 BAD REQUEST response + """ + ath = api_test_helper + setup_info_1 = await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, include_user_annotations=False + ) + url_mapping_1 = setup_info_1.url_mapping + + with pytest.raises(FailedValidationException) as e: + response = await ath.request_validator.post_all_annotations_and_get_next( + url_id=url_mapping_1.url_id, + all_annotations_post_info=AllAnnotationPostInfo( + suggested_status=SuggestedStatus.NOT_RELEVANT, + record_type=RecordType.ACCIDENT_REPORTS + ) + ) diff --git a/tests/automated/integration/api/annotate/helpers.py b/tests/automated/integration/api/annotate/helpers.py new 
file mode 100644 index 00000000..39cfedab --- /dev/null +++ b/tests/automated/integration/api/annotate/helpers.py @@ -0,0 +1,22 @@ +from src.core.tasks.url.operators.html.scraper.parser.dtos.response_html import ResponseHTMLInfo +from src.db.dtos.url.mapping import URLMapping + + +def check_url_mappings_match( + map_1: URLMapping, + map_2: URLMapping +): + assert map_1.url_id == map_2.url_id + assert map_2.url == map_2.url + + +def check_html_info_not_empty( + html_info: ResponseHTMLInfo +): + assert not html_info_empty(html_info) + + +def html_info_empty( + html_info: ResponseHTMLInfo +) -> bool: + return html_info.description == "" and html_info.title == "" diff --git a/tests/automated/integration/api/annotate/record_type/__init__.py b/tests/automated/integration/api/annotate/record_type/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/api/annotate/record_type/test_record_type.py b/tests/automated/integration/api/annotate/record_type/test_record_type.py new file mode 100644 index 00000000..5e6d8917 --- /dev/null +++ b/tests/automated/integration/api/annotate/record_type/test_record_type.py @@ -0,0 +1,166 @@ +from http import HTTPStatus + +import pytest +from fastapi import HTTPException + +from src.api.endpoints.annotate.dtos.record_type.post import RecordTypeAnnotationPostInfo +from src.api.endpoints.annotate.dtos.record_type.response import GetNextRecordTypeAnnotationResponseOuterInfo +from src.core.enums import RecordType +from src.core.error_manager.enums import ErrorTypes +from src.db.dtos.url.insert import InsertURLsInfo +from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion +from tests.automated.integration.api.annotate.helpers import check_url_mappings_match, check_html_info_not_empty, \ + html_info_empty +from tests.helpers.data_creator.models.creation_info.batch.v1 import BatchURLCreationInfo + + +@pytest.mark.asyncio +async def 
test_annotate_record_type(api_test_helper): + ath = api_test_helper + + batch_id = ath.db_data_creator.batch() + + # Create 2 URLs with outcome `pending` + iui: InsertURLsInfo = ath.db_data_creator.urls(batch_id=batch_id, url_count=2) + + url_1 = iui.url_mappings[0] + url_2 = iui.url_mappings[1] + + # Add record type attribute with value `Accident Reports` to 1st URL + await ath.db_data_creator.auto_record_type_suggestions( + url_id=url_1.url_id, + record_type=RecordType.ACCIDENT_REPORTS + ) + + # Add 'Record Type' attribute with value `Dispatch Recordings` to 2nd URL + await ath.db_data_creator.auto_record_type_suggestions( + url_id=url_2.url_id, + record_type=RecordType.DISPATCH_RECORDINGS + ) + + # Add HTML data to both + await ath.db_data_creator.html_data([url_1.url_id, url_2.url_id]) + + # Call `GET` `/annotate/record-type` and receive next URL + request_info_1: GetNextRecordTypeAnnotationResponseOuterInfo = api_test_helper.request_validator.get_next_record_type_annotation() + inner_info_1 = request_info_1.next_annotation + + check_url_mappings_match(inner_info_1.url_info, url_1) + check_html_info_not_empty(inner_info_1.html_info) + + # Validate that the correct record type is returned + assert inner_info_1.suggested_record_type == RecordType.ACCIDENT_REPORTS + + # Annotate with value 'Personnel Records' and get next URL + request_info_2: GetNextRecordTypeAnnotationResponseOuterInfo = api_test_helper.request_validator.post_record_type_annotation_and_get_next( + url_id=inner_info_1.url_info.url_id, + record_type_annotation_post_info=RecordTypeAnnotationPostInfo( + record_type=RecordType.PERSONNEL_RECORDS + ) + ) + + inner_info_2 = request_info_2.next_annotation + + check_url_mappings_match(inner_info_2.url_info, url_2) + check_html_info_not_empty(inner_info_2.html_info) + + request_info_3: GetNextRecordTypeAnnotationResponseOuterInfo = api_test_helper.request_validator.post_record_type_annotation_and_get_next( + url_id=inner_info_2.url_info.url_id, + 
record_type_annotation_post_info=RecordTypeAnnotationPostInfo( + record_type=RecordType.ANNUAL_AND_MONTHLY_REPORTS + ) + ) + + assert request_info_3.next_annotation is None + + # Get all URL annotations. Confirm they exist for user + adb_client = ath.adb_client() + results: list[UserRecordTypeSuggestion] = await adb_client.get_all(UserRecordTypeSuggestion) + result_1 = results[0] + result_2 = results[1] + + assert result_1.url_id == inner_info_1.url_info.url_id + assert result_1.record_type == RecordType.PERSONNEL_RECORDS.value + + assert result_2.url_id == inner_info_2.url_info.url_id + assert result_2.record_type == RecordType.ANNUAL_AND_MONTHLY_REPORTS.value + + # If user submits annotation for same URL, the URL should be overwritten + + request_info_4: GetNextRecordTypeAnnotationResponseOuterInfo = api_test_helper.request_validator.post_record_type_annotation_and_get_next( + url_id=inner_info_1.url_info.url_id, + record_type_annotation_post_info=RecordTypeAnnotationPostInfo( + record_type=RecordType.BOOKING_REPORTS + ) + ) + + assert request_info_4.next_annotation is None + + results: list[UserRecordTypeSuggestion] = await adb_client.get_all(UserRecordTypeSuggestion) + assert len(results) == 2 + + for result in results: + if result.url_id == inner_info_1.url_info.url_id: + assert result.record_type == RecordType.BOOKING_REPORTS.value + + +@pytest.mark.asyncio +async def test_annotate_record_type_already_annotated_by_different_user( + api_test_helper +): + ath = api_test_helper + + creation_info: BatchURLCreationInfo = await ath.db_data_creator.batch_and_urls( + url_count=1 + ) + + await ath.db_data_creator.user_record_type_suggestion( + url_id=creation_info.url_ids[0], + user_id=2, + record_type=RecordType.ACCIDENT_REPORTS + ) + + # Annotate with different user (default is 1) and get conflict error + try: + response = await ath.request_validator.post_record_type_annotation_and_get_next( + url_id=creation_info.url_ids[0], + 
record_type_annotation_post_info=RecordTypeAnnotationPostInfo( + record_type=RecordType.ANNUAL_AND_MONTHLY_REPORTS + ) + ) + except HTTPException as e: + assert e.status_code == HTTPStatus.CONFLICT + assert e.detail["detail"]["code"] == ErrorTypes.ANNOTATION_EXISTS.value + assert e.detail["detail"]["message"] == f"Annotation of type RECORD_TYPE already exists for url {creation_info.url_ids[0]}" + + +@pytest.mark.asyncio +async def test_annotate_record_type_no_html_info(api_test_helper): + ath = api_test_helper + + batch_id = ath.db_data_creator.batch() + + # Create 2 URLs with outcome `pending` + iui: InsertURLsInfo = ath.db_data_creator.urls(batch_id=batch_id, url_count=2) + + url_1 = iui.url_mappings[0] + url_2 = iui.url_mappings[1] + + # Add record type attribute with value `Accident Reports` to 1st URL + await ath.db_data_creator.auto_record_type_suggestions( + url_id=url_1.url_id, + record_type=RecordType.ACCIDENT_REPORTS + ) + + # Add 'Record Type' attribute with value `Dispatch Recordings` to 2nd URL + await ath.db_data_creator.auto_record_type_suggestions( + url_id=url_2.url_id, + record_type=RecordType.DISPATCH_RECORDINGS + ) + + # Call `GET` `/annotate/record-type` and receive next URL + request_info_1: GetNextRecordTypeAnnotationResponseOuterInfo = api_test_helper.request_validator.get_next_record_type_annotation() + inner_info_1 = request_info_1.next_annotation + + check_url_mappings_match(inner_info_1.url_info, url_1) + assert html_info_empty(inner_info_1.html_info) diff --git a/tests/automated/integration/api/annotate/relevancy/__init__.py b/tests/automated/integration/api/annotate/relevancy/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/api/annotate/relevancy/test_relevancy.py b/tests/automated/integration/api/annotate/relevancy/test_relevancy.py new file mode 100644 index 00000000..387d68c0 --- /dev/null +++ b/tests/automated/integration/api/annotate/relevancy/test_relevancy.py @@ -0,0 +1,213 @@ 
+from http import HTTPStatus + +import pytest +from fastapi import HTTPException + +from src.api.endpoints.annotate.relevance.get.dto import GetNextRelevanceAnnotationResponseOuterInfo +from src.api.endpoints.annotate.relevance.post.dto import RelevanceAnnotationPostInfo +from src.core.enums import SuggestedStatus +from src.core.error_manager.enums import ErrorTypes +from src.db.dtos.url.insert import InsertURLsInfo +from src.db.models.impl.url.suggestion.relevant.user import UserRelevantSuggestion +from tests.automated.integration.api.annotate.helpers import check_url_mappings_match, check_html_info_not_empty, \ + html_info_empty +from tests.helpers.data_creator.models.creation_info.batch.v1 import BatchURLCreationInfo + + +@pytest.mark.asyncio +async def test_annotate_relevancy(api_test_helper): + ath = api_test_helper + + batch_id = ath.db_data_creator.batch() + + # Create 2 URLs with outcome `pending` + iui: InsertURLsInfo = ath.db_data_creator.urls(batch_id=batch_id, url_count=2) + + url_1 = iui.url_mappings[0] + url_2 = iui.url_mappings[1] + + # Add `Relevancy` attribute with value `True` to 1st URL + await ath.db_data_creator.auto_relevant_suggestions( + url_id=url_1.url_id, + relevant=True + ) + + # Add 'Relevancy' attribute with value `False` to 2nd URL + await ath.db_data_creator.auto_relevant_suggestions( + url_id=url_2.url_id, + relevant=False + ) + + # Add HTML data to both + await ath.db_data_creator.html_data([url_1.url_id, url_2.url_id]) + # Call `GET` `/annotate/relevance` and receive next URL + request_info_1: GetNextRelevanceAnnotationResponseOuterInfo = api_test_helper.request_validator.get_next_relevance_annotation() + inner_info_1 = request_info_1.next_annotation + + check_url_mappings_match(inner_info_1.url_info, url_1) + check_html_info_not_empty(inner_info_1.html_info) + + # Validate that the correct relevant value is returned + assert inner_info_1.annotation.is_relevant is True + + # A second user should see the same URL + + + # Annotate 
with value 'False' and get next URL + request_info_2: GetNextRelevanceAnnotationResponseOuterInfo = api_test_helper.request_validator.post_relevance_annotation_and_get_next( + url_id=inner_info_1.url_info.url_id, + relevance_annotation_post_info=RelevanceAnnotationPostInfo( + suggested_status=SuggestedStatus.NOT_RELEVANT + ) + ) + + inner_info_2 = request_info_2.next_annotation + + check_url_mappings_match( + inner_info_2.url_info, + url_2 + ) + check_html_info_not_empty(inner_info_2.html_info) + + request_info_3: GetNextRelevanceAnnotationResponseOuterInfo = api_test_helper.request_validator.post_relevance_annotation_and_get_next( + url_id=inner_info_2.url_info.url_id, + relevance_annotation_post_info=RelevanceAnnotationPostInfo( + suggested_status=SuggestedStatus.RELEVANT + ) + ) + + assert request_info_3.next_annotation is None + + # Get all URL annotations. Confirm they exist for user + adb_client = ath.adb_client() + results: list[UserRelevantSuggestion] = await adb_client.get_all(UserRelevantSuggestion) + result_1 = results[0] + result_2 = results[1] + + assert result_1.url_id == inner_info_1.url_info.url_id + assert result_1.suggested_status == SuggestedStatus.NOT_RELEVANT.value + + assert result_2.url_id == inner_info_2.url_info.url_id + assert result_2.suggested_status == SuggestedStatus.RELEVANT.value + + # If user submits annotation for same URL, the URL should be overwritten + request_info_4: GetNextRelevanceAnnotationResponseOuterInfo = api_test_helper.request_validator.post_relevance_annotation_and_get_next( + url_id=inner_info_1.url_info.url_id, + relevance_annotation_post_info=RelevanceAnnotationPostInfo( + suggested_status=SuggestedStatus.RELEVANT + ) + ) + + assert request_info_4.next_annotation is None + + results: list[UserRelevantSuggestion] = await adb_client.get_all(UserRelevantSuggestion) + assert len(results) == 2 + + for result in results: + if result.url_id == inner_info_1.url_info.url_id: + assert result.suggested_status == 
SuggestedStatus.RELEVANT.value + + +async def post_and_validate_relevancy_annotation(ath, url_id, annotation: SuggestedStatus): + response = ath.request_validator.post_relevance_annotation_and_get_next( + url_id=url_id, + relevance_annotation_post_info=RelevanceAnnotationPostInfo( + suggested_status=annotation + ) + ) + + assert response.next_annotation is None + + results: list[UserRelevantSuggestion] = await ath.adb_client().get_all(UserRelevantSuggestion) + assert len(results) == 1 + assert results[0].suggested_status == annotation.value + + +@pytest.mark.asyncio +async def test_annotate_relevancy_broken_page(api_test_helper): + ath = api_test_helper + + creation_info = await ath.db_data_creator.batch_and_urls(url_count=1, with_html_content=False) + + await post_and_validate_relevancy_annotation( + ath, + url_id=creation_info.url_ids[0], + annotation=SuggestedStatus.BROKEN_PAGE_404 + ) + + +@pytest.mark.asyncio +async def test_annotate_relevancy_individual_record(api_test_helper): + ath = api_test_helper + + creation_info: BatchURLCreationInfo = await ath.db_data_creator.batch_and_urls( + url_count=1 + ) + + await post_and_validate_relevancy_annotation( + ath, + url_id=creation_info.url_ids[0], + annotation=SuggestedStatus.INDIVIDUAL_RECORD + ) + + +@pytest.mark.asyncio +async def test_annotate_relevancy_already_annotated_by_different_user( + api_test_helper +): + ath = api_test_helper + + creation_info: BatchURLCreationInfo = await ath.db_data_creator.batch_and_urls( + url_count=1 + ) + + await ath.db_data_creator.user_relevant_suggestion( + url_id=creation_info.url_ids[0], + user_id=2, + suggested_status=SuggestedStatus.RELEVANT + ) + + # Annotate with different user (default is 1) and get conflict error + try: + response = await ath.request_validator.post_relevance_annotation_and_get_next( + url_id=creation_info.url_ids[0], + relevance_annotation_post_info=RelevanceAnnotationPostInfo( + suggested_status=SuggestedStatus.NOT_RELEVANT + ) + ) + except 
HTTPException as e: + assert e.status_code == HTTPStatus.CONFLICT + assert e.detail["detail"]["code"] == ErrorTypes.ANNOTATION_EXISTS.value + assert e.detail["detail"]["message"] == f"Annotation of type RELEVANCE already exists for url {creation_info.url_ids[0]}" + + +@pytest.mark.asyncio +async def test_annotate_relevancy_no_html(api_test_helper): + ath = api_test_helper + + batch_id = ath.db_data_creator.batch() + + # Create 2 URLs with outcome `pending` + iui: InsertURLsInfo = ath.db_data_creator.urls(batch_id=batch_id, url_count=2) + + url_1 = iui.url_mappings[0] + url_2 = iui.url_mappings[1] + + # Add `Relevancy` attribute with value `True` to 1st URL + await ath.db_data_creator.auto_relevant_suggestions( + url_id=url_1.url_id, + relevant=True + ) + + # Add 'Relevancy' attribute with value `False` to 2nd URL + await ath.db_data_creator.auto_relevant_suggestions( + url_id=url_2.url_id, + relevant=False + ) + + # Call `GET` `/annotate/relevance` and receive next URL + request_info_1: GetNextRelevanceAnnotationResponseOuterInfo = api_test_helper.request_validator.get_next_relevance_annotation() + inner_info_1 = request_info_1.next_annotation + + check_url_mappings_match(inner_info_1.url_info, url_1) + assert html_info_empty(inner_info_1.html_info) diff --git a/tests/automated/integration/api/annotate/test_.py b/tests/automated/integration/api/annotate/test_.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/api/batch/__init__.py b/tests/automated/integration/api/batch/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/api/batch/summaries/__init__.py b/tests/automated/integration/api/batch/summaries/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/api/batch/summaries/test_happy_path.py b/tests/automated/integration/api/batch/summaries/test_happy_path.py new file mode 100644 index 00000000..d91e1a8c --- /dev/null +++ 
b/tests/automated/integration/api/batch/summaries/test_happy_path.py @@ -0,0 +1,95 @@ +import pytest + +from src.core.enums import BatchStatus +from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters +from tests.helpers.batch_creation_parameters.enums import URLCreationEnum +from tests.helpers.batch_creation_parameters.url_creation_parameters import TestURLCreationParameters + + +@pytest.mark.asyncio +async def test_get_batch_summaries(api_test_helper): + ath = api_test_helper + + batch_params = [ + TestBatchCreationParameters( + urls=[ + TestURLCreationParameters( + count=1, + status=URLCreationEnum.OK + ), + TestURLCreationParameters( + count=2, + status=URLCreationEnum.SUBMITTED + ) + ] + ), + TestBatchCreationParameters( + urls=[ + TestURLCreationParameters( + count=4, + status=URLCreationEnum.NOT_RELEVANT + ), + TestURLCreationParameters( + count=3, + status=URLCreationEnum.ERROR + ) + ] + ), + TestBatchCreationParameters( + urls=[ + TestURLCreationParameters( + count=7, + status=URLCreationEnum.DUPLICATE + ), + TestURLCreationParameters( + count=1, + status=URLCreationEnum.SUBMITTED + ) + ] + ) + ] + + batch_1_creation_info = await ath.db_data_creator.batch_v2(batch_params[0]) + batch_2_creation_info = await ath.db_data_creator.batch_v2(batch_params[1]) + batch_3_creation_info = await ath.db_data_creator.batch_v2(batch_params[2]) + + batch_1_id = batch_1_creation_info.batch_id + batch_2_id = batch_2_creation_info.batch_id + batch_3_id = batch_3_creation_info.batch_id + + + response = ath.request_validator.get_batch_statuses() + results = response.results + + assert len(results) == 3 + + result_1 = results[0] + assert result_1.id == batch_1_id + assert result_1.status == BatchStatus.READY_TO_LABEL + counts_1 = result_1.url_counts + assert counts_1.total == 3 + assert counts_1.pending == 1 + assert counts_1.submitted == 2 + assert counts_1.not_relevant == 0 + assert counts_1.duplicate == 0 + assert counts_1.errored == 0 + + result_2 
= results[1] + assert result_2.id == batch_2_id + counts_2 = result_2.url_counts + assert counts_2.total == 7 + assert counts_2.not_relevant == 4 + assert counts_2.errored == 3 + assert counts_2.pending == 3 + assert counts_2.submitted == 0 + assert counts_2.duplicate == 0 + + result_3 = results[2] + assert result_3.id == batch_3_id + counts_3 = result_3.url_counts + assert counts_3.total == 8 + assert counts_3.not_relevant == 0 + assert counts_3.errored == 0 + assert counts_3.pending == 7 + assert counts_3.submitted == 1 + assert counts_3.duplicate == 7 diff --git a/tests/automated/integration/api/batch/summaries/test_pending_url_filter.py b/tests/automated/integration/api/batch/summaries/test_pending_url_filter.py new file mode 100644 index 00000000..7fdc96b1 --- /dev/null +++ b/tests/automated/integration/api/batch/summaries/test_pending_url_filter.py @@ -0,0 +1,75 @@ +import pytest + +from src.collectors.enums import CollectorType +from src.core.enums import BatchStatus +from src.db.dtos.url.mapping import URLMapping +from tests.helpers.batch_creation_parameters.enums import URLCreationEnum +from tests.helpers.data_creator.core import DBDataCreator + + +@pytest.mark.asyncio +async def test_get_batch_summaries_pending_url_filter(api_test_helper): + ath = api_test_helper + dbdc: DBDataCreator = ath.db_data_creator + + # Add an errored out batch + batch_error: int = await dbdc.create_batch(status=BatchStatus.ERROR) + + # Add a batch with pending urls + batch_pending = await ath.db_data_creator.batch_and_urls( + strategy=CollectorType.EXAMPLE, + url_count=2, + batch_status=BatchStatus.READY_TO_LABEL, + with_html_content=True, + url_status=URLCreationEnum.OK + ) + + # Add a batch with submitted URLs + batch_submitted: int = await dbdc.create_batch(status=BatchStatus.READY_TO_LABEL) + submitted_url_mappings: list[URLMapping] = await dbdc.create_submitted_urls(count=2) + submitted_url_ids: list[int] = [url_mapping.url_id for url_mapping in submitted_url_mappings] + 
await dbdc.create_batch_url_links( + batch_id=batch_submitted, + url_ids=submitted_url_ids + ) + + # Add an aborted batch + batch_aborted: int = await dbdc.create_batch(status=BatchStatus.ABORTED) + + # Add a batch with validated URLs + batch_validated: int = await dbdc.create_batch(status=BatchStatus.READY_TO_LABEL) + validated_url_mappings: list[URLMapping] = await dbdc.create_validated_urls( + count=2 + ) + validated_url_ids: list[int] = [url_mapping.url_id for url_mapping in validated_url_mappings] + await dbdc.create_batch_url_links( + batch_id=batch_validated, + url_ids=validated_url_ids + ) + + # Test filter for pending URLs and only retrieve the second batch + pending_urls_results = ath.request_validator.get_batch_statuses( + has_pending_urls=True + ) + + assert len(pending_urls_results.results) == 1 + assert pending_urls_results.results[0].id == batch_pending.batch_id + + # Test filter without pending URLs and retrieve the other four batches + no_pending_urls_results = ath.request_validator.get_batch_statuses( + has_pending_urls=False + ) + + assert len(no_pending_urls_results.results) == 4 + for result in no_pending_urls_results.results: + assert result.id in [ + batch_error, + batch_submitted, + batch_validated, + batch_aborted + ] + + # Test no filter for pending URLs and retrieve all batches + no_filter_results = ath.request_validator.get_batch_statuses() + + assert len(no_filter_results.results) == 5 diff --git a/tests/automated/integration/api/batch/test_batch.py b/tests/automated/integration/api/batch/test_batch.py new file mode 100644 index 00000000..86f35cfc --- /dev/null +++ b/tests/automated/integration/api/batch/test_batch.py @@ -0,0 +1,64 @@ +from src.db.models.impl.batch.pydantic.info import BatchInfo +from src.db.dtos.url.insert import InsertURLsInfo +from src.collectors.impl.example.dtos.input import ExampleInputDTO +from src.core.enums import BatchStatus + + +def test_abort_batch(api_test_helper): + ath = api_test_helper + + dto = 
ExampleInputDTO( + sleep_time=1 + ) + + batch_id = ath.request_validator.example_collector(dto=dto)["batch_id"] + + response = ath.request_validator.abort_batch(batch_id=batch_id) + + assert response.message == "Batch aborted." + + bi: BatchInfo = ath.request_validator.get_batch_info(batch_id=batch_id) + + assert bi.status == BatchStatus.ABORTED + +def test_get_batch_urls(api_test_helper): + + # Insert batch and urls into database + ath = api_test_helper + batch_id = ath.db_data_creator.batch() + iui: InsertURLsInfo = ath.db_data_creator.urls(batch_id=batch_id, url_count=101) + + response = ath.request_validator.get_batch_urls(batch_id=batch_id, page=1) + assert len(response.urls) == 100 + # Check that the first url corresponds to the first url inserted + assert response.urls[0].url == iui.url_mappings[0].url + # Check that the last url corresponds to the 100th url inserted + assert response.urls[-1].url == iui.url_mappings[99].url + + + # Check that a more limited set of urls exist + response = ath.request_validator.get_batch_urls(batch_id=batch_id, page=2) + assert len(response.urls) == 1 + # Check that this url corresponds to the last url inserted + assert response.urls[0].url == iui.url_mappings[-1].url + +def test_get_duplicate_urls(api_test_helper): + + # Insert batch and url into database + ath = api_test_helper + batch_id = ath.db_data_creator.batch() + iui: InsertURLsInfo = ath.db_data_creator.urls(batch_id=batch_id, url_count=101) + # Get a list of all url ids + url_ids = [url.url_id for url in iui.url_mappings] + + # Create a second batch which will be associated with the duplicates + dup_batch_id = ath.db_data_creator.batch() + + # Insert duplicate urls into database + ath.db_data_creator.duplicate_urls(duplicate_batch_id=dup_batch_id, url_ids=url_ids) + + response = ath.request_validator.get_batch_url_duplicates(batch_id=dup_batch_id, page=1) + assert len(response.duplicates) == 100 + + response = 
ath.request_validator.get_batch_url_duplicates(batch_id=dup_batch_id, page=2) + assert len(response.duplicates) == 1 \ No newline at end of file diff --git a/tests/automated/integration/api/example_collector/test_happy_path.py b/tests/automated/integration/api/example_collector/test_happy_path.py index bbb52789..d580f546 100644 --- a/tests/automated/integration/api/example_collector/test_happy_path.py +++ b/tests/automated/integration/api/example_collector/test_happy_path.py @@ -6,7 +6,7 @@ from src.api.endpoints.batch.dtos.get.summaries.response import GetBatchSummariesResponse from src.api.endpoints.batch.dtos.get.summaries.summary import BatchSummary from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.batch.pydantic import BatchInfo +from src.db.models.impl.batch.pydantic.info import BatchInfo from src.collectors.impl.example.dtos.input import ExampleInputDTO from src.collectors.enums import CollectorType from src.core.logger import AsyncCoreLogger diff --git a/tests/automated/integration/api/metrics/batches/test_aggregated.py b/tests/automated/integration/api/metrics/batches/test_aggregated.py index 084762b9..4b7b4f75 100644 --- a/tests/automated/integration/api/metrics/batches/test_aggregated.py +++ b/tests/automated/integration/api/metrics/batches/test_aggregated.py @@ -2,44 +2,65 @@ from src.collectors.enums import CollectorType, URLStatus from src.core.enums import BatchStatus +from src.db.client.async_ import AsyncDatabaseClient +from src.db.dtos.url.mapping import URLMapping +from src.db.helpers.connect import get_postgres_connection_string +from src.db.models.impl.flag.url_validated.enums import URLValidatedType from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters -from tests.helpers.batch_creation_parameters.url_creation_parameters import TestURLCreationParameters +from tests.helpers.data_creator.create import create_batch, create_url_data_sources, create_urls, \ + create_batch_url_links, 
create_validated_flags +from tests.helpers.setup.wipe import wipe_database @pytest.mark.asyncio -async def test_get_batches_aggregated_metrics(api_test_helper): +async def test_get_batches_aggregated_metrics( + api_test_helper, + wiped_database +): ath = api_test_helper + adb_client: AsyncDatabaseClient = ath.adb_client() # Create successful batches with URLs of different statuses - all_params = [] for i in range(3): - params = TestBatchCreationParameters( + batch_id = await create_batch( + adb_client=adb_client, strategy=CollectorType.MANUAL, - urls=[ - TestURLCreationParameters( - count=1, - status=URLStatus.PENDING - ), - TestURLCreationParameters( - count=2, - status=URLStatus.SUBMITTED - ), - TestURLCreationParameters( - count=3, - status=URLStatus.NOT_RELEVANT - ), - TestURLCreationParameters( - count=4, - status=URLStatus.ERROR - ), - TestURLCreationParameters( - count=5, - status=URLStatus.VALIDATED - ) - ] ) - all_params.append(params) - + url_mappings_error: list[URLMapping] = await create_urls( + adb_client=adb_client, + status=URLStatus.ERROR, + count=4, + ) + url_mappings_ok: list[URLMapping] = await create_urls( + adb_client=adb_client, + status=URLStatus.OK, + count=11, + ) + url_mappings_all: list[URLMapping] = url_mappings_error + url_mappings_ok + url_ids_all: list[int] = [url_mapping.url_id for url_mapping in url_mappings_all] + await create_batch_url_links( + adb_client=adb_client, + batch_id=batch_id, + url_ids=url_ids_all, + ) + urls_submitted: list[int] = url_ids_all[:2] + urls_not_relevant: list[int] = url_ids_all[2:5] + urls_validated: list[int] = url_ids_all[5:10] + await create_validated_flags( + adb_client=adb_client, + url_ids=urls_validated + urls_submitted, + validation_type=URLValidatedType.DATA_SOURCE, + ) + await create_validated_flags( + adb_client=adb_client, + url_ids=urls_not_relevant, + validation_type=URLValidatedType.NOT_RELEVANT, + ) + await create_url_data_sources( + adb_client=adb_client, + url_ids=urls_submitted, + ) + 
all_params = [] # Create failed batches for i in range(2): params = TestBatchCreationParameters( @@ -66,8 +87,8 @@ async def test_get_batches_aggregated_metrics(api_test_helper): assert inner_dto_manual.count_urls == 45 assert inner_dto_manual.count_successful_batches == 3 assert inner_dto_manual.count_failed_batches == 0 - assert inner_dto_manual.count_urls_pending == 3 + assert inner_dto_manual.count_urls_pending == 15 assert inner_dto_manual.count_urls_submitted == 6 assert inner_dto_manual.count_urls_rejected == 9 assert inner_dto_manual.count_urls_errors == 12 - assert inner_dto_manual.count_urls_validated == 15 + assert inner_dto_manual.count_urls_validated == 30 diff --git a/tests/automated/integration/api/metrics/batches/test_breakdown.py b/tests/automated/integration/api/metrics/batches/test_breakdown.py index 0cce8740..0657c66f 100644 --- a/tests/automated/integration/api/metrics/batches/test_breakdown.py +++ b/tests/automated/integration/api/metrics/batches/test_breakdown.py @@ -1,79 +1,102 @@ +from datetime import datetime, timedelta + import pendulum import pytest from src.collectors.enums import CollectorType, URLStatus from src.core.enums import BatchStatus -from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters -from tests.helpers.batch_creation_parameters.url_creation_parameters import TestURLCreationParameters +from src.db.client.async_ import AsyncDatabaseClient +from src.db.dtos.url.mapping import URLMapping +from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from tests.helpers.data_creator.create import create_batch, create_urls, create_batch_url_links, create_validated_flags, \ + create_url_data_sources @pytest.mark.asyncio async def test_get_batches_breakdown_metrics(api_test_helper): # Create a different batch for each month, with different URLs - today = pendulum.parse('2021-01-01') + today = datetime.now() ath = api_test_helper + adb_client: AsyncDatabaseClient = ath.adb_client() - 
batch_1_params = TestBatchCreationParameters( + batch_id_1 = await create_batch( + adb_client=adb_client, strategy=CollectorType.MANUAL, - urls=[ - TestURLCreationParameters( - count=1, - status=URLStatus.PENDING - ), - TestURLCreationParameters( - count=2, - status=URLStatus.SUBMITTED - ), - ] ) - batch_1 = await ath.db_data_creator.batch_v2(batch_1_params) - batch_2_params = TestBatchCreationParameters( - strategy=CollectorType.EXAMPLE, - outcome=BatchStatus.ERROR, - created_at=today.subtract(weeks=1), + url_mappings_1: list[URLMapping] = await create_urls( + adb_client=adb_client, + count=3, + ) + url_ids_1: list[int] = [url_mapping.url_id for url_mapping in url_mappings_1] + await create_batch_url_links(adb_client=adb_client, batch_id=batch_id_1, url_ids=url_ids_1) + await create_validated_flags( + adb_client=adb_client, + url_ids=url_ids_1[:2], + validation_type=URLValidatedType.DATA_SOURCE + ) + await create_url_data_sources( + adb_client=adb_client, + url_ids=url_ids_1[:2], ) - batch_2 = await ath.db_data_creator.batch_v2(batch_2_params) - batch_3_params = TestBatchCreationParameters( + + batch_id_2 = await create_batch( + adb_client=adb_client, + status=BatchStatus.ERROR, + date_generated=today - timedelta(days=7), + ) + + batch_id_3 = await create_batch( + adb_client=adb_client, strategy=CollectorType.AUTO_GOOGLER, - created_at=today.subtract(weeks=2), - urls=[ - TestURLCreationParameters( - count=3, - status=URLStatus.NOT_RELEVANT - ), - TestURLCreationParameters( - count=4, - status=URLStatus.ERROR - ), - TestURLCreationParameters( - count=5, - status=URLStatus.VALIDATED - ), - ] + date_generated=today - timedelta(days=14) ) - batch_3 = await ath.db_data_creator.batch_v2(batch_3_params) + error_url_mappings: list[URLMapping] = await create_urls( + adb_client=adb_client, + status=URLStatus.ERROR, + count=4, + ) + error_url_ids: list[int] = [url_mapping.url_id for url_mapping in error_url_mappings] + validated_url_mappings: list[URLMapping] = await 
create_urls( + adb_client=adb_client, + count=8, + ) + validated_url_ids: list[int] = [url_mapping.url_id for url_mapping in validated_url_mappings] + await create_validated_flags( + adb_client=adb_client, + url_ids=validated_url_ids[:3], + validation_type=URLValidatedType.NOT_RELEVANT, + ) + await create_validated_flags( + adb_client=adb_client, + url_ids=validated_url_ids[4:9], + validation_type=URLValidatedType.DATA_SOURCE, + ) + await create_batch_url_links( + adb_client=adb_client, + batch_id=batch_id_3, + url_ids=error_url_ids + validated_url_ids, + ) + dto_1 = await ath.request_validator.get_batches_breakdown_metrics( page=1 ) assert len(dto_1.batches) == 3 dto_batch_1 = dto_1.batches[2] - assert dto_batch_1.batch_id == batch_1.batch_id + assert dto_batch_1.batch_id == batch_id_1 assert dto_batch_1.strategy == CollectorType.MANUAL assert dto_batch_1.status == BatchStatus.READY_TO_LABEL - assert pendulum.instance(dto_batch_1.created_at) > today assert dto_batch_1.count_url_total == 3 assert dto_batch_1.count_url_pending == 1 assert dto_batch_1.count_url_submitted == 2 assert dto_batch_1.count_url_rejected == 0 assert dto_batch_1.count_url_error == 0 - assert dto_batch_1.count_url_validated == 0 + assert dto_batch_1.count_url_validated == 2 dto_batch_2 = dto_1.batches[1] - assert dto_batch_2.batch_id == batch_2.batch_id + assert dto_batch_2.batch_id == batch_id_2 assert dto_batch_2.status == BatchStatus.ERROR assert dto_batch_2.strategy == CollectorType.EXAMPLE - assert pendulum.instance(dto_batch_2.created_at) == today.subtract(weeks=1) assert dto_batch_2.count_url_total == 0 assert dto_batch_2.count_url_submitted == 0 assert dto_batch_2.count_url_pending == 0 @@ -82,16 +105,15 @@ async def test_get_batches_breakdown_metrics(api_test_helper): assert dto_batch_2.count_url_validated == 0 dto_batch_3 = dto_1.batches[0] - assert dto_batch_3.batch_id == batch_3.batch_id + assert dto_batch_3.batch_id == batch_id_3 assert dto_batch_3.status == 
BatchStatus.READY_TO_LABEL assert dto_batch_3.strategy == CollectorType.AUTO_GOOGLER - assert pendulum.instance(dto_batch_3.created_at) == today.subtract(weeks=2) assert dto_batch_3.count_url_total == 12 - assert dto_batch_3.count_url_pending == 0 + assert dto_batch_3.count_url_pending == 5 assert dto_batch_3.count_url_submitted == 0 assert dto_batch_3.count_url_rejected == 3 assert dto_batch_3.count_url_error == 4 - assert dto_batch_3.count_url_validated == 5 + assert dto_batch_3.count_url_validated == 7 dto_2 = await ath.request_validator.get_batches_breakdown_metrics( page=2 diff --git a/tests/automated/integration/api/metrics/test_backlog.py b/tests/automated/integration/api/metrics/test_backlog.py index a6807a23..e48db202 100644 --- a/tests/automated/integration/api/metrics/test_backlog.py +++ b/tests/automated/integration/api/metrics/test_backlog.py @@ -3,9 +3,13 @@ from src.collectors.enums import CollectorType, URLStatus from src.core.enums import SuggestedStatus +from src.db.dtos.url.mapping import URLMapping +from src.db.models.impl.flag.url_validated.enums import URLValidatedType from tests.helpers.batch_creation_parameters.annotation_info import AnnotationInfo from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters +from tests.helpers.batch_creation_parameters.enums import URLCreationEnum from tests.helpers.batch_creation_parameters.url_creation_parameters import TestURLCreationParameters +from tests.helpers.data_creator.core import DBDataCreator @pytest.mark.asyncio @@ -14,29 +18,22 @@ async def test_get_backlog_metrics(api_test_helper): ath = api_test_helper adb_client = ath.adb_client() + ddc: DBDataCreator = ath.db_data_creator # Populate the backlog table and test that backlog metrics returned on a monthly basis # Ensure that multiple days in each month are added to the backlog table, with different values - - batch_1_params = TestBatchCreationParameters( - strategy=CollectorType.MANUAL, - urls=[ - 
TestURLCreationParameters( - count=1, - status=URLStatus.PENDING, - annotation_info=AnnotationInfo( - user_relevant=SuggestedStatus.NOT_RELEVANT - ) - ), - TestURLCreationParameters( - count=2, - status=URLStatus.SUBMITTED - ), - ] + batch_1_id: int = await ddc.create_batch() + url_mappings_1: list[URLMapping] = await ddc.create_urls(count=3) + url_ids_1: list[int] = [url_mapping.url_id for url_mapping in url_mappings_1] + await ddc.create_batch_url_links(url_ids=url_ids_1, batch_id=batch_1_id) + submitted_url_ids_1: list[int] = url_ids_1[:2] + await ddc.create_validated_flags( + url_ids=submitted_url_ids_1, + validation_type=URLValidatedType.DATA_SOURCE ) - batch_1 = await ath.db_data_creator.batch_v2(batch_1_params) + await ddc.create_url_data_sources(url_ids=submitted_url_ids_1) await adb_client.populate_backlog_snapshot( dt=today.subtract(months=3).naive() @@ -46,23 +43,20 @@ async def test_get_backlog_metrics(api_test_helper): dt=today.subtract(months=2, days=3).naive() ) - batch_2_params = TestBatchCreationParameters( - strategy=CollectorType.AUTO_GOOGLER, - urls=[ - TestURLCreationParameters( - count=4, - status=URLStatus.PENDING, - annotation_info=AnnotationInfo( - user_relevant=SuggestedStatus.NOT_RELEVANT - ) - ), - TestURLCreationParameters( - count=2, - status=URLStatus.ERROR - ), - ] + batch_2_id: int = await ddc.create_batch() + not_relevant_url_mappings_2: list[URLMapping] = await ddc.create_urls(count=6) + not_relevant_url_ids_2: list[int] = [url_mapping.url_id for url_mapping in not_relevant_url_mappings_2] + await ddc.create_batch_url_links(url_ids=not_relevant_url_ids_2, batch_id=batch_2_id) + await ddc.create_validated_flags( + url_ids=not_relevant_url_ids_2[:4], + validation_type=URLValidatedType.NOT_RELEVANT + ) + error_url_mappings_2: list[URLMapping] = await ddc.create_urls( + status=URLStatus.ERROR, + count=2 ) - batch_2 = await ath.db_data_creator.batch_v2(batch_2_params) + error_url_ids_2: list[int] = [url_mapping.url_id for url_mapping 
in error_url_mappings_2] + await ddc.create_batch_url_links(url_ids=error_url_ids_2, batch_id=batch_2_id) await adb_client.populate_backlog_snapshot( dt=today.subtract(months=2).naive() @@ -72,23 +66,15 @@ async def test_get_backlog_metrics(api_test_helper): dt=today.subtract(months=1, days=4).naive() ) - batch_3_params = TestBatchCreationParameters( - strategy=CollectorType.AUTO_GOOGLER, - urls=[ - TestURLCreationParameters( - count=7, - status=URLStatus.PENDING, - annotation_info=AnnotationInfo( - user_relevant=SuggestedStatus.NOT_RELEVANT - ) - ), - TestURLCreationParameters( - count=5, - status=URLStatus.VALIDATED - ), - ] + batch_3_id: int = await ddc.create_batch() + url_mappings_3: list[URLMapping] = await ddc.create_urls(count=12) + url_ids_3: list[int] = [url_mapping.url_id for url_mapping in url_mappings_3] + await ddc.create_batch_url_links(url_ids=url_ids_3, batch_id=batch_3_id) + await ddc.create_validated_flags( + url_ids=url_ids_3[:5], + validation_type=URLValidatedType.DATA_SOURCE ) - batch_3 = await ath.db_data_creator.batch_v2(batch_3_params) + await adb_client.populate_backlog_snapshot( dt=today.subtract(months=1).naive() @@ -100,5 +86,5 @@ async def test_get_backlog_metrics(api_test_helper): # Test that the count closest to the beginning of the month is returned for each month assert dto.entries[0].count_pending_total == 1 - assert dto.entries[1].count_pending_total == 5 - assert dto.entries[2].count_pending_total == 12 + assert dto.entries[1].count_pending_total == 3 + assert dto.entries[2].count_pending_total == 10 diff --git a/tests/automated/integration/api/metrics/urls/aggregated/test_core.py b/tests/automated/integration/api/metrics/urls/aggregated/test_core.py index c8957952..08c52845 100644 --- a/tests/automated/integration/api/metrics/urls/aggregated/test_core.py +++ b/tests/automated/integration/api/metrics/urls/aggregated/test_core.py @@ -1,75 +1,70 @@ +from datetime import datetime, timedelta, timezone + import pendulum import pytest 
from src.collectors.enums import CollectorType, URLStatus +from src.db.dtos.url.mapping import URLMapping +from src.db.models.impl.flag.url_validated.enums import URLValidatedType from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters +from tests.helpers.batch_creation_parameters.enums import URLCreationEnum from tests.helpers.batch_creation_parameters.url_creation_parameters import TestURLCreationParameters +from tests.helpers.data_creator.core import DBDataCreator @pytest.mark.asyncio async def test_get_urls_aggregated_metrics(api_test_helper): ath = api_test_helper - today = pendulum.parse('2021-01-01') + today = datetime.now() + + ddc: DBDataCreator = ath.db_data_creator batch_0_params = TestBatchCreationParameters( strategy=CollectorType.MANUAL, - created_at=today.subtract(days=1), + created_at=today - timedelta(days=1), urls=[ TestURLCreationParameters( count=1, - status=URLStatus.PENDING, + status=URLCreationEnum.OK, ), ] ) - batch_0 = await ath.db_data_creator.batch_v2(batch_0_params) - oldest_url_id = batch_0.urls_by_status[URLStatus.PENDING].url_mappings[0].url_id - + batch_0: int = await ddc.create_batch( + strategy=CollectorType.MANUAL, + date_generated=today - timedelta(days=1) + ) + url_mappings_0: list[URLMapping] = await ddc.create_urls(batch_id=batch_0) + oldest_url_id: int = url_mappings_0[0].url_id - batch_1_params = TestBatchCreationParameters( + batch_1: int = await ddc.create_batch( strategy=CollectorType.MANUAL, - urls=[ - TestURLCreationParameters( - count=1, - status=URLStatus.PENDING, - ), - TestURLCreationParameters( - count=2, - status=URLStatus.SUBMITTED - ), - ] ) - batch_1 = await ath.db_data_creator.batch_v2(batch_1_params) + url_mappings_1_ok: list[URLMapping] = await ddc.create_urls(batch_id=batch_1, count=1) + url_mappings_1_submitted: list[URLMapping] = await ddc.create_submitted_urls(count=2) + url_ids_1_submitted: list[int] = [url_mapping.url_id for url_mapping in url_mappings_1_submitted] + await 
ddc.create_batch_url_links(url_ids=url_ids_1_submitted, batch_id=batch_1) - batch_2_params = TestBatchCreationParameters( + batch_2: int = await ddc.create_batch( strategy=CollectorType.AUTO_GOOGLER, - urls=[ - TestURLCreationParameters( - count=4, - status=URLStatus.PENDING, - ), - TestURLCreationParameters( - count=2, - status=URLStatus.ERROR - ), - TestURLCreationParameters( - count=1, - status=URLStatus.VALIDATED - ), - TestURLCreationParameters( - count=5, - status=URLStatus.NOT_RELEVANT - ), - ] ) - batch_2 = await ath.db_data_creator.batch_v2(batch_2_params) + url_mappings_2_ok: list[URLMapping] = await ddc.create_urls(batch_id=batch_2, count=4, status=URLStatus.OK) + url_mappings_2_error: list[URLMapping] = await ddc.create_urls(batch_id=batch_2, count=2, status=URLStatus.ERROR) + url_mappings_2_validated: list[URLMapping] = await ddc.create_validated_urls(count=1, validation_type=URLValidatedType.DATA_SOURCE) + url_mappings_2_not_relevant: list[URLMapping] = await ddc.create_validated_urls(count=5, validation_type=URLValidatedType.NOT_RELEVANT) + url_ids_2_validated: list[int] = [url_mapping.url_id for url_mapping in url_mappings_2_validated] + url_ids_2_not_relevant: list[int] = [url_mapping.url_id for url_mapping in url_mappings_2_not_relevant] + await ddc.create_batch_url_links( + url_ids=url_ids_2_validated + url_ids_2_not_relevant, + batch_id=batch_2 + ) + + dto = await ath.request_validator.get_urls_aggregated_metrics() assert dto.oldest_pending_url_id == oldest_url_id - assert dto.oldest_pending_url_created_at == today.subtract(days=1).in_timezone('UTC').naive() - assert dto.count_urls_pending == 6 assert dto.count_urls_rejected == 5 assert dto.count_urls_errors == 2 - assert dto.count_urls_validated == 1 + assert dto.count_urls_validated == 8 assert dto.count_urls_submitted == 2 assert dto.count_urls_total == 16 diff --git a/tests/automated/integration/api/metrics/urls/breakdown/test_pending.py 
b/tests/automated/integration/api/metrics/urls/breakdown/test_pending.py index e81d6ec7..02f1aae2 100644 --- a/tests/automated/integration/api/metrics/urls/breakdown/test_pending.py +++ b/tests/automated/integration/api/metrics/urls/breakdown/test_pending.py @@ -6,6 +6,7 @@ from src.core.enums import SuggestedStatus, RecordType from tests.helpers.batch_creation_parameters.annotation_info import AnnotationInfo from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters +from tests.helpers.batch_creation_parameters.enums import URLCreationEnum from tests.helpers.batch_creation_parameters.url_creation_parameters import TestURLCreationParameters @@ -27,14 +28,14 @@ async def test_get_urls_breakdown_pending_metrics(api_test_helper): urls=[ TestURLCreationParameters( count=1, - status=URLStatus.PENDING, + status=URLCreationEnum.OK, annotation_info=AnnotationInfo( user_relevant=SuggestedStatus.NOT_RELEVANT ) ), TestURLCreationParameters( count=2, - status=URLStatus.SUBMITTED + status=URLCreationEnum.SUBMITTED ), ] ) @@ -44,7 +45,7 @@ async def test_get_urls_breakdown_pending_metrics(api_test_helper): urls=[ TestURLCreationParameters( count=3, - status=URLStatus.PENDING, + status=URLCreationEnum.OK, annotation_info=AnnotationInfo( user_relevant=SuggestedStatus.RELEVANT, user_record_type=RecordType.CALLS_FOR_SERVICE @@ -60,15 +61,15 @@ async def test_get_urls_breakdown_pending_metrics(api_test_helper): urls=[ TestURLCreationParameters( count=3, - status=URLStatus.SUBMITTED + status=URLCreationEnum.SUBMITTED ), TestURLCreationParameters( count=4, - status=URLStatus.ERROR + status=URLCreationEnum.ERROR ), TestURLCreationParameters( count=5, - status=URLStatus.PENDING, + status=URLCreationEnum.OK, annotation_info=AnnotationInfo( user_relevant=SuggestedStatus.RELEVANT, user_record_type=RecordType.INCARCERATION_RECORDS, diff --git a/tests/automated/integration/api/metrics/urls/breakdown/test_submitted.py 
b/tests/automated/integration/api/metrics/urls/breakdown/test_submitted.py index 71e00e51..cbd30f8b 100644 --- a/tests/automated/integration/api/metrics/urls/breakdown/test_submitted.py +++ b/tests/automated/integration/api/metrics/urls/breakdown/test_submitted.py @@ -3,6 +3,7 @@ from src.collectors.enums import CollectorType, URLStatus from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters +from tests.helpers.batch_creation_parameters.enums import URLCreationEnum from tests.helpers.batch_creation_parameters.url_creation_parameters import TestURLCreationParameters @@ -18,11 +19,11 @@ async def test_get_urls_breakdown_submitted_metrics(api_test_helper): urls=[ TestURLCreationParameters( count=1, - status=URLStatus.PENDING + status=URLCreationEnum.OK ), TestURLCreationParameters( count=2, - status=URLStatus.SUBMITTED + status=URLCreationEnum.SUBMITTED ), ] ) @@ -32,7 +33,7 @@ async def test_get_urls_breakdown_submitted_metrics(api_test_helper): urls=[ TestURLCreationParameters( count=3, - status=URLStatus.SUBMITTED + status=URLCreationEnum.SUBMITTED ) ], created_at=today.subtract(weeks=1), @@ -44,15 +45,15 @@ async def test_get_urls_breakdown_submitted_metrics(api_test_helper): urls=[ TestURLCreationParameters( count=3, - status=URLStatus.SUBMITTED + status=URLCreationEnum.SUBMITTED ), TestURLCreationParameters( count=4, - status=URLStatus.ERROR + status=URLCreationEnum.ERROR ), TestURLCreationParameters( count=5, - status=URLStatus.VALIDATED + status=URLCreationEnum.VALIDATED ), ] ) diff --git a/tests/automated/integration/api/review/conftest.py b/tests/automated/integration/api/review/conftest.py index e4345821..59d76930 100644 --- a/tests/automated/integration/api/review/conftest.py +++ b/tests/automated/integration/api/review/conftest.py @@ -5,32 +5,18 @@ from src.core.enums import SuggestedStatus, RecordType from tests.helpers.batch_creation_parameters.annotation_info import AnnotationInfo from 
tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters +from tests.helpers.batch_creation_parameters.enums import URLCreationEnum from tests.helpers.batch_creation_parameters.url_creation_parameters import TestURLCreationParameters @pytest_asyncio.fixture async def batch_url_creation_info(db_data_creator): - simple_parameter_statuses = [ - URLStatus.VALIDATED, - URLStatus.SUBMITTED, - URLStatus.INDIVIDUAL_RECORD, - URLStatus.NOT_RELEVANT, - URLStatus.ERROR, - URLStatus.DUPLICATE, - URLStatus.NOT_FOUND - ] - simple_parameters = [ - TestURLCreationParameters( - status=status - ) for status in simple_parameter_statuses - ] parameters = TestBatchCreationParameters( urls=[ - *simple_parameters, TestURLCreationParameters( count=2, - status=URLStatus.PENDING, + status=URLCreationEnum.OK, annotation_info=AnnotationInfo( user_relevant=SuggestedStatus.RELEVANT, user_record_type=RecordType.ARREST_RECORDS, diff --git a/tests/automated/integration/api/review/rejection/test_individual_record.py b/tests/automated/integration/api/review/rejection/test_individual_record.py index 6e81d378..33addd91 100644 --- a/tests/automated/integration/api/review/rejection/test_individual_record.py +++ b/tests/automated/integration/api/review/rejection/test_individual_record.py @@ -2,14 +2,21 @@ from src.api.endpoints.review.enums import RejectionReason from src.collectors.enums import URLStatus +from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from tests.automated.integration.api.review.rejection.helpers import run_rejection_test +from tests.helpers.api_test_helper import APITestHelper @pytest.mark.asyncio -async def test_rejection_individual_record(api_test_helper): +async def test_rejection_individual_record(api_test_helper: APITestHelper): await run_rejection_test( api_test_helper, rejection_reason=RejectionReason.INDIVIDUAL_RECORD, - 
url_status=URLStatus.INDIVIDUAL_RECORD + url_status=URLStatus.OK ) + # Get FlagURLValidated and confirm Individual Record + flag: FlagURLValidated = (await api_test_helper.adb_client().get_all(FlagURLValidated))[0] + assert flag.type == URLValidatedType.INDIVIDUAL_RECORD + diff --git a/tests/automated/integration/api/review/rejection/test_not_relevant.py b/tests/automated/integration/api/review/rejection/test_not_relevant.py index 1ad2847f..03ee72d3 100644 --- a/tests/automated/integration/api/review/rejection/test_not_relevant.py +++ b/tests/automated/integration/api/review/rejection/test_not_relevant.py @@ -2,6 +2,8 @@ from src.api.endpoints.review.enums import RejectionReason from src.collectors.enums import URLStatus +from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from tests.automated.integration.api.review.rejection.helpers import run_rejection_test @@ -10,5 +12,9 @@ async def test_rejection_not_relevant(api_test_helper): await run_rejection_test( api_test_helper, rejection_reason=RejectionReason.NOT_RELEVANT, - url_status=URLStatus.NOT_RELEVANT + url_status=URLStatus.OK ) + + # Get FlagURLValidated and confirm Not Relevant + flag: FlagURLValidated = (await api_test_helper.adb_client().get_all(FlagURLValidated))[0] + assert flag.type == URLValidatedType.NOT_RELEVANT \ No newline at end of file diff --git a/tests/automated/integration/api/review/test_approve_and_get_next_source.py b/tests/automated/integration/api/review/test_approve_and_get_next_source.py index bfa126b1..69cf13d2 100644 --- a/tests/automated/integration/api/review/test_approve_and_get_next_source.py +++ b/tests/automated/integration/api/review/test_approve_and_get_next_source.py @@ -6,6 +6,8 @@ from src.core.enums import RecordType from src.db.constants import PLACEHOLDER_AGENCY_NAME from src.db.models.impl.agency.sqlalchemy import Agency +from src.db.models.impl.flag.url_validated.enums 
import URLValidatedType +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.optional_data_source_metadata import URLOptionalDataSourceMetadata @@ -55,7 +57,7 @@ async def test_approve_and_get_next_source_for_review(api_test_helper): url = urls[0] assert url.id == url_mapping.url_id assert url.record_type == RecordType.ARREST_RECORDS - assert url.status == URLStatus.VALIDATED + assert url.status == URLStatus.OK assert url.name == "New Test Name" assert url.description == "New Test Description" @@ -76,3 +78,8 @@ async def test_approve_and_get_next_source_for_review(api_test_helper): for agency in agencies: if agency.agency_id == additional_agency: assert agency.name == PLACEHOLDER_AGENCY_NAME + + # Confirm presence of FlagURLValidated + flag_url_validated = await adb_client.get_all(FlagURLValidated) + assert len(flag_url_validated) == 1 + assert flag_url_validated[0].type == URLValidatedType.DATA_SOURCE \ No newline at end of file diff --git a/tests/automated/integration/api/review/test_batch_filtering.py b/tests/automated/integration/api/review/test_batch_filtering.py index 2e8aa63c..481f7e90 100644 --- a/tests/automated/integration/api/review/test_batch_filtering.py +++ b/tests/automated/integration/api/review/test_batch_filtering.py @@ -1,21 +1,37 @@ import pytest +from src.collectors.enums import URLStatus +from src.db.dtos.url.mapping import URLMapping +from tests.helpers.data_creator.core import DBDataCreator +from tests.helpers.data_creator.models.creation_info.batch.v1 import BatchURLCreationInfo + @pytest.mark.asyncio async def test_batch_filtering( - batch_url_creation_info, + batch_url_creation_info: BatchURLCreationInfo, api_test_helper ): ath = api_test_helper rv = ath.request_validator + dbdc: DBDataCreator = ath.db_data_creator + + batch_id: int = 
batch_url_creation_info.batch_id + + validated_url_mappings: list[URLMapping] = await dbdc.create_validated_urls(count=4) + validated_url_ids: list[int] = [url_mapping.url_id for url_mapping in validated_url_mappings] + await dbdc.create_batch_url_links( + url_ids=validated_url_ids, + batch_id=batch_id + ) + # Receive null batch info if batch id not provided outer_result_no_batch_info = await rv.review_next_source() assert outer_result_no_batch_info.next_source.batch_info is None # Get batch info if batch id is provided outer_result = await ath.request_validator.review_next_source( - batch_id=batch_url_creation_info.batch_id + batch_id=batch_id ) assert outer_result.remaining == 2 batch_info = outer_result.next_source.batch_info diff --git a/tests/automated/integration/api/test_annotate.py b/tests/automated/integration/api/test_annotate.py deleted file mode 100644 index 51688765..00000000 --- a/tests/automated/integration/api/test_annotate.py +++ /dev/null @@ -1,756 +0,0 @@ -from http import HTTPStatus - -import pytest -from fastapi import HTTPException - -from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo -from src.api.endpoints.annotate.all.post.dto import AllAnnotationPostInfo -from src.api.endpoints.annotate.dtos.record_type.post import RecordTypeAnnotationPostInfo -from src.api.endpoints.annotate.dtos.record_type.response import GetNextRecordTypeAnnotationResponseOuterInfo -from src.api.endpoints.annotate.relevance.get.dto import GetNextRelevanceAnnotationResponseOuterInfo -from src.api.endpoints.annotate.relevance.post.dto import RelevanceAnnotationPostInfo -from src.core.tasks.url.operators.html.scraper.parser.dtos.response_html import ResponseHTMLInfo -from src.db.dtos.url.insert import InsertURLsInfo -from src.db.dtos.url.mapping import URLMapping -from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion -from src.core.error_manager.enums import ErrorTypes -from src.core.enums import RecordType, 
SuggestionType, SuggestedStatus -from src.core.exceptions import FailedValidationException -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.relevant.user import UserRelevantSuggestion -from tests.helpers.setup.annotate_agency.model import AnnotateAgencySetupInfo -from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review -from tests.helpers.setup.annotate_agency.core import setup_for_annotate_agency -from tests.helpers.data_creator.models.creation_info.batch.v1 import BatchURLCreationInfo -from tests.automated.integration.api.conftest import MOCK_USER_ID - -def check_url_mappings_match( - map_1: URLMapping, - map_2: URLMapping -): - assert map_1.url_id == map_2.url_id - assert map_2.url == map_2.url - -def check_html_info_not_empty( - html_info: ResponseHTMLInfo -): - assert not html_info_empty(html_info) - -def html_info_empty( - html_info: ResponseHTMLInfo -) -> bool: - return html_info.description == "" and html_info.title == "" - -@pytest.mark.asyncio -async def test_annotate_relevancy(api_test_helper): - ath = api_test_helper - - batch_id = ath.db_data_creator.batch() - - # Create 2 URLs with outcome `pending` - iui: InsertURLsInfo = ath.db_data_creator.urls(batch_id=batch_id, url_count=2) - - url_1 = iui.url_mappings[0] - url_2 = iui.url_mappings[1] - - # Add `Relevancy` attribute with value `True` to 1st URL - await ath.db_data_creator.auto_relevant_suggestions( - url_id=url_1.url_id, - relevant=True - ) - - # Add 'Relevancy' attribute with value `False` to 2nd URL - await ath.db_data_creator.auto_relevant_suggestions( - url_id=url_2.url_id, - relevant=False - ) - - # Add HTML data to both - await ath.db_data_creator.html_data([url_1.url_id, url_2.url_id]) - # Call `GET` `/annotate/relevance` and receive next URL - request_info_1: GetNextRelevanceAnnotationResponseOuterInfo = api_test_helper.request_validator.get_next_relevance_annotation() - 
inner_info_1 = request_info_1.next_annotation - - check_url_mappings_match(inner_info_1.url_info, url_1) - check_html_info_not_empty(inner_info_1.html_info) - - # Validate that the correct relevant value is returned - assert inner_info_1.annotation.is_relevant is True - - # A second user should see the same URL - - - # Annotate with value 'False' and get next URL - request_info_2: GetNextRelevanceAnnotationResponseOuterInfo = api_test_helper.request_validator.post_relevance_annotation_and_get_next( - url_id=inner_info_1.url_info.url_id, - relevance_annotation_post_info=RelevanceAnnotationPostInfo( - suggested_status=SuggestedStatus.NOT_RELEVANT - ) - ) - - inner_info_2 = request_info_2.next_annotation - - check_url_mappings_match( - inner_info_2.url_info, - url_2 - ) - check_html_info_not_empty(inner_info_2.html_info) - - request_info_3: GetNextRelevanceAnnotationResponseOuterInfo = api_test_helper.request_validator.post_relevance_annotation_and_get_next( - url_id=inner_info_2.url_info.url_id, - relevance_annotation_post_info=RelevanceAnnotationPostInfo( - suggested_status=SuggestedStatus.RELEVANT - ) - ) - - assert request_info_3.next_annotation is None - - # Get all URL annotations. 
Confirm they exist for user - adb_client = ath.adb_client() - results: list[UserRelevantSuggestion] = await adb_client.get_all(UserRelevantSuggestion) - result_1 = results[0] - result_2 = results[1] - - assert result_1.url_id == inner_info_1.url_info.url_id - assert result_1.suggested_status == SuggestedStatus.NOT_RELEVANT.value - - assert result_2.url_id == inner_info_2.url_info.url_id - assert result_2.suggested_status == SuggestedStatus.RELEVANT.value - - # If user submits annotation for same URL, the URL should be overwritten - request_info_4: GetNextRelevanceAnnotationResponseOuterInfo = api_test_helper.request_validator.post_relevance_annotation_and_get_next( - url_id=inner_info_1.url_info.url_id, - relevance_annotation_post_info=RelevanceAnnotationPostInfo( - suggested_status=SuggestedStatus.RELEVANT - ) - ) - - assert request_info_4.next_annotation is None - - results: list[UserRelevantSuggestion] = await adb_client.get_all(UserRelevantSuggestion) - assert len(results) == 2 - - for result in results: - if result.url_id == inner_info_1.url_info.url_id: - assert results[0].suggested_status == SuggestedStatus.RELEVANT.value - -async def post_and_validate_relevancy_annotation(ath, url_id, annotation: SuggestedStatus): - response = ath.request_validator.post_relevance_annotation_and_get_next( - url_id=url_id, - relevance_annotation_post_info=RelevanceAnnotationPostInfo( - suggested_status=annotation - ) - ) - - assert response.next_annotation is None - - results: list[UserRelevantSuggestion] = await ath.adb_client().get_all(UserRelevantSuggestion) - assert len(results) == 1 - assert results[0].suggested_status == annotation.value - -@pytest.mark.asyncio -async def test_annotate_relevancy_broken_page(api_test_helper): - ath = api_test_helper - - creation_info = await ath.db_data_creator.batch_and_urls(url_count=1, with_html_content=False) - - await post_and_validate_relevancy_annotation( - ath, - url_id=creation_info.url_ids[0], - 
annotation=SuggestedStatus.BROKEN_PAGE_404 - ) - -@pytest.mark.asyncio -async def test_annotate_relevancy_individual_record(api_test_helper): - ath = api_test_helper - - creation_info: BatchURLCreationInfo = await ath.db_data_creator.batch_and_urls( - url_count=1 - ) - - await post_and_validate_relevancy_annotation( - ath, - url_id=creation_info.url_ids[0], - annotation=SuggestedStatus.INDIVIDUAL_RECORD - ) - -@pytest.mark.asyncio -async def test_annotate_relevancy_already_annotated_by_different_user( - api_test_helper -): - ath = api_test_helper - - creation_info: BatchURLCreationInfo = await ath.db_data_creator.batch_and_urls( - url_count=1 - ) - - await ath.db_data_creator.user_relevant_suggestion( - url_id=creation_info.url_ids[0], - user_id=2, - suggested_status=SuggestedStatus.RELEVANT - ) - - # Annotate with different user (default is 1) and get conflict error - try: - response = await ath.request_validator.post_relevance_annotation_and_get_next( - url_id=creation_info.url_ids[0], - relevance_annotation_post_info=RelevanceAnnotationPostInfo( - suggested_status=SuggestedStatus.NOT_RELEVANT - ) - ) - except HTTPException as e: - assert e.status_code == HTTPStatus.CONFLICT - assert e.detail["detail"]["code"] == ErrorTypes.ANNOTATION_EXISTS.value - assert e.detail["detail"]["message"] == f"Annotation of type RELEVANCE already exists for url {creation_info.url_ids[0]}" - - -@pytest.mark.asyncio -async def test_annotate_relevancy_no_html(api_test_helper): - ath = api_test_helper - - batch_id = ath.db_data_creator.batch() - - # Create 2 URLs with outcome `pending` - iui: InsertURLsInfo = ath.db_data_creator.urls(batch_id=batch_id, url_count=2) - - url_1 = iui.url_mappings[0] - url_2 = iui.url_mappings[1] - - # Add `Relevancy` attribute with value `True` to 1st URL - await ath.db_data_creator.auto_relevant_suggestions( - url_id=url_1.url_id, - relevant=True - ) - - # Add 'Relevancy' attribute with value `False` to 2nd URL - await 
ath.db_data_creator.auto_relevant_suggestions( - url_id=url_2.url_id, - relevant=False - ) - - # Call `GET` `/annotate/relevance` and receive next URL - request_info_1: GetNextRelevanceAnnotationResponseOuterInfo = api_test_helper.request_validator.get_next_relevance_annotation() - inner_info_1 = request_info_1.next_annotation - - check_url_mappings_match(inner_info_1.url_info, url_1) - assert html_info_empty(inner_info_1.html_info) - -@pytest.mark.asyncio -async def test_annotate_record_type(api_test_helper): - ath = api_test_helper - - batch_id = ath.db_data_creator.batch() - - # Create 2 URLs with outcome `pending` - iui: InsertURLsInfo = ath.db_data_creator.urls(batch_id=batch_id, url_count=2) - - url_1 = iui.url_mappings[0] - url_2 = iui.url_mappings[1] - - # Add record type attribute with value `Accident Reports` to 1st URL - await ath.db_data_creator.auto_record_type_suggestions( - url_id=url_1.url_id, - record_type=RecordType.ACCIDENT_REPORTS - ) - - # Add 'Record Type' attribute with value `Dispatch Recordings` to 2nd URL - await ath.db_data_creator.auto_record_type_suggestions( - url_id=url_2.url_id, - record_type=RecordType.DISPATCH_RECORDINGS - ) - - # Add HTML data to both - await ath.db_data_creator.html_data([url_1.url_id, url_2.url_id]) - - # Call `GET` `/annotate/record-type` and receive next URL - request_info_1: GetNextRecordTypeAnnotationResponseOuterInfo = api_test_helper.request_validator.get_next_record_type_annotation() - inner_info_1 = request_info_1.next_annotation - - check_url_mappings_match(inner_info_1.url_info, url_1) - check_html_info_not_empty(inner_info_1.html_info) - - # Validate that the correct record type is returned - assert inner_info_1.suggested_record_type == RecordType.ACCIDENT_REPORTS - - # Annotate with value 'Personnel Records' and get next URL - request_info_2: GetNextRecordTypeAnnotationResponseOuterInfo = api_test_helper.request_validator.post_record_type_annotation_and_get_next( - 
url_id=inner_info_1.url_info.url_id, - record_type_annotation_post_info=RecordTypeAnnotationPostInfo( - record_type=RecordType.PERSONNEL_RECORDS - ) - ) - - inner_info_2 = request_info_2.next_annotation - - check_url_mappings_match(inner_info_2.url_info, url_2) - check_html_info_not_empty(inner_info_2.html_info) - - request_info_3: GetNextRecordTypeAnnotationResponseOuterInfo = api_test_helper.request_validator.post_record_type_annotation_and_get_next( - url_id=inner_info_2.url_info.url_id, - record_type_annotation_post_info=RecordTypeAnnotationPostInfo( - record_type=RecordType.ANNUAL_AND_MONTHLY_REPORTS - ) - ) - - assert request_info_3.next_annotation is None - - # Get all URL annotations. Confirm they exist for user - adb_client = ath.adb_client() - results: list[UserRecordTypeSuggestion] = await adb_client.get_all(UserRecordTypeSuggestion) - result_1 = results[0] - result_2 = results[1] - - assert result_1.url_id == inner_info_1.url_info.url_id - assert result_1.record_type == RecordType.PERSONNEL_RECORDS.value - - assert result_2.url_id == inner_info_2.url_info.url_id - assert result_2.record_type == RecordType.ANNUAL_AND_MONTHLY_REPORTS.value - - # If user submits annotation for same URL, the URL should be overwritten - - request_info_4: GetNextRecordTypeAnnotationResponseOuterInfo = api_test_helper.request_validator.post_record_type_annotation_and_get_next( - url_id=inner_info_1.url_info.url_id, - record_type_annotation_post_info=RecordTypeAnnotationPostInfo( - record_type=RecordType.BOOKING_REPORTS - ) - ) - - assert request_info_4.next_annotation is None - - results: list[UserRecordTypeSuggestion] = await adb_client.get_all(UserRecordTypeSuggestion) - assert len(results) == 2 - - for result in results: - if result.url_id == inner_info_1.url_info.url_id: - assert result.record_type == RecordType.BOOKING_REPORTS.value - -@pytest.mark.asyncio -async def test_annotate_record_type_already_annotated_by_different_user( - api_test_helper -): - ath = 
api_test_helper - - creation_info: BatchURLCreationInfo = await ath.db_data_creator.batch_and_urls( - url_count=1 - ) - - await ath.db_data_creator.user_record_type_suggestion( - url_id=creation_info.url_ids[0], - user_id=2, - record_type=RecordType.ACCIDENT_REPORTS - ) - - # Annotate with different user (default is 1) and get conflict error - try: - response = await ath.request_validator.post_record_type_annotation_and_get_next( - url_id=creation_info.url_ids[0], - record_type_annotation_post_info=RecordTypeAnnotationPostInfo( - record_type=RecordType.ANNUAL_AND_MONTHLY_REPORTS - ) - ) - except HTTPException as e: - assert e.status_code == HTTPStatus.CONFLICT - assert e.detail["detail"]["code"] == ErrorTypes.ANNOTATION_EXISTS.value - assert e.detail["detail"]["message"] == f"Annotation of type RECORD_TYPE already exists for url {creation_info.url_ids[0]}" - - -@pytest.mark.asyncio -async def test_annotate_record_type_no_html_info(api_test_helper): - ath = api_test_helper - - batch_id = ath.db_data_creator.batch() - - # Create 2 URLs with outcome `pending` - iui: InsertURLsInfo = ath.db_data_creator.urls(batch_id=batch_id, url_count=2) - - url_1 = iui.url_mappings[0] - url_2 = iui.url_mappings[1] - - # Add record type attribute with value `Accident Reports` to 1st URL - await ath.db_data_creator.auto_record_type_suggestions( - url_id=url_1.url_id, - record_type=RecordType.ACCIDENT_REPORTS - ) - - # Add 'Record Type' attribute with value `Dispatch Recordings` to 2nd URL - await ath.db_data_creator.auto_record_type_suggestions( - url_id=url_2.url_id, - record_type=RecordType.DISPATCH_RECORDINGS - ) - - # Call `GET` `/annotate/record-type` and receive next URL - request_info_1: GetNextRecordTypeAnnotationResponseOuterInfo = api_test_helper.request_validator.get_next_record_type_annotation() - inner_info_1 = request_info_1.next_annotation - - check_url_mappings_match(inner_info_1.url_info, url_1) - assert html_info_empty(inner_info_1.html_info) - -@pytest.mark.asyncio 
-async def test_annotate_agency_multiple_auto_suggestions(api_test_helper): - """ - Test Scenario: Multiple Auto Suggestions - A URL has multiple Agency Auto Suggestion and has not been annotated by the User - The user should receive all of the auto suggestions with full detail - """ - ath = api_test_helper - buci: BatchURLCreationInfo = await ath.db_data_creator.batch_and_urls( - url_count=1, - with_html_content=True - ) - await ath.db_data_creator.auto_suggestions( - url_ids=buci.url_ids, - num_suggestions=2, - suggestion_type=SuggestionType.AUTO_SUGGESTION - ) - - # User requests next annotation - response = await ath.request_validator.get_next_agency_annotation() - - assert response.next_annotation - next_annotation = response.next_annotation - # Check that url_id matches the one we inserted - assert next_annotation.url_info.url_id == buci.url_ids[0] - - # Check that html data is present - assert next_annotation.html_info.description != "" - assert next_annotation.html_info.title != "" - - # Check that two agency_suggestions exist - assert len(next_annotation.agency_suggestions) == 2 - - for agency_suggestion in next_annotation.agency_suggestions: - assert agency_suggestion.suggestion_type == SuggestionType.AUTO_SUGGESTION - assert agency_suggestion.pdap_agency_id is not None - assert agency_suggestion.agency_name is not None - assert agency_suggestion.state is not None - assert agency_suggestion.county is not None - assert agency_suggestion.locality is not None - - -@pytest.mark.asyncio -async def test_annotate_agency_multiple_auto_suggestions_no_html(api_test_helper): - """ - Test Scenario: Multiple Auto Suggestions - A URL has multiple Agency Auto Suggestion and has not been annotated by the User - The user should receive all of the auto suggestions with full detail - """ - ath = api_test_helper - buci: BatchURLCreationInfo = await ath.db_data_creator.batch_and_urls( - url_count=1, - with_html_content=False - ) - await ath.db_data_creator.auto_suggestions( - 
url_ids=buci.url_ids, - num_suggestions=2, - suggestion_type=SuggestionType.AUTO_SUGGESTION - ) - - # User requests next annotation - response = await ath.request_validator.get_next_agency_annotation() - - assert response.next_annotation - next_annotation = response.next_annotation - # Check that url_id matches the one we inserted - assert next_annotation.url_info.url_id == buci.url_ids[0] - - # Check that html data is not present - assert next_annotation.html_info.description == "" - assert next_annotation.html_info.title == "" - -@pytest.mark.asyncio -async def test_annotate_agency_single_unknown_auto_suggestion(api_test_helper): - """ - Test Scenario: Single Unknown Auto Suggestion - A URL has a single Unknown Agency Auto Suggestion and has not been annotated by the User - The user should receive a single Unknown Auto Suggestion lacking other detail - """ - ath = api_test_helper - buci: BatchURLCreationInfo = await ath.db_data_creator.batch_and_urls( - url_count=1, - with_html_content=True - ) - await ath.db_data_creator.auto_suggestions( - url_ids=buci.url_ids, - num_suggestions=1, - suggestion_type=SuggestionType.UNKNOWN - ) - response = await ath.request_validator.get_next_agency_annotation() - - assert response.next_annotation - next_annotation = response.next_annotation - # Check that url_id matches the one we inserted - assert next_annotation.url_info.url_id == buci.url_ids[0] - - # Check that html data is present - assert next_annotation.html_info.description != "" - assert next_annotation.html_info.title != "" - - # Check that one agency_suggestion exists - assert len(next_annotation.agency_suggestions) == 1 - - agency_suggestion = next_annotation.agency_suggestions[0] - - assert agency_suggestion.suggestion_type == SuggestionType.UNKNOWN - assert agency_suggestion.pdap_agency_id is None - assert agency_suggestion.agency_name is None - assert agency_suggestion.state is None - assert agency_suggestion.county is None - assert agency_suggestion.locality is 
None - - -@pytest.mark.asyncio -async def test_annotate_agency_single_confirmed_agency(api_test_helper): - """ - Test Scenario: Single Confirmed Agency - A URL has a single Confirmed Agency and has not been annotated by the User - The user should not receive this URL to annotate - """ - ath = api_test_helper - buci: BatchURLCreationInfo = await ath.db_data_creator.batch_and_urls( - url_count=1, - with_html_content=True - ) - await ath.db_data_creator.confirmed_suggestions( - url_ids=buci.url_ids, - ) - response = await ath.request_validator.get_next_agency_annotation() - assert response.next_annotation is None - -@pytest.mark.asyncio -async def test_annotate_agency_other_user_annotation(api_test_helper): - """ - Test Scenario: Other User Annotation - A URL has been annotated by another User - Our user should still receive this URL to annotate - """ - ath = api_test_helper - setup_info: AnnotateAgencySetupInfo = await setup_for_annotate_agency( - db_data_creator=ath.db_data_creator, - url_count=1 - ) - url_ids = setup_info.url_ids - - response = await ath.request_validator.get_next_agency_annotation() - - assert response.next_annotation - next_annotation = response.next_annotation - # Check that url_id matches the one we inserted - assert next_annotation.url_info.url_id == url_ids[0] - - # Check that html data is present - assert next_annotation.html_info.description != "" - assert next_annotation.html_info.title != "" - - # Check that one agency_suggestion exists - assert len(next_annotation.agency_suggestions) == 1 - - # Test that another user can insert a suggestion - await ath.db_data_creator.manual_suggestion( - user_id=MOCK_USER_ID + 1, - url_id=url_ids[0], - ) - - # After this, text that our user does not receive this URL - response = await ath.request_validator.get_next_agency_annotation() - assert response.next_annotation is None - -@pytest.mark.asyncio -async def test_annotate_agency_submit_and_get_next(api_test_helper): - """ - Test Scenario: Submit and 
Get Next (no other URL available) - A URL has been annotated by our User, and no other valid URLs have not been annotated - Our user should not receive another URL to annotate - Until another relevant URL is added - """ - ath = api_test_helper - setup_info: AnnotateAgencySetupInfo = await setup_for_annotate_agency( - db_data_creator=ath.db_data_creator, - url_count=2 - ) - url_ids = setup_info.url_ids - - # User should submit an annotation and receive the next - response = await ath.request_validator.post_agency_annotation_and_get_next( - url_id=url_ids[0], - agency_annotation_post_info=URLAgencyAnnotationPostInfo( - suggested_agency=await ath.db_data_creator.agency(), - is_new=False - ) - - ) - assert response.next_annotation is not None - - # User should submit this annotation and receive none for the next - response = await ath.request_validator.post_agency_annotation_and_get_next( - url_id=url_ids[1], - agency_annotation_post_info=URLAgencyAnnotationPostInfo( - suggested_agency=await ath.db_data_creator.agency(), - is_new=False - ) - ) - assert response.next_annotation is None - - -@pytest.mark.asyncio -async def test_annotate_agency_submit_new(api_test_helper): - """ - Test Scenario: Submit New - Our user receives an annotation and marks it as `NEW` - This should complete successfully - And within the database the annotation should be marked as `NEW` - """ - ath = api_test_helper - adb_client = ath.adb_client() - setup_info: AnnotateAgencySetupInfo = await setup_for_annotate_agency( - db_data_creator=ath.db_data_creator, - url_count=1 - ) - url_ids = setup_info.url_ids - - # User should submit an annotation and mark it as New - response = await ath.request_validator.post_agency_annotation_and_get_next( - url_id=url_ids[0], - agency_annotation_post_info=URLAgencyAnnotationPostInfo( - suggested_agency=await ath.db_data_creator.agency(), - is_new=True - ) - ) - assert response.next_annotation is None - - # Within database, the annotation should be marked as `NEW` 
- all_manual_suggestions = await adb_client.get_all(UserUrlAgencySuggestion) - assert len(all_manual_suggestions) == 1 - assert all_manual_suggestions[0].is_new - -@pytest.mark.asyncio -async def test_annotate_all(api_test_helper): - """ - Test the happy path workflow for the all-annotations endpoint - The user should be able to get a valid URL (filtering on batch id if needed), - submit a full annotation, and receive another URL - """ - ath = api_test_helper - adb_client = ath.adb_client() - setup_info_1 = await setup_for_get_next_url_for_final_review( - db_data_creator=ath.db_data_creator, include_user_annotations=False - ) - url_mapping_1 = setup_info_1.url_mapping - setup_info_2 = await setup_for_get_next_url_for_final_review( - db_data_creator=ath.db_data_creator, include_user_annotations=False - ) - url_mapping_2 = setup_info_2.url_mapping - - # First, get a valid URL to annotate - get_response_1 = await ath.request_validator.get_next_url_for_all_annotations() - - # Apply the second batch id as a filter and see that a different URL is returned - get_response_2 = await ath.request_validator.get_next_url_for_all_annotations( - batch_id=setup_info_2.batch_id - ) - - assert get_response_1.next_annotation.url_info.url_id != get_response_2.next_annotation.url_info.url_id - - # Annotate the first and submit - agency_id = await ath.db_data_creator.agency() - post_response_1 = await ath.request_validator.post_all_annotations_and_get_next( - url_id=url_mapping_1.url_id, - all_annotations_post_info=AllAnnotationPostInfo( - suggested_status=SuggestedStatus.RELEVANT, - record_type=RecordType.ACCIDENT_REPORTS, - agency=URLAgencyAnnotationPostInfo( - is_new=False, - suggested_agency=agency_id - ) - ) - ) - assert post_response_1.next_annotation is not None - - # Confirm the second is received - assert post_response_1.next_annotation.url_info.url_id == url_mapping_2.url_id - - # Upon submitting the second, confirm that no more URLs are returned through either POST or GET - 
post_response_2 = await ath.request_validator.post_all_annotations_and_get_next( - url_id=url_mapping_2.url_id, - all_annotations_post_info=AllAnnotationPostInfo( - suggested_status=SuggestedStatus.NOT_RELEVANT, - ) - ) - assert post_response_2.next_annotation is None - - get_response_3 = await ath.request_validator.get_next_url_for_all_annotations() - assert get_response_3.next_annotation is None - - - # Check that all annotations are present in the database - - # Should be two relevance annotations, one True and one False - all_relevance_suggestions: list[UserRelevantSuggestion] = await adb_client.get_all(UserRelevantSuggestion) - assert len(all_relevance_suggestions) == 2 - assert all_relevance_suggestions[0].suggested_status == SuggestedStatus.RELEVANT.value - assert all_relevance_suggestions[1].suggested_status == SuggestedStatus.NOT_RELEVANT.value - - # Should be one agency - all_agency_suggestions = await adb_client.get_all(UserUrlAgencySuggestion) - assert len(all_agency_suggestions) == 1 - assert all_agency_suggestions[0].is_new == False - assert all_agency_suggestions[0].agency_id == agency_id - - # Should be one record type - all_record_type_suggestions = await adb_client.get_all(UserRecordTypeSuggestion) - assert len(all_record_type_suggestions) == 1 - assert all_record_type_suggestions[0].record_type == RecordType.ACCIDENT_REPORTS.value - -@pytest.mark.asyncio -async def test_annotate_all_post_batch_filtering(api_test_helper): - """ - Batch filtering should also work when posting annotations - """ - ath = api_test_helper - adb_client = ath.adb_client() - setup_info_1 = await setup_for_get_next_url_for_final_review( - db_data_creator=ath.db_data_creator, include_user_annotations=False - ) - url_mapping_1 = setup_info_1.url_mapping - setup_info_2 = await setup_for_get_next_url_for_final_review( - db_data_creator=ath.db_data_creator, include_user_annotations=False - ) - setup_info_3 = await setup_for_get_next_url_for_final_review( - 
db_data_creator=ath.db_data_creator, include_user_annotations=False - ) - url_mapping_3 = setup_info_3.url_mapping - - # Submit the first annotation, using the third batch id, and receive the third URL - post_response_1 = await ath.request_validator.post_all_annotations_and_get_next( - url_id=url_mapping_1.url_id, - batch_id=setup_info_3.batch_id, - all_annotations_post_info=AllAnnotationPostInfo( - suggested_status=SuggestedStatus.RELEVANT, - record_type=RecordType.ACCIDENT_REPORTS, - agency=URLAgencyAnnotationPostInfo( - is_new=True - ) - ) - ) - - assert post_response_1.next_annotation.url_info.url_id == url_mapping_3.url_id - - -@pytest.mark.asyncio -async def test_annotate_all_validation_error(api_test_helper): - """ - Validation errors in the PostInfo DTO should result in a 400 BAD REQUEST response - """ - ath = api_test_helper - setup_info_1 = await setup_for_get_next_url_for_final_review( - db_data_creator=ath.db_data_creator, include_user_annotations=False - ) - url_mapping_1 = setup_info_1.url_mapping - - with pytest.raises(FailedValidationException) as e: - response = await ath.request_validator.post_all_annotations_and_get_next( - url_id=url_mapping_1.url_id, - all_annotations_post_info=AllAnnotationPostInfo( - suggested_status=SuggestedStatus.NOT_RELEVANT, - record_type=RecordType.ACCIDENT_REPORTS - ) - ) diff --git a/tests/automated/integration/api/test_batch.py b/tests/automated/integration/api/test_batch.py deleted file mode 100644 index 4dd21a49..00000000 --- a/tests/automated/integration/api/test_batch.py +++ /dev/null @@ -1,237 +0,0 @@ -import pytest - -from src.db.models.impl.batch.pydantic import BatchInfo -from src.db.dtos.url.insert import InsertURLsInfo -from src.collectors.impl.example.dtos.input import ExampleInputDTO -from src.collectors.enums import CollectorType, URLStatus -from src.core.enums import BatchStatus -from tests.helpers.batch_creation_parameters.url_creation_parameters import TestURLCreationParameters -from 
tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters - - -@pytest.mark.asyncio -async def test_get_batch_summaries(api_test_helper): - ath = api_test_helper - - batch_params = [ - TestBatchCreationParameters( - urls=[ - TestURLCreationParameters( - count=1, - status=URLStatus.PENDING - ), - TestURLCreationParameters( - count=2, - status=URLStatus.SUBMITTED - ) - ] - ), - TestBatchCreationParameters( - urls=[ - TestURLCreationParameters( - count=4, - status=URLStatus.NOT_RELEVANT - ), - TestURLCreationParameters( - count=3, - status=URLStatus.ERROR - ) - ] - ), - TestBatchCreationParameters( - urls=[ - TestURLCreationParameters( - count=7, - status=URLStatus.DUPLICATE - ), - TestURLCreationParameters( - count=1, - status=URLStatus.SUBMITTED - ) - ] - ) - ] - - batch_1_creation_info = await ath.db_data_creator.batch_v2(batch_params[0]) - batch_2_creation_info = await ath.db_data_creator.batch_v2(batch_params[1]) - batch_3_creation_info = await ath.db_data_creator.batch_v2(batch_params[2]) - - batch_1_id = batch_1_creation_info.batch_id - batch_2_id = batch_2_creation_info.batch_id - batch_3_id = batch_3_creation_info.batch_id - - - response = ath.request_validator.get_batch_statuses() - results = response.results - - assert len(results) == 3 - - result_1 = results[0] - assert result_1.id == batch_1_id - assert result_1.status == BatchStatus.READY_TO_LABEL - counts_1 = result_1.url_counts - assert counts_1.total == 3 - assert counts_1.pending == 1 - assert counts_1.submitted == 2 - assert counts_1.not_relevant == 0 - assert counts_1.duplicate == 0 - assert counts_1.errored == 0 - - result_2 = results[1] - assert result_2.id == batch_2_id - counts_2 = result_2.url_counts - assert counts_2.total == 7 - assert counts_2.not_relevant == 4 - assert counts_2.errored == 3 - assert counts_2.pending == 0 - assert counts_2.submitted == 0 - assert counts_2.duplicate == 0 - - result_3 = results[2] - assert result_3.id == batch_3_id - counts_3 = 
result_3.url_counts - assert counts_3.total == 8 - assert counts_3.not_relevant == 0 - assert counts_3.errored == 0 - assert counts_3.pending == 0 - assert counts_3.submitted == 1 - assert counts_3.duplicate == 7 - - - - - - -@pytest.mark.asyncio -async def test_get_batch_summaries_pending_url_filter(api_test_helper): - ath = api_test_helper - - # Add an errored out batch - batch_error = await ath.db_data_creator.batch_and_urls( - strategy=CollectorType.EXAMPLE, - url_count=2, - batch_status=BatchStatus.ERROR - ) - - # Add a batch with pending urls - batch_pending = await ath.db_data_creator.batch_and_urls( - strategy=CollectorType.EXAMPLE, - url_count=2, - batch_status=BatchStatus.READY_TO_LABEL, - with_html_content=True, - url_status=URLStatus.PENDING - ) - - # Add a batch with submitted URLs - batch_submitted = await ath.db_data_creator.batch_and_urls( - strategy=CollectorType.EXAMPLE, - url_count=2, - batch_status=BatchStatus.READY_TO_LABEL, - with_html_content=True, - url_status=URLStatus.SUBMITTED - ) - - # Add an aborted batch - batch_aborted = await ath.db_data_creator.batch_and_urls( - strategy=CollectorType.EXAMPLE, - url_count=2, - batch_status=BatchStatus.ABORTED - ) - - # Add a batch with validated URLs - batch_validated = await ath.db_data_creator.batch_and_urls( - strategy=CollectorType.EXAMPLE, - url_count=2, - batch_status=BatchStatus.READY_TO_LABEL, - with_html_content=True, - url_status=URLStatus.VALIDATED - ) - - # Test filter for pending URLs and only retrieve the second batch - pending_urls_results = ath.request_validator.get_batch_statuses( - has_pending_urls=True - ) - - assert len(pending_urls_results.results) == 1 - assert pending_urls_results.results[0].id == batch_pending.batch_id - - # Test filter without pending URLs and retrieve the other four batches - no_pending_urls_results = ath.request_validator.get_batch_statuses( - has_pending_urls=False - ) - - assert len(no_pending_urls_results.results) == 4 - for result in 
no_pending_urls_results.results: - assert result.id in [ - batch_error.batch_id, - batch_submitted.batch_id, - batch_validated.batch_id, - batch_aborted.batch_id - ] - - # Test no filter for pending URLs and retrieve all batches - no_filter_results = ath.request_validator.get_batch_statuses() - - assert len(no_filter_results.results) == 5 - - - - -def test_abort_batch(api_test_helper): - ath = api_test_helper - - dto = ExampleInputDTO( - sleep_time=1 - ) - - batch_id = ath.request_validator.example_collector(dto=dto)["batch_id"] - - response = ath.request_validator.abort_batch(batch_id=batch_id) - - assert response.message == "Batch aborted." - - bi: BatchInfo = ath.request_validator.get_batch_info(batch_id=batch_id) - - assert bi.status == BatchStatus.ABORTED - -def test_get_batch_urls(api_test_helper): - - # Insert batch and urls into database - ath = api_test_helper - batch_id = ath.db_data_creator.batch() - iui: InsertURLsInfo = ath.db_data_creator.urls(batch_id=batch_id, url_count=101) - - response = ath.request_validator.get_batch_urls(batch_id=batch_id, page=1) - assert len(response.urls) == 100 - # Check that the first url corresponds to the first url inserted - assert response.urls[0].url == iui.url_mappings[0].url - # Check that the last url corresponds to the 100th url inserted - assert response.urls[-1].url == iui.url_mappings[99].url - - - # Check that a more limited set of urls exist - response = ath.request_validator.get_batch_urls(batch_id=batch_id, page=2) - assert len(response.urls) == 1 - # Check that this url corresponds to the last url inserted - assert response.urls[0].url == iui.url_mappings[-1].url - -def test_get_duplicate_urls(api_test_helper): - - # Insert batch and url into database - ath = api_test_helper - batch_id = ath.db_data_creator.batch() - iui: InsertURLsInfo = ath.db_data_creator.urls(batch_id=batch_id, url_count=101) - # Get a list of all url ids - url_ids = [url.url_id for url in iui.url_mappings] - - # Create a second batch 
which will be associated with the duplicates - dup_batch_id = ath.db_data_creator.batch() - - # Insert duplicate urls into database - ath.db_data_creator.duplicate_urls(duplicate_batch_id=dup_batch_id, url_ids=url_ids) - - response = ath.request_validator.get_batch_url_duplicates(batch_id=dup_batch_id, page=1) - assert len(response.duplicates) == 100 - - response = ath.request_validator.get_batch_url_duplicates(batch_id=dup_batch_id, page=2) - assert len(response.duplicates) == 1 \ No newline at end of file diff --git a/tests/automated/integration/api/test_manual_batch.py b/tests/automated/integration/api/test_manual_batch.py index 9b3fb326..1d2e595d 100644 --- a/tests/automated/integration/api/test_manual_batch.py +++ b/tests/automated/integration/api/test_manual_batch.py @@ -2,7 +2,7 @@ import pytest from src.api.endpoints.collector.dtos.manual_batch.post import ManualBatchInnerInputDTO, ManualBatchInputDTO -from src.db.models.impl.link.batch_url import LinkBatchURL +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.optional_data_source_metadata import URLOptionalDataSourceMetadata from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.batch.sqlalchemy import Batch diff --git a/tests/automated/integration/core/async_/run_task/test_break_loop.py b/tests/automated/integration/core/async_/run_task/test_break_loop.py index 0d8a9bc2..71b5704f 100644 --- a/tests/automated/integration/core/async_/run_task/test_break_loop.py +++ b/tests/automated/integration/core/async_/run_task/test_break_loop.py @@ -21,9 +21,9 @@ async def test_run_task_break_loop(db_data_creator: DBDataCreator): and an alert should be sent to discord """ - async def run_task(self, task_id: int) -> TaskOperatorRunInfo: + async def run_task(self) -> TaskOperatorRunInfo: return TaskOperatorRunInfo( - task_id=task_id, + task_id=1, outcome=TaskOperatorOutcome.SUCCESS, task_type=TaskType.HTML ) diff --git 
a/tests/automated/integration/core/async_/run_task/test_prereq_met.py b/tests/automated/integration/core/async_/run_task/test_prereq_met.py index a7724a45..e5425fd9 100644 --- a/tests/automated/integration/core/async_/run_task/test_prereq_met.py +++ b/tests/automated/integration/core/async_/run_task/test_prereq_met.py @@ -18,12 +18,11 @@ async def test_run_task_prereq_met(db_data_creator: DBDataCreator): """ When a task pre-requisite is met, the task should be run - And a task entry should be created in the database """ - async def run_task(self, task_id: int) -> TaskOperatorRunInfo: + async def run_task(self) -> TaskOperatorRunInfo: return TaskOperatorRunInfo( - task_id=task_id, + task_id=1, task_type=TaskType.HTML, outcome=TaskOperatorOutcome.SUCCESS, ) @@ -48,9 +47,4 @@ async def run_task(self, task_id: int) -> TaskOperatorRunInfo: # There should be two calls to meets_task_prerequisites mock_operator.meets_task_prerequisites.assert_has_calls([call(), call()]) - results = await db_data_creator.adb_client.get_all(Task) - - assert len(results) == 1 - assert results[0].task_status == BatchStatus.IN_PROCESS.value - core.task_manager.conclude_task.assert_called_once() diff --git a/tests/automated/integration/db/client/approve_url/test_basic.py b/tests/automated/integration/db/client/approve_url/test_basic.py index 2a7f9569..62f215fb 100644 --- a/tests/automated/integration/db/client/approve_url/test_basic.py +++ b/tests/automated/integration/db/client/approve_url/test_basic.py @@ -3,6 +3,7 @@ from src.api.endpoints.review.approve.dto import FinalReviewApprovalInfo from src.collectors.enums import URLStatus from src.core.enums import RecordType +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.optional_data_source_metadata import URLOptionalDataSourceMetadata @@ -42,10 +43,16 @@ async def 
test_approve_url_basic(db_data_creator: DBDataCreator): url = urls[0] assert url.id == url_mapping.url_id assert url.record_type == RecordType.ARREST_RECORDS - assert url.status == URLStatus.VALIDATED + assert url.status == URLStatus.OK assert url.name == "Test Name" assert url.description == "Test Description" + # Confirm presence of validated flag + validated_flags: list[FlagURLValidated] = await adb_client.get_all(FlagURLValidated) + assert len(validated_flags) == 1 + assert validated_flags[0].url_id == url_mapping.url_id + + confirmed_agency: list[LinkURLAgency] = await adb_client.get_all(LinkURLAgency) assert len(confirmed_agency) == 1 assert confirmed_agency[0].url_id == url_mapping.url_id diff --git a/tests/automated/integration/db/client/get_next_url_for_final_review/test_new_agency.py b/tests/automated/integration/db/client/get_next_url_for_final_review/test_new_agency.py deleted file mode 100644 index 72430fec..00000000 --- a/tests/automated/integration/db/client/get_next_url_for_final_review/test_new_agency.py +++ /dev/null @@ -1,41 +0,0 @@ -import pytest - -from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo -from src.core.enums import SuggestedStatus, RecordType, SuggestionType -from tests.helpers.batch_creation_parameters.annotation_info import AnnotationInfo -from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters -from tests.helpers.batch_creation_parameters.url_creation_parameters import TestURLCreationParameters -from tests.helpers.data_creator.core import DBDataCreator - - -@pytest.mark.asyncio -async def test_get_next_url_for_final_review_new_agency(db_data_creator: DBDataCreator): - """ - Test that a URL with a new agency is properly returned - """ - - # Apply batch v2 - parameters = TestBatchCreationParameters( - urls=[ - TestURLCreationParameters( - annotation_info=AnnotationInfo( - user_relevant=SuggestedStatus.RELEVANT, - user_agency=URLAgencyAnnotationPostInfo( - is_new=True - ), 
- user_record_type=RecordType.ARREST_RECORDS - ) - ) - ] - ) - creation_info = await db_data_creator.batch_v2(parameters) - outer_result = await db_data_creator.adb_client.get_next_url_for_final_review( - batch_id=None - ) - result = outer_result.next_source - - assert result is not None - user_suggestion = result.annotations.agency.user - assert user_suggestion.suggestion_type == SuggestionType.NEW_AGENCY - assert user_suggestion.pdap_agency_id is None - assert user_suggestion.agency_name is None diff --git a/tests/automated/integration/db/client/get_next_url_for_final_review/test_only_confirmed_urls.py b/tests/automated/integration/db/client/get_next_url_for_final_review/test_only_confirmed_urls.py index 7e68ada4..72706aaf 100644 --- a/tests/automated/integration/db/client/get_next_url_for_final_review/test_only_confirmed_urls.py +++ b/tests/automated/integration/db/client/get_next_url_for_final_review/test_only_confirmed_urls.py @@ -1,6 +1,7 @@ import pytest from src.collectors.enums import URLStatus +from tests.helpers.batch_creation_parameters.enums import URLCreationEnum from tests.helpers.data_creator.core import DBDataCreator @@ -14,7 +15,7 @@ async def test_get_next_url_for_final_review_only_confirmed_urls(db_data_creator url_mapping = db_data_creator.urls( batch_id=batch_id, url_count=1, - outcome=URLStatus.SUBMITTED + outcome=URLCreationEnum.SUBMITTED ).url_mappings[0] result = await db_data_creator.adb_client.get_next_url_for_final_review( diff --git a/tests/automated/integration/db/client/get_next_url_for_user_relevance_annotation/test_validated.py b/tests/automated/integration/db/client/get_next_url_for_user_relevance_annotation/test_validated.py index 95e40847..ab5acd59 100644 --- a/tests/automated/integration/db/client/get_next_url_for_user_relevance_annotation/test_validated.py +++ b/tests/automated/integration/db/client/get_next_url_for_user_relevance_annotation/test_validated.py @@ -1,6 +1,7 @@ import pytest from src.collectors.enums import 
URLStatus +from tests.helpers.batch_creation_parameters.enums import URLCreationEnum from tests.helpers.setup.annotation.core import setup_for_get_next_url_for_annotation from tests.helpers.data_creator.core import DBDataCreator @@ -12,19 +13,12 @@ async def test_get_next_url_for_user_relevance_annotation_validated( """ A validated URL should not turn up in get_next_url_for_user_annotation """ - - setup_info = await setup_for_get_next_url_for_annotation( - db_data_creator=db_data_creator, - url_count=1, - outcome=URLStatus.VALIDATED - ) - - - url_1 = setup_info.insert_urls_info.url_mappings[0] + dbdc = db_data_creator + url_1: int = (await dbdc.create_validated_urls())[0].url_id # Add `Relevancy` attribute with value `True` await db_data_creator.auto_relevant_suggestions( - url_id=url_1.url_id, + url_id=url_1, relevant=True ) diff --git a/tests/automated/integration/db/client/test_insert_urls.py b/tests/automated/integration/db/client/test_insert_urls.py index 78578c6b..f2d73f00 100644 --- a/tests/automated/integration/db/client/test_insert_urls.py +++ b/tests/automated/integration/db/client/test_insert_urls.py @@ -1,8 +1,8 @@ import pytest from src.core.enums import BatchStatus -from src.db.models.impl.batch.pydantic import BatchInfo -from src.db.models.impl.link.batch_url import LinkBatchURL +from src.db.models.impl.batch.pydantic.info import BatchInfo +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.core.pydantic.info import URLInfo from src.db.models.impl.url.core.sqlalchemy import URL diff --git a/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/check.py b/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/check.py new file mode 100644 index 00000000..81bef537 --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/check.py @@ -0,0 +1,30 @@ +from unittest.mock import AsyncMock + +from 
src.core.tasks.scheduled.impl.huggingface.operator import PushToHuggingFaceTaskOperator +from src.core.tasks.scheduled.impl.huggingface.queries.get.model import GetForLoadingToHuggingFaceOutput + + +def check_results_called( + operator: PushToHuggingFaceTaskOperator, + expected_outputs: list[GetForLoadingToHuggingFaceOutput] +) -> None: + mock_hf_client: AsyncMock = operator.hf_client + mock_push: AsyncMock = mock_hf_client.push_data_sources_raw_to_hub + outputs: list[GetForLoadingToHuggingFaceOutput] = mock_push.call_args.args[0] + outputs = sorted(outputs, key=lambda x: x.url_id) + expected_outputs = sorted(expected_outputs, key=lambda x: x.url_id) + for output, expected_output in zip(outputs, expected_outputs): + assert output.url_id == expected_output.url_id + assert output.url == expected_output.url + assert output.relevant == expected_output.relevant, f"Expected {expected_output.relevant}, got {output.relevant}" + assert output.record_type_fine == expected_output.record_type_fine + assert output.record_type_coarse == expected_output.record_type_coarse + assert output.html == expected_output.html + + +def check_not_called( + operator: PushToHuggingFaceTaskOperator, +) -> None: + mock_hf_client: AsyncMock = operator.hf_client + mock_push: AsyncMock = mock_hf_client.push_data_sources_raw_to_hub + mock_push.assert_not_called() \ No newline at end of file diff --git a/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/data.py b/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/data.py index 64a16f9f..e7a9a69b 100644 --- a/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/data.py +++ b/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/data.py @@ -1,71 +1,30 @@ -from src.collectors.enums import URLStatus from src.core.enums import RecordType from src.core.tasks.scheduled.impl.huggingface.queries.get.enums import RecordTypeCoarse -from 
tests.automated.integration.tasks.scheduled.impl.huggingface.setup.models.entry \ - import TestPushToHuggingFaceURLSetupEntry as Entry -from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.models.output import \ - TestPushToHuggingFaceURLSetupExpectedOutput as Output -from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.models.input import \ - TestPushToHuggingFaceURLSetupEntryInput as Input +from src.core.tasks.scheduled.impl.huggingface.queries.get.model import GetForLoadingToHuggingFaceOutput + + +def get_test_url(i: int) -> str: + return f"www.testPushToHuggingFaceURLSetupEntry.com/{i}" + +def get_test_html(i: int) -> str: + return f"
This is an example of HTML content.
+ + + """ def setup_url_to_response_info( ) -> dict[str, URLResponseInfo]: @@ -37,7 +49,7 @@ def setup_url_to_response_info( response_info = URLResponseInfo( success=_get_success(entry), status=get_http_status(entry), - html=generate_test_html() if _get_success(entry) else None, + html=_generate_test_html() if _get_success(entry) else None, content_type=_get_content_type(entry), exception=None if _get_success(entry) else "Error" ) diff --git a/tests/automated/integration/tasks/url/impl/html/setup/data.py b/tests/automated/integration/tasks/url/impl/html/setup/data.py index e9495ad4..5615392c 100644 --- a/tests/automated/integration/tasks/url/impl/html/setup/data.py +++ b/tests/automated/integration/tasks/url/impl/html/setup/data.py @@ -11,7 +11,7 @@ TestURLHTMLTaskSetupEntry( url_info=TestURLInfo( url="https://happy-path.com/pending", - status=URLStatus.PENDING + status=URLStatus.OK ), web_metadata_info=TestWebMetadataInfo( accessed=True, @@ -66,7 +66,7 @@ TestURLHTMLTaskSetupEntry( url_info=TestURLInfo( url="https://not-200-path.com/submitted", - status=URLStatus.PENDING + status=URLStatus.OK ), web_metadata_info=TestWebMetadataInfo( accessed=True, @@ -83,7 +83,7 @@ TestURLHTMLTaskSetupEntry( url_info=TestURLInfo( url="https://no-web-metadata.com/submitted", - status=URLStatus.PENDING + status=URLStatus.OK ), web_metadata_info=None, expected_result=ExpectedResult( diff --git a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_error.py b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_error.py index 404f00e1..e788fff1 100644 --- a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_error.py +++ b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_error.py @@ -1,15 +1,19 @@ import pytest from src.collectors.enums import URLStatus +from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from 
tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error from tests.automated.integration.tasks.url.impl.probe.check.manager import TestURLProbeCheckManager from tests.automated.integration.tasks.url.impl.probe.setup.manager import TestURLProbeSetupManager +from tests.helpers.data_creator.core import DBDataCreator @pytest.mark.asyncio async def test_url_probe_task_error( setup_manager: TestURLProbeSetupManager, - check_manager: TestURLProbeCheckManager + check_manager: TestURLProbeCheckManager, + db_data_creator: DBDataCreator ): """ If a URL returns a 500 error response (or any other error), @@ -28,15 +32,20 @@ async def test_url_probe_task_error( ) ) assert not await operator.meets_task_prerequisites() - url_id = await setup_manager.setup_url(URLStatus.SUBMITTED) + url_id: int = await setup_manager.setup_url(URLStatus.OK) + await db_data_creator.create_validated_flags([url_id], validation_type=URLValidatedType.DATA_SOURCE) + await db_data_creator.create_url_data_sources([url_id]) + assert await operator.meets_task_prerequisites() run_info = await operator.run_task() assert_task_ran_without_error(run_info) assert not await operator.meets_task_prerequisites() await check_manager.check_url( url_id=url_id, - expected_status=URLStatus.SUBMITTED + expected_status=URLStatus.OK ) + + await check_manager.check_web_metadata( url_id=url_id, status_code=500, diff --git a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_not_found.py b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_not_found.py index 97937c15..7fc54da4 100644 --- a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_not_found.py +++ b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_not_found.py @@ -1,15 +1,18 @@ import pytest from src.collectors.enums import URLStatus +from src.db.models.impl.flag.url_validated.enums import URLValidatedType from tests.automated.integration.tasks.url.impl.asserts import 
assert_task_ran_without_error from tests.automated.integration.tasks.url.impl.probe.check.manager import TestURLProbeCheckManager from tests.automated.integration.tasks.url.impl.probe.setup.manager import TestURLProbeSetupManager +from tests.helpers.data_creator.core import DBDataCreator @pytest.mark.asyncio async def test_url_probe_task_not_found( setup_manager: TestURLProbeSetupManager, - check_manager: TestURLProbeCheckManager + check_manager: TestURLProbeCheckManager, + db_data_creator: DBDataCreator ): """ If a URL returns a 404 error response, @@ -29,14 +32,15 @@ async def test_url_probe_task_not_found( ) ) assert not await operator.meets_task_prerequisites() - url_id = await setup_manager.setup_url(URLStatus.NOT_RELEVANT) + url_id = await setup_manager.setup_url(URLStatus.OK) + await db_data_creator.create_validated_flags([url_id], validation_type=URLValidatedType.NOT_RELEVANT) assert await operator.meets_task_prerequisites() run_info = await operator.run_task() assert_task_ran_without_error(run_info) assert not await operator.meets_task_prerequisites() await check_manager.check_url( url_id=url_id, - expected_status=URLStatus.NOT_RELEVANT + expected_status=URLStatus.OK ) await check_manager.check_web_metadata( url_id=url_id, diff --git a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_ok.py b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_ok.py index a02f1ba4..ecaec084 100644 --- a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_ok.py +++ b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_ok.py @@ -28,14 +28,14 @@ async def test_url_probe_task_no_redirect_ok( ) ) assert not await operator.meets_task_prerequisites() - url_id = await setup_manager.setup_url(URLStatus.PENDING) + url_id = await setup_manager.setup_url(URLStatus.OK) assert await operator.meets_task_prerequisites() run_info = await operator.run_task() assert_task_ran_without_error(run_info) assert not await 
operator.meets_task_prerequisites() await check_manager.check_url( url_id=url_id, - expected_status=URLStatus.PENDING + expected_status=URLStatus.OK ) await check_manager.check_web_metadata( url_id=url_id, diff --git a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_two_urls.py b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_two_urls.py index 0c1da5fd..cfd1f68f 100644 --- a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_two_urls.py +++ b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_two_urls.py @@ -31,8 +31,8 @@ async def test_two_urls( ] ) assert not await operator.meets_task_prerequisites() - url_id_1 = await setup_manager.setup_url(URLStatus.PENDING, url=url_1) - url_id_2 = await setup_manager.setup_url(URLStatus.NOT_RELEVANT, url=url_2) + url_id_1 = await setup_manager.setup_url(URLStatus.OK, url=url_1) + url_id_2 = await setup_manager.setup_url(URLStatus.OK, url=url_2) assert await operator.meets_task_prerequisites() run_info = await operator.run_task() assert_task_ran_without_error(run_info) diff --git a/tests/automated/integration/tasks/url/impl/probe/redirect/dest_new/test_dest_ok.py b/tests/automated/integration/tasks/url/impl/probe/redirect/dest_new/test_dest_ok.py index 88098b16..df695021 100644 --- a/tests/automated/integration/tasks/url/impl/probe/redirect/dest_new/test_dest_ok.py +++ b/tests/automated/integration/tasks/url/impl/probe/redirect/dest_new/test_dest_ok.py @@ -28,12 +28,12 @@ async def test_url_probe_task_redirect_dest_new_ok( dest_error=None ) ) - source_url_id = await setup_manager.setup_url(URLStatus.PENDING) + source_url_id = await setup_manager.setup_url(URLStatus.OK) run_info = await operator.run_task() assert_task_ran_without_error(run_info) await check_manager.check_url( url_id=source_url_id, - expected_status=URLStatus.PENDING + expected_status=URLStatus.OK ) await check_manager.check_web_metadata( url_id=source_url_id, @@ -45,7 +45,7 @@ async def 
test_url_probe_task_redirect_dest_new_ok( dest_url_id = await check_manager.check_redirect(source_url_id) await check_manager.check_url( url_id=dest_url_id, - expected_status=URLStatus.PENDING + expected_status=URLStatus.OK ) await check_manager.check_web_metadata( url_id=dest_url_id, diff --git a/tests/automated/integration/tasks/url/impl/probe/redirect/test_dest_exists_in_db.py b/tests/automated/integration/tasks/url/impl/probe/redirect/test_dest_exists_in_db.py index 0744f3b9..b52dce6b 100644 --- a/tests/automated/integration/tasks/url/impl/probe/redirect/test_dest_exists_in_db.py +++ b/tests/automated/integration/tasks/url/impl/probe/redirect/test_dest_exists_in_db.py @@ -29,8 +29,8 @@ async def test_url_probe_task_redirect_dest_exists_in_db( dest_error=None ) ) - source_url_id = await setup_manager.setup_url(URLStatus.INDIVIDUAL_RECORD) - dest_url_id = await setup_manager.setup_url(URLStatus.PENDING, url=TEST_DEST_URL) + source_url_id = await setup_manager.setup_url(URLStatus.OK) + dest_url_id = await setup_manager.setup_url(URLStatus.OK, url=TEST_DEST_URL) # Add web metadata for destination URL, to prevent it from being pulled web_metadata = URLWebMetadataPydantic( url_id=dest_url_id, @@ -44,11 +44,11 @@ async def test_url_probe_task_redirect_dest_exists_in_db( assert_task_ran_without_error(run_info) await check_manager.check_url( url_id=source_url_id, - expected_status=URLStatus.INDIVIDUAL_RECORD + expected_status=URLStatus.OK ) await check_manager.check_url( url_id=dest_url_id, - expected_status=URLStatus.PENDING + expected_status=URLStatus.OK ) await check_manager.check_web_metadata( url_id=source_url_id, diff --git a/tests/automated/integration/tasks/url/impl/probe/redirect/test_redirect_infinite.py b/tests/automated/integration/tasks/url/impl/probe/redirect/test_redirect_infinite.py index ed9c38ac..5a66af3d 100644 --- a/tests/automated/integration/tasks/url/impl/probe/redirect/test_redirect_infinite.py +++ 
b/tests/automated/integration/tasks/url/impl/probe/redirect/test_redirect_infinite.py @@ -27,11 +27,11 @@ async def test_url_probe_task_redirect_infinite( redirect_url=TEST_URL ) ) - url_id = await setup_manager.setup_url(URLStatus.PENDING) + url_id = await setup_manager.setup_url(URLStatus.OK) run_info = await operator.run_task() await check_manager.check_url( url_id=url_id, - expected_status=URLStatus.PENDING + expected_status=URLStatus.OK ) await check_manager.check_web_metadata( url_id=url_id, diff --git a/tests/automated/integration/tasks/url/impl/probe/redirect/test_two_urls_same_dest.py b/tests/automated/integration/tasks/url/impl/probe/redirect/test_two_urls_same_dest.py index 267d9015..f0e113ff 100644 --- a/tests/automated/integration/tasks/url/impl/probe/redirect/test_two_urls_same_dest.py +++ b/tests/automated/integration/tasks/url/impl/probe/redirect/test_two_urls_same_dest.py @@ -34,17 +34,17 @@ async def test_url_probe_task_redirect_two_urls_same_dest( ), ] ) - source_url_id_1 = await setup_manager.setup_url(URLStatus.PENDING) - source_url_id_2 = await setup_manager.setup_url(URLStatus.PENDING, url="https://example.com/2") + source_url_id_1 = await setup_manager.setup_url(URLStatus.OK) + source_url_id_2 = await setup_manager.setup_url(URLStatus.OK, url="https://example.com/2") run_info = await operator.run_task() assert_task_ran_without_error(run_info) await check_manager.check_url( url_id=source_url_id_1, - expected_status=URLStatus.PENDING + expected_status=URLStatus.OK ) await check_manager.check_url( url_id=source_url_id_2, - expected_status=URLStatus.PENDING + expected_status=URLStatus.OK ) redirect_url_id_1 = await check_manager.check_redirect( source_url_id=source_url_id_1 diff --git a/tests/automated/integration/tasks/url/impl/submit_approved/test_submit_approved_url_task.py b/tests/automated/integration/tasks/url/impl/submit_approved/test_submit_approved_url_task.py index 7d56ddcf..f992fbb6 100644 --- 
a/tests/automated/integration/tasks/url/impl/submit_approved/test_submit_approved_url_task.py +++ b/tests/automated/integration/tasks/url/impl/submit_approved/test_submit_approved_url_task.py @@ -16,9 +16,9 @@ @pytest.mark.asyncio async def test_submit_approved_url_task( - db_data_creator, - mock_pdap_client: PDAPClient, - monkeypatch + db_data_creator, + mock_pdap_client: PDAPClient, + monkeypatch ): """ The submit_approved_url_task should submit @@ -37,7 +37,7 @@ async def test_submit_approved_url_task( # Create URLs with status 'validated' in database and all requisite URL values # Ensure they have optional metadata as well - urls = await setup_validated_urls(db_data_creator) + urls: list[str] = await setup_validated_urls(db_data_creator) mock_make_request(mock_pdap_client, urls) # Check Task Operator does meet pre-requisites @@ -50,14 +50,14 @@ async def test_submit_approved_url_task( assert run_info.outcome == TaskOperatorOutcome.SUCCESS, run_info.message # Get URLs - urls = await db_data_creator.adb_client.get_all(URL, order_by_attribute="id") - url_1 = urls[0] - url_2 = urls[1] - url_3 = urls[2] + urls: list[URL] = await db_data_creator.adb_client.get_all(URL, order_by_attribute="id") + url_1: URL = urls[0] + url_2: URL = urls[1] + url_3: URL = urls[2] # Check URLs have been marked as 'submitted' - assert url_1.status == URLStatus.SUBMITTED - assert url_2.status == URLStatus.SUBMITTED + assert url_1.status == URLStatus.OK + assert url_2.status == URLStatus.OK assert url_3.status == URLStatus.ERROR # Get URL Data Source Links diff --git a/tests/automated/integration/tasks/url/impl/submit_approved/test_validated_meta_url.py b/tests/automated/integration/tasks/url/impl/submit_approved/test_validated_meta_url.py new file mode 100644 index 00000000..5f927159 --- /dev/null +++ b/tests/automated/integration/tasks/url/impl/submit_approved/test_validated_meta_url.py @@ -0,0 +1,42 @@ +import pytest + +from src.core.tasks.base.run_info import TaskOperatorRunInfo +from 
src.core.tasks.url.operators.submit_approved.core import SubmitApprovedURLTaskOperator +from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.url.data_source.sqlalchemy import URLDataSource +from src.external.pdap.client import PDAPClient +from tests.helpers.asserts import assert_task_run_success + + +@pytest.mark.asyncio +async def test_validated_meta_url_not_included( + db_data_creator, + mock_pdap_client: PDAPClient, + monkeypatch +): + """ + If a validated Meta URL is included in the database + This should not be included in the submit approved task + """ + + # Get Task Operator + operator = SubmitApprovedURLTaskOperator( + adb_client=db_data_creator.adb_client, + pdap_client=mock_pdap_client + ) + + dbdc = db_data_creator + url_1: int = (await dbdc.create_validated_urls( + validation_type=URLValidatedType.META_URL + ))[0].url_id + + # Test task operator does not meet prerequisites + assert not await operator.meets_task_prerequisites() + + # Run task and confirm runs without error + run_info: TaskOperatorRunInfo = await operator.run_task() + assert_task_run_success(run_info) + + # Confirm entry not included in database + ds_urls: list[URLDataSource] = await dbdc.adb_client.get_all(URLDataSource) + assert len(ds_urls) == 0 diff --git a/tests/automated/integration/tasks/url/impl/test_url_404_probe.py b/tests/automated/integration/tasks/url/impl/test_url_404_probe.py index 630f7f4e..e55ad9ad 100644 --- a/tests/automated/integration/tasks/url/impl/test_url_404_probe.py +++ b/tests/automated/integration/tasks/url/impl/test_url_404_probe.py @@ -12,6 +12,7 @@ from src.collectors.enums import URLStatus from src.core.tasks.url.enums import TaskOperatorOutcome from src.external.url_request.dtos.url_response import URLResponseInfo +from tests.helpers.batch_creation_parameters.enums import URLCreationEnum from tests.helpers.data_creator.core import DBDataCreator from tests.helpers.batch_creation_parameters.url_creation_parameters 
import TestURLCreationParameters from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters @@ -19,6 +20,7 @@ @pytest.mark.asyncio async def test_url_404_probe_task( + wiped_database, db_data_creator: DBDataCreator ): @@ -84,12 +86,12 @@ async def mock_make_simple_requests(self, urls: list[str]) -> list[URLResponseIn urls=[ TestURLCreationParameters( count=3, - status=URLStatus.PENDING, + status=URLCreationEnum.OK, with_html_content=True ), TestURLCreationParameters( count=1, - status=URLStatus.ERROR, + status=URLCreationEnum.ERROR, with_html_content=False ), ] @@ -104,12 +106,12 @@ async def mock_make_simple_requests(self, urls: list[str]) -> list[URLResponseIn assert run_info.outcome == TaskOperatorOutcome.SUCCESS, run_info.message - pending_url_mappings = creation_info.urls_by_status[URLStatus.PENDING].url_mappings + pending_url_mappings = creation_info.urls_by_status[URLCreationEnum.OK].url_mappings url_id_success = pending_url_mappings[0].url_id url_id_404 = pending_url_mappings[1].url_id url_id_error = pending_url_mappings[2].url_id - url_id_initial_error = creation_info.urls_by_status[URLStatus.ERROR].url_mappings[0].url_id + url_id_initial_error = creation_info.urls_by_status[URLCreationEnum.ERROR].url_mappings[0].url_id # Check that URLProbedFor404 has been appropriately populated probed_for_404_objects: list[URLProbedFor404] = await db_data_creator.adb_client.get_all(URLProbedFor404) @@ -128,9 +130,9 @@ def find_url(url_id: int) -> URL: return url raise Exception(f"URL with id {url_id} not found") - assert find_url(url_id_success).status == URLStatus.PENDING + assert find_url(url_id_success).status == URLStatus.OK assert find_url(url_id_404).status == URLStatus.NOT_FOUND - assert find_url(url_id_error).status == URLStatus.PENDING + assert find_url(url_id_error).status == URLStatus.OK assert find_url(url_id_initial_error).status == URLStatus.ERROR # Check that meets_task_prerequisites now returns False diff --git 
a/tests/automated/integration/tasks/url/loader/conftest.py b/tests/automated/integration/tasks/url/loader/conftest.py index 045236f9..52a17b5e 100644 --- a/tests/automated/integration/tasks/url/loader/conftest.py +++ b/tests/automated/integration/tasks/url/loader/conftest.py @@ -4,10 +4,11 @@ from src.collectors.impl.muckrock.api_interface.core import MuckrockAPIInterface from src.core.tasks.url.loader import URLTaskOperatorLoader +from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor.nlp.core import \ + NLPProcessor from src.core.tasks.url.operators.html.scraper.parser.core import HTMLResponseParser from src.db.client.async_ import AsyncDatabaseClient from src.external.huggingface.inference.client import HuggingFaceInferenceClient -from src.external.internet_archives.client import InternetArchivesClient from src.external.pdap.client import PDAPClient from src.external.url_request.core import URLRequestInterface @@ -22,4 +23,5 @@ def loader() -> URLTaskOperatorLoader: pdap_client=AsyncMock(spec=PDAPClient), muckrock_api_interface=AsyncMock(spec=MuckrockAPIInterface), hf_inference_client=AsyncMock(spec=HuggingFaceInferenceClient), + nlp_processor=AsyncMock(spec=NLPProcessor) ) \ No newline at end of file diff --git a/tests/helpers/batch_creation_parameters/core.py b/tests/helpers/batch_creation_parameters/core.py index dfc33644..4562cbdf 100644 --- a/tests/helpers/batch_creation_parameters/core.py +++ b/tests/helpers/batch_creation_parameters/core.py @@ -9,10 +9,10 @@ class TestBatchCreationParameters(BaseModel): - created_at: Optional[datetime.datetime] = None + created_at: datetime.datetime | None = None outcome: BatchStatus = BatchStatus.READY_TO_LABEL strategy: CollectorType = CollectorType.EXAMPLE - urls: Optional[list[TestURLCreationParameters]] = None + urls: list[TestURLCreationParameters] | None = None @model_validator(mode='after') def validate_urls(self): diff --git 
a/tests/helpers/batch_creation_parameters/enums.py b/tests/helpers/batch_creation_parameters/enums.py new file mode 100644 index 00000000..d61a2793 --- /dev/null +++ b/tests/helpers/batch_creation_parameters/enums.py @@ -0,0 +1,11 @@ +from enum import Enum + + +class URLCreationEnum(Enum): + OK = "ok" + SUBMITTED = "submitted" + VALIDATED = "validated" + ERROR = "error" + NOT_RELEVANT = "not_relevant" + DUPLICATE = "duplicate" + NOT_FOUND = "not_found" \ No newline at end of file diff --git a/tests/helpers/batch_creation_parameters/url_creation_parameters.py b/tests/helpers/batch_creation_parameters/url_creation_parameters.py index 2e30cca0..701a239b 100644 --- a/tests/helpers/batch_creation_parameters/url_creation_parameters.py +++ b/tests/helpers/batch_creation_parameters/url_creation_parameters.py @@ -1,23 +1,26 @@ from pydantic import BaseModel, model_validator from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo -from src.collectors.enums import URLStatus from src.core.enums import RecordType from tests.helpers.batch_creation_parameters.annotation_info import AnnotationInfo +from tests.helpers.batch_creation_parameters.enums import URLCreationEnum class TestURLCreationParameters(BaseModel): count: int = 1 - status: URLStatus = URLStatus.PENDING + status: URLCreationEnum = URLCreationEnum.OK with_html_content: bool = False annotation_info: AnnotationInfo = AnnotationInfo() @model_validator(mode='after') def validate_annotation_info(self): - if self.status == URLStatus.NOT_RELEVANT: + if self.status == URLCreationEnum.NOT_RELEVANT: self.annotation_info.final_review_approved = False return self - if self.status != URLStatus.VALIDATED: + if self.status not in ( + URLCreationEnum.SUBMITTED, + URLCreationEnum.VALIDATED + ): return self # Assume is validated diff --git a/tests/helpers/counter.py b/tests/helpers/counter.py new file mode 100644 index 00000000..8d9de1a0 --- /dev/null +++ b/tests/helpers/counter.py @@ -0,0 +1,7 @@ + +from 
itertools import count + +COUNTER = count(1) + +def next_int() -> int: + return next(COUNTER) \ No newline at end of file diff --git a/tests/helpers/data_creator/commands/impl/annotate.py b/tests/helpers/data_creator/commands/impl/annotate.py index 5f341326..1f549615 100644 --- a/tests/helpers/data_creator/commands/impl/annotate.py +++ b/tests/helpers/data_creator/commands/impl/annotate.py @@ -7,7 +7,7 @@ from src.core.enums import SuggestionType from tests.helpers.batch_creation_parameters.annotation_info import AnnotationInfo from tests.helpers.data_creator.commands.base import DBDataCreatorCommandBase -from tests.helpers.data_creator.commands.impl.suggestion.auto.agency import AgencyAutoSuggestionsCommand +from tests.helpers.data_creator.commands.impl.suggestion.auto.agency_.core import AgencyAutoSuggestionsCommand from tests.helpers.data_creator.commands.impl.suggestion.auto.record_type import AutoRecordTypeSuggestionCommand from tests.helpers.data_creator.commands.impl.suggestion.auto.relevant import AutoRelevantSuggestionCommand from tests.helpers.data_creator.commands.impl.suggestion.user.agency import AgencyUserSuggestionsCommand diff --git a/tests/helpers/data_creator/commands/impl/batch.py b/tests/helpers/data_creator/commands/impl/batch.py index 69583a45..6871661d 100644 --- a/tests/helpers/data_creator/commands/impl/batch.py +++ b/tests/helpers/data_creator/commands/impl/batch.py @@ -3,7 +3,7 @@ from src.collectors.enums import CollectorType from src.core.enums import BatchStatus -from src.db.models.impl.batch.pydantic import BatchInfo +from src.db.models.impl.batch.pydantic.info import BatchInfo from tests.helpers.data_creator.commands.base import DBDataCreatorCommandBase diff --git a/tests/helpers/data_creator/commands/impl/suggestion/auto/agency.py b/tests/helpers/data_creator/commands/impl/suggestion/auto/agency.py deleted file mode 100644 index 96743df8..00000000 --- a/tests/helpers/data_creator/commands/impl/suggestion/auto/agency.py +++ /dev/null 
@@ -1,46 +0,0 @@ -from typing import final - -from typing_extensions import override - -from src.core.enums import SuggestionType -from src.core.tasks.url.operators.agency_identification.dtos.suggestion import URLAgencySuggestionInfo -from tests.helpers.data_creator.commands.base import DBDataCreatorCommandBase -from tests.helpers.data_creator.commands.impl.agency import AgencyCommand - -@final -class AgencyAutoSuggestionsCommand(DBDataCreatorCommandBase): - - def __init__( - self, - url_id: int, - count: int, - suggestion_type: SuggestionType = SuggestionType.AUTO_SUGGESTION - ): - super().__init__() - if suggestion_type == SuggestionType.UNKNOWN: - count = 1 # Can only be one auto suggestion if unknown - self.url_id = url_id - self.count = count - self.suggestion_type = suggestion_type - - @override - async def run(self) -> None: - suggestions = [] - for _ in range(self.count): - if self.suggestion_type == SuggestionType.UNKNOWN: - pdap_agency_id = None - else: - pdap_agency_id = await self.run_command(AgencyCommand()) - suggestion = URLAgencySuggestionInfo( - url_id=self.url_id, - suggestion_type=self.suggestion_type, - pdap_agency_id=pdap_agency_id, - state="Test State", - county="Test County", - locality="Test Locality" - ) - suggestions.append(suggestion) - - await self.adb_client.add_agency_auto_suggestions( - suggestions=suggestions - ) \ No newline at end of file diff --git a/tests/helpers/data_creator/commands/impl/suggestion/auto/agency_/__init__.py b/tests/helpers/data_creator/commands/impl/suggestion/auto/agency_/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/helpers/data_creator/commands/impl/suggestion/auto/agency_/core.py b/tests/helpers/data_creator/commands/impl/suggestion/auto/agency_/core.py new file mode 100644 index 00000000..fe54c6f9 --- /dev/null +++ b/tests/helpers/data_creator/commands/impl/suggestion/auto/agency_/core.py @@ -0,0 +1,78 @@ +from typing import final + +from typing_extensions import override + 
+from src.core.enums import SuggestionType +from src.core.tasks.url.operators.agency_identification.dtos.suggestion import URLAgencySuggestionInfo +from src.db.enums import TaskType +from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.url.suggestion.agency.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic +from src.db.models.impl.url.suggestion.agency.suggestion.pydantic import AgencyIDSubtaskSuggestionPydantic +from tests.helpers.data_creator.commands.base import DBDataCreatorCommandBase +from tests.helpers.data_creator.commands.impl.agency import AgencyCommand + +@final +class AgencyAutoSuggestionsCommand(DBDataCreatorCommandBase): + + def __init__( + self, + url_id: int, + count: int, + suggestion_type: SuggestionType = SuggestionType.AUTO_SUGGESTION, + subtask_type: AutoAgencyIDSubtaskType = AutoAgencyIDSubtaskType.HOMEPAGE_MATCH, + confidence: int = 50 + ): + super().__init__() + if suggestion_type == SuggestionType.UNKNOWN: + count = 1 # Can only be one auto suggestion if unknown + agencies_found = False + else: + agencies_found = True + self.url_id = url_id + self.count = count + self.suggestion_type = suggestion_type + self.subtask_type = subtask_type + self.confidence = confidence + self.agencies_found = agencies_found + + @override + async def run(self) -> None: + task_id: int = await self.add_task() + subtask_id: int = await self.create_subtask(task_id) + if not self.agencies_found: + return + + suggestions: list[AgencyIDSubtaskSuggestionPydantic] = [] + for _ in range(self.count): + pdap_agency_id: int = await self.run_command(AgencyCommand()) + + suggestion = AgencyIDSubtaskSuggestionPydantic( + subtask_id=subtask_id, + agency_id=pdap_agency_id, + confidence=self.confidence, + ) + suggestions.append(suggestion) + + await self.adb_client.bulk_insert( + models=suggestions, + ) + + async def add_task(self) -> int: + task_id: int = await self.adb_client.initiate_task( + 
task_type=TaskType.AGENCY_IDENTIFICATION, + ) + return task_id + + async def create_subtask(self, task_id: int) -> int: + obj: URLAutoAgencyIDSubtaskPydantic = URLAutoAgencyIDSubtaskPydantic( + task_id=task_id, + type=self.subtask_type, + url_id=self.url_id, + agencies_found=self.agencies_found, + ) + subtask_id: int = (await self.adb_client.bulk_insert( + models=[obj], + return_ids=True + ))[0] + return subtask_id + diff --git a/tests/helpers/data_creator/commands/impl/urls_/__init__.py b/tests/helpers/data_creator/commands/impl/urls_/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/helpers/data_creator/commands/impl/urls_/convert.py b/tests/helpers/data_creator/commands/impl/urls_/convert.py new file mode 100644 index 00000000..d76edfe5 --- /dev/null +++ b/tests/helpers/data_creator/commands/impl/urls_/convert.py @@ -0,0 +1,36 @@ +from src.collectors.enums import URLStatus +from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from tests.helpers.batch_creation_parameters.enums import URLCreationEnum + + +def convert_url_creation_enum_to_url_status(url_creation_enum: URLCreationEnum) -> URLStatus: + match url_creation_enum: + case URLCreationEnum.OK: + return URLStatus.OK + case URLCreationEnum.SUBMITTED: + return URLStatus.OK + case URLCreationEnum.VALIDATED: + return URLStatus.OK + case URLCreationEnum.NOT_RELEVANT: + return URLStatus.OK + case URLCreationEnum.ERROR: + return URLStatus.ERROR + case URLCreationEnum.DUPLICATE: + return URLStatus.DUPLICATE + case URLCreationEnum.NOT_FOUND: + return URLStatus.NOT_FOUND + case _: + raise ValueError(f"Unknown URLCreationEnum: {url_creation_enum}") + +def convert_url_creation_enum_to_validated_type( + url_creation_enum: URLCreationEnum +) -> URLValidatedType: + match url_creation_enum: + case URLCreationEnum.SUBMITTED: + return URLValidatedType.DATA_SOURCE + case URLCreationEnum.VALIDATED: + return URLValidatedType.DATA_SOURCE + case URLCreationEnum.NOT_RELEVANT: + return 
URLValidatedType.NOT_RELEVANT + case _: + raise ValueError(f"Unknown URLCreationEnum: {url_creation_enum}") \ No newline at end of file diff --git a/tests/helpers/data_creator/commands/impl/urls.py b/tests/helpers/data_creator/commands/impl/urls_/query.py similarity index 79% rename from tests/helpers/data_creator/commands/impl/urls.py rename to tests/helpers/data_creator/commands/impl/urls_/query.py index ee9ef954..7587abfb 100644 --- a/tests/helpers/data_creator/commands/impl/urls.py +++ b/tests/helpers/data_creator/commands/impl/urls_/query.py @@ -1,11 +1,12 @@ from datetime import datetime -from src.collectors.enums import URLStatus from src.core.tasks.url.operators.submit_approved.tdo import SubmittedURLInfo from src.db.dtos.url.insert import InsertURLsInfo from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.core.pydantic.info import URLInfo +from tests.helpers.batch_creation_parameters.enums import URLCreationEnum from tests.helpers.data_creator.commands.base import DBDataCreatorCommandBase +from tests.helpers.data_creator.commands.impl.urls_.convert import convert_url_creation_enum_to_url_status from tests.helpers.simple_test_data_functions import generate_test_urls @@ -16,7 +17,7 @@ def __init__( batch_id: int | None, url_count: int, collector_metadata: dict | None = None, - status: URLStatus = URLStatus.PENDING, + status: URLCreationEnum = URLCreationEnum.OK, created_at: datetime | None = None ): super().__init__() @@ -36,8 +37,11 @@ def run_sync(self) -> InsertURLsInfo: url_infos.append( URLInfo( url=url, - status=self.status, - name="Test Name" if self.status == URLStatus.VALIDATED else None, + status=convert_url_creation_enum_to_url_status(self.status), + name="Test Name" if self.status in ( + URLCreationEnum.VALIDATED, + URLCreationEnum.SUBMITTED, + ) else None, collector_metadata=self.collector_metadata, created_at=self.created_at, source=URLSource.COLLECTOR @@ -50,7 +54,7 @@ def run_sync(self) -> InsertURLsInfo: ) # If 
outcome is submitted, also add entry to DataSourceURL - if self.status == URLStatus.SUBMITTED: + if self.status == URLCreationEnum.SUBMITTED: submitted_url_infos = [] for url_id in url_insert_info.url_ids: submitted_url_info = SubmittedURLInfo( diff --git a/tests/helpers/data_creator/commands/impl/urls_v2/core.py b/tests/helpers/data_creator/commands/impl/urls_v2/core.py index c80dc447..f7042720 100644 --- a/tests/helpers/data_creator/commands/impl/urls_v2/core.py +++ b/tests/helpers/data_creator/commands/impl/urls_v2/core.py @@ -1,14 +1,16 @@ from datetime import datetime -from src.collectors.enums import URLStatus from src.db.dtos.url.insert import InsertURLsInfo +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from tests.helpers.batch_creation_parameters.enums import URLCreationEnum from tests.helpers.batch_creation_parameters.url_creation_parameters import TestURLCreationParameters from tests.helpers.data_creator.commands.base import DBDataCreatorCommandBase from tests.helpers.data_creator.commands.impl.annotate import AnnotateCommand from tests.helpers.data_creator.commands.impl.html_data import HTMLDataCreatorCommand -from tests.helpers.data_creator.commands.impl.urls import URLsDBDataCreatorCommand +from tests.helpers.data_creator.commands.impl.urls_.convert import convert_url_creation_enum_to_validated_type +from tests.helpers.data_creator.commands.impl.urls_.query import URLsDBDataCreatorCommand from tests.helpers.data_creator.commands.impl.urls_v2.response import URLsV2Response -from tests.helpers.data_creator.models.creation_info.batch.v2 import BatchURLCreationInfoV2 +from tests.helpers.data_creator.generate import generate_validated_flags from tests.helpers.data_creator.models.creation_info.url import URLCreationInfo @@ -26,7 +28,7 @@ def __init__( self.created_at = created_at async def run(self) -> URLsV2Response: - urls_by_status: dict[URLStatus, URLCreationInfo] = {} + urls_by_status: dict[URLCreationEnum, 
URLCreationInfo] = {} urls_by_order: list[URLCreationInfo] = [] # Create urls for url_parameters in self.parameters: diff --git a/tests/helpers/data_creator/commands/impl/urls_v2/response.py b/tests/helpers/data_creator/commands/impl/urls_v2/response.py index db19328e..74aa8e20 100644 --- a/tests/helpers/data_creator/commands/impl/urls_v2/response.py +++ b/tests/helpers/data_creator/commands/impl/urls_v2/response.py @@ -1,9 +1,10 @@ from pydantic import BaseModel from src.collectors.enums import URLStatus +from tests.helpers.batch_creation_parameters.enums import URLCreationEnum from tests.helpers.data_creator.models.creation_info.url import URLCreationInfo class URLsV2Response(BaseModel): - urls_by_status: dict[URLStatus, URLCreationInfo] = {} + urls_by_status: dict[URLCreationEnum, URLCreationInfo] = {} urls_by_order: list[URLCreationInfo] = [] \ No newline at end of file diff --git a/tests/helpers/data_creator/core.py b/tests/helpers/data_creator/core.py index 096bad32..57ee3576 100644 --- a/tests/helpers/data_creator/core.py +++ b/tests/helpers/data_creator/core.py @@ -5,8 +5,15 @@ from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo from src.core.tasks.url.operators.agency_identification.dtos.suggestion import URLAgencySuggestionInfo from src.db.client.async_ import AsyncDatabaseClient +from src.db.dtos.url.mapping import URLMapping +from src.db.models.impl.agency.sqlalchemy import Agency from src.db.models.impl.duplicate.pydantic.insert import DuplicateInsertInfo from src.db.dtos.url.insert import InsertURLsInfo +from src.db.models.impl.flag.root_url.sqlalchemy import FlagRootURL +from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency +from src.db.models.impl.link.urls_root_url.sqlalchemy import LinkURLRootURL +from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.error_info.pydantic import URLErrorPydanticInfo 
from src.db.client.sync import DatabaseClient from src.db.enums import TaskType @@ -14,26 +21,31 @@ from src.core.tasks.url.operators.misc_metadata.tdo import URLMiscellaneousMetadataTDO from src.core.enums import BatchStatus, SuggestionType, RecordType, SuggestedStatus from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters +from tests.helpers.batch_creation_parameters.enums import URLCreationEnum from tests.helpers.batch_creation_parameters.url_creation_parameters import TestURLCreationParameters +from tests.helpers.counter import next_int from tests.helpers.data_creator.commands.base import DBDataCreatorCommandBase from tests.helpers.data_creator.commands.impl.agency import AgencyCommand from tests.helpers.data_creator.commands.impl.batch import DBDataCreatorBatchCommand from tests.helpers.data_creator.commands.impl.batch_v2 import BatchV2Command from tests.helpers.data_creator.commands.impl.html_data import HTMLDataCreatorCommand from tests.helpers.data_creator.commands.impl.suggestion.agency_confirmed import AgencyConfirmedSuggestionCommand -from tests.helpers.data_creator.commands.impl.suggestion.auto.agency import AgencyAutoSuggestionsCommand +from tests.helpers.data_creator.commands.impl.suggestion.auto.agency_.core import AgencyAutoSuggestionsCommand from tests.helpers.data_creator.commands.impl.suggestion.auto.record_type import AutoRecordTypeSuggestionCommand from tests.helpers.data_creator.commands.impl.suggestion.auto.relevant import AutoRelevantSuggestionCommand from tests.helpers.data_creator.commands.impl.suggestion.user.agency import AgencyUserSuggestionsCommand from tests.helpers.data_creator.commands.impl.suggestion.user.record_type import UserRecordTypeSuggestionCommand from tests.helpers.data_creator.commands.impl.suggestion.user.relevant import UserRelevantSuggestionCommand from tests.helpers.data_creator.commands.impl.url_metadata import URLMetadataCommand -from tests.helpers.data_creator.commands.impl.urls import 
URLsDBDataCreatorCommand +from tests.helpers.data_creator.commands.impl.urls_.query import URLsDBDataCreatorCommand from tests.helpers.data_creator.commands.impl.urls_v2.core import URLsV2Command from tests.helpers.data_creator.commands.impl.urls_v2.response import URLsV2Response +from tests.helpers.data_creator.create import create_urls, create_batch, create_batch_url_links, create_validated_flags, \ + create_url_data_sources from tests.helpers.data_creator.models.clients import DBDataCreatorClientContainer from tests.helpers.data_creator.models.creation_info.batch.v1 import BatchURLCreationInfo from tests.helpers.data_creator.models.creation_info.batch.v2 import BatchURLCreationInfoV2 +from tests.helpers.simple_test_data_functions import generate_test_name class DBDataCreator: @@ -105,7 +117,7 @@ async def batch_and_urls( url_count: int = 3, with_html_content: bool = False, batch_status: BatchStatus = BatchStatus.READY_TO_LABEL, - url_status: URLStatus = URLStatus.PENDING + url_status: URLCreationEnum = URLCreationEnum.OK ) -> BatchURLCreationInfo: batch_id = self.batch( strategy=strategy, @@ -194,23 +206,14 @@ async def auto_suggestions( raise ValueError(f"suggestion_type must be one of {allowed_suggestion_types}") if suggestion_type == SuggestionType.UNKNOWN and num_suggestions > 1: raise ValueError("num_suggestions must be 1 when suggestion_type is unknown") - + for url_id in url_ids: - suggestions = [] - for i in range(num_suggestions): - if suggestion_type == SuggestionType.UNKNOWN: - agency_id = None - else: - agency_id = await self.agency() - suggestion = URLAgencySuggestionInfo( + await self.run_command( + AgencyAutoSuggestionsCommand( url_id=url_id, - suggestion_type=suggestion_type, - pdap_agency_id=agency_id + count=num_suggestions, + suggestion_type=suggestion_type ) - suggestions.append(suggestion) - - await self.adb_client.add_agency_auto_suggestions( - suggestions=suggestions ) async def confirmed_suggestions(self, url_ids: list[int]): @@ -239,7 
+242,7 @@ def urls( batch_id: int, url_count: int, collector_metadata: dict | None = None, - outcome: URLStatus = URLStatus.PENDING, + outcome: URLCreationEnum = URLCreationEnum.OK, created_at: datetime | None = None ) -> InsertURLsInfo: command = URLsDBDataCreatorCommand( @@ -259,7 +262,7 @@ async def url_miscellaneous_metadata( record_formats: Optional[list[str]] = None, data_portal_type: Optional[str] = "Test Data Portal Type", supplying_entity: Optional[str] = "Test Supplying Entity" - ): + ) -> None: if record_formats is None: record_formats = ["Test Record Format", "Test Record Format 2"] @@ -277,7 +280,11 @@ async def url_miscellaneous_metadata( await self.adb_client.add_miscellaneous_metadata([tdo]) - def duplicate_urls(self, duplicate_batch_id: int, url_ids: list[int]): + def duplicate_urls( + self, + duplicate_batch_id: int, + url_ids: list[int] + ) -> None: """ Create duplicates for all given url ids, and associate them with the given batch @@ -302,7 +309,7 @@ async def error_info( self, url_ids: list[int], task_id: Optional[int] = None - ): + ) -> None: if task_id is None: task_id = await self.task() error_infos = [] @@ -368,3 +375,173 @@ async def url_metadata( status_code=status_code ) ) + + async def create_validated_urls( + self, + record_type: RecordType = RecordType.RESOURCES, + validation_type: URLValidatedType = URLValidatedType.DATA_SOURCE, + count: int = 1 + ) -> list[URLMapping]: + url_mappings: list[URLMapping] = await self.create_urls( + record_type=record_type, + count=count + ) + url_ids: list[int] = [url_mapping.url_id for url_mapping in url_mappings] + await self.create_validated_flags( + url_ids=url_ids, + validation_type=validation_type + ) + return url_mappings + + async def create_submitted_urls( + self, + record_type: RecordType = RecordType.RESOURCES, + count: int = 1 + ) -> list[URLMapping]: + url_mappings: list[URLMapping] = await self.create_urls( + record_type=record_type, + count=count + ) + url_ids: list[int] = 
[url_mapping.url_id for url_mapping in url_mappings] + await self.create_validated_flags( + url_ids=url_ids, + validation_type=URLValidatedType.DATA_SOURCE + ) + await self.create_url_data_sources(url_ids=url_ids) + return url_mappings + + + async def create_urls( + self, + status: URLStatus = URLStatus.OK, + source: URLSource = URLSource.COLLECTOR, + record_type: RecordType | None = RecordType.RESOURCES, + collector_metadata: dict | None = None, + count: int = 1, + batch_id: int | None = None + ) -> list[URLMapping]: + + url_mappings: list[URLMapping] = await create_urls( + adb_client=self.adb_client, + status=status, + source=source, + record_type=record_type, + collector_metadata=collector_metadata, + count=count + ) + url_ids: list[int] = [url_mapping.url_id for url_mapping in url_mappings] + if batch_id is not None: + await self.create_batch_url_links( + url_ids=url_ids, + batch_id=batch_id + ) + return url_mappings + + async def create_batch( + self, + status: BatchStatus = BatchStatus.READY_TO_LABEL, + strategy: CollectorType = CollectorType.EXAMPLE, + date_generated: datetime = datetime.now(), + ) -> int: + return await create_batch( + adb_client=self.adb_client, + status=status, + strategy=strategy, + date_generated=date_generated + ) + + async def create_batch_url_links( + self, + url_ids: list[int], + batch_id: int, + ) -> None: + await create_batch_url_links( + adb_client=self.adb_client, + url_ids=url_ids, + batch_id=batch_id + ) + + async def create_validated_flags( + self, + url_ids: list[int], + validation_type: URLValidatedType, + ) -> None: + await create_validated_flags( + adb_client=self.adb_client, + url_ids=url_ids, + validation_type=validation_type + ) + + async def create_url_data_sources( + self, + url_ids: list[int], + ) -> None: + await create_url_data_sources( + adb_client=self.adb_client, + url_ids=url_ids + ) + + async def create_url_agency_links( + self, + url_ids: list[int], + agency_ids: list[int], + ) -> None: + links: 
list[LinkURLAgency] = [] + for url_id in url_ids: + for agency_id in agency_ids: + link = LinkURLAgency( + url_id=url_id, + agency_id=agency_id, + ) + links.append(link) + await self.adb_client.add_all(links) + + async def create_agency(self, agency_id: int = 1) -> None: + agency = Agency( + agency_id=agency_id, + name=generate_test_name(agency_id), + state=None, + county=None, + locality=None + ) + await self.adb_client.add_all([agency]) + + async def create_agencies(self, count: int = 3) -> list[int]: + agencies: list[Agency] = [] + agency_ids: list[int] = [] + for _ in range(count): + agency_id = next_int() + agency = Agency( + agency_id=agency_id, + name=generate_test_name(agency_id), + state=None, + county=None, + locality=None + ) + agencies.append(agency) + agency_ids.append(agency_id) + await self.adb_client.add_all(agencies) + return agency_ids + + async def flag_as_root(self, url_ids: list[int]) -> None: + flag_root_urls: list[FlagRootURL] = [ + FlagRootURL(url_id=url_id) for url_id in url_ids + ] + await self.adb_client.add_all(flag_root_urls) + + async def link_urls_to_root(self, url_ids: list[int], root_url_id: int) -> None: + links: list[LinkURLRootURL] = [ + LinkURLRootURL(url_id=url_id, root_url_id=root_url_id) for url_id in url_ids + ] + await self.adb_client.add_all(links) + + async def link_urls_to_agencies(self, url_ids: list[int], agency_ids: list[int]) -> None: + assert len(url_ids) == len(agency_ids) + links: list[LinkURLAgency] = [] + for url_id, agency_id in zip(url_ids, agency_ids): + link = LinkURLAgency( + url_id=url_id, + agency_id=agency_id + ) + links.append(link) + await self.adb_client.add_all(links) \ No newline at end of file diff --git a/tests/helpers/data_creator/create.py b/tests/helpers/data_creator/create.py new file mode 100644 index 00000000..83b2e3f5 --- /dev/null +++ b/tests/helpers/data_creator/create.py @@ -0,0 +1,75 @@ +from datetime import datetime + +from src.collectors.enums import CollectorType, URLStatus +from 
src.core.enums import BatchStatus, RecordType +from src.db.client.async_ import AsyncDatabaseClient +from src.db.dtos.url.mapping import URLMapping +from src.db.models.impl.batch.pydantic.insert import BatchInsertModel +from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.pydantic import FlagURLValidatedPydantic +from src.db.models.impl.link.batch_url.pydantic import LinkBatchURLPydantic +from src.db.models.impl.url.core.enums import URLSource +from src.db.models.impl.url.core.pydantic.insert import URLInsertModel +from src.db.models.impl.url.data_source.pydantic import URLDataSourcePydantic +from tests.helpers.data_creator.generate import generate_batch, generate_urls, generate_validated_flags, \ + generate_url_data_sources, generate_batch_url_links + + +async def create_batch( + adb_client: AsyncDatabaseClient, + status: BatchStatus = BatchStatus.READY_TO_LABEL, + strategy: CollectorType = CollectorType.EXAMPLE, + date_generated: datetime = datetime.now(), +) -> int: + batch: BatchInsertModel = generate_batch(status=status, strategy=strategy, date_generated=date_generated) + return (await adb_client.bulk_insert([batch], return_ids=True))[0] + +async def create_urls( + adb_client: AsyncDatabaseClient, + status: URLStatus = URLStatus.OK, + source: URLSource = URLSource.COLLECTOR, + record_type: RecordType | None = RecordType.RESOURCES, + collector_metadata: dict | None = None, + count: int = 1 +) -> list[URLMapping]: + urls: list[URLInsertModel] = generate_urls( + status=status, + source=source, + record_type=record_type, + collector_metadata=collector_metadata, + count=count, + ) + url_ids = await adb_client.bulk_insert(urls, return_ids=True) + return [URLMapping(url_id=url_id, url=url.url) for url_id, url in zip(url_ids, urls)] + +async def create_validated_flags( + adb_client: AsyncDatabaseClient, + url_ids: list[int], + validation_type: URLValidatedType, +) -> None: + validated_flags: 
list[FlagURLValidatedPydantic] = generate_validated_flags( + url_ids=url_ids, + validation_type=validation_type, + ) + await adb_client.bulk_insert(validated_flags) + +async def create_url_data_sources( + adb_client: AsyncDatabaseClient, + url_ids: list[int], +) -> None: + url_data_sources: list[URLDataSourcePydantic] = generate_url_data_sources( + url_ids=url_ids, + ) + await adb_client.bulk_insert(url_data_sources) + +async def create_batch_url_links( + adb_client: AsyncDatabaseClient, + url_ids: list[int], + batch_id: int, +) -> None: + batch_url_links: list[LinkBatchURLPydantic] = generate_batch_url_links( + url_ids=url_ids, + batch_id=batch_id, + ) + await adb_client.bulk_insert(batch_url_links) + diff --git a/tests/helpers/data_creator/generate.py b/tests/helpers/data_creator/generate.py new file mode 100644 index 00000000..5dabc016 --- /dev/null +++ b/tests/helpers/data_creator/generate.py @@ -0,0 +1,82 @@ +from datetime import datetime + +from src.collectors.enums import URLStatus, CollectorType +from src.core.enums import BatchStatus, RecordType +from src.db.models.impl.batch.pydantic.insert import BatchInsertModel +from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.pydantic import FlagURLValidatedPydantic +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.models.impl.link.batch_url.pydantic import LinkBatchURLPydantic +from src.db.models.impl.url.core.enums import URLSource +from src.db.models.impl.url.core.pydantic.insert import URLInsertModel +from src.db.models.impl.url.data_source.pydantic import URLDataSourcePydantic +from tests.helpers.counter import next_int + + +def generate_batch( + status: BatchStatus, + strategy: CollectorType = CollectorType.EXAMPLE, + date_generated: datetime = datetime.now(), +) -> BatchInsertModel: + return BatchInsertModel( + strategy=strategy.value, + status=status, + parameters={}, + user_id=1, + 
date_generated=date_generated, + ) + +def generate_batch_url_links( + url_ids: list[int], + batch_id: int +) -> list[LinkBatchURLPydantic]: + return [ + LinkBatchURLPydantic( + url_id=url_id, + batch_id=batch_id, + ) + for url_id in url_ids + ] + +def generate_urls( + status: URLStatus = URLStatus.OK, + source: URLSource = URLSource.COLLECTOR, + record_type: RecordType | None = RecordType.RESOURCES, + collector_metadata: dict | None = None, + count: int = 1 +) -> list[URLInsertModel]: + results: list[URLInsertModel] = [] + for i in range(count): + val: int = next_int() + results.append(URLInsertModel( + url=f"http://example.com/{val}", + status=status, + source=source, + name=f"Example {val}", + collector_metadata=collector_metadata, + record_type=record_type, + )) + return results + +def generate_validated_flags( + url_ids: list[int], + validation_type: URLValidatedType, +) -> list[FlagURLValidatedPydantic]: + return [ + FlagURLValidatedPydantic( + url_id=url_id, + type=validation_type, + ) + for url_id in url_ids + ] + +def generate_url_data_sources( + url_ids: list[int], +) -> list[URLDataSourcePydantic]: + return [ + URLDataSourcePydantic( + url_id=url_id, + data_source_id=url_id, + ) + for url_id in url_ids + ] \ No newline at end of file diff --git a/tests/helpers/data_creator/insert.py b/tests/helpers/data_creator/insert.py new file mode 100644 index 00000000..06b207e3 --- /dev/null +++ b/tests/helpers/data_creator/insert.py @@ -0,0 +1,10 @@ +from src.db.client.async_ import AsyncDatabaseClient +from src.db.templates.markers.bulk.insert import BulkInsertableModel + + +async def bulk_insert_all( + adb_client: AsyncDatabaseClient, + lists_of_models: list[list[BulkInsertableModel]], +): + for list_of_models in lists_of_models: + await adb_client.bulk_insert(list_of_models) \ No newline at end of file diff --git a/tests/helpers/data_creator/models/creation_info/batch/v2.py b/tests/helpers/data_creator/models/creation_info/batch/v2.py index 3e6ed74a..52d7e37d 
100644 --- a/tests/helpers/data_creator/models/creation_info/batch/v2.py +++ b/tests/helpers/data_creator/models/creation_info/batch/v2.py @@ -1,12 +1,12 @@ from pydantic import BaseModel -from src.collectors.enums import URLStatus +from tests.helpers.batch_creation_parameters.enums import URLCreationEnum from tests.helpers.data_creator.models.creation_info.url import URLCreationInfo class BatchURLCreationInfoV2(BaseModel): batch_id: int - urls_by_status: dict[URLStatus, URLCreationInfo] = {} + urls_by_status: dict[URLCreationEnum, URLCreationInfo] = {} @property def url_ids(self) -> list[int]: diff --git a/tests/helpers/data_creator/models/creation_info/url.py b/tests/helpers/data_creator/models/creation_info/url.py index 082769e7..16c45a0a 100644 --- a/tests/helpers/data_creator/models/creation_info/url.py +++ b/tests/helpers/data_creator/models/creation_info/url.py @@ -5,11 +5,12 @@ from src.collectors.enums import URLStatus from src.db.dtos.url.mapping import URLMapping from tests.helpers.batch_creation_parameters.annotation_info import AnnotationInfo +from tests.helpers.batch_creation_parameters.enums import URLCreationEnum class URLCreationInfo(BaseModel): url_mappings: list[URLMapping] - outcome: URLStatus + outcome: URLCreationEnum annotation_info: Optional[AnnotationInfo] = None @property diff --git a/tests/helpers/setup/annotation/core.py b/tests/helpers/setup/annotation/core.py index ff5105cd..70123cb9 100644 --- a/tests/helpers/setup/annotation/core.py +++ b/tests/helpers/setup/annotation/core.py @@ -1,4 +1,5 @@ from src.collectors.enums import URLStatus +from tests.helpers.batch_creation_parameters.enums import URLCreationEnum from tests.helpers.data_creator.core import DBDataCreator from tests.helpers.setup.annotation.model import AnnotationSetupInfo @@ -6,7 +7,7 @@ async def setup_for_get_next_url_for_annotation( db_data_creator: DBDataCreator, url_count: int, - outcome: URLStatus = URLStatus.PENDING + outcome: URLCreationEnum = URLCreationEnum.OK ) 
-> AnnotationSetupInfo: batch_id = db_data_creator.batch() insert_urls_info = db_data_creator.urls( diff --git a/tests/helpers/setup/final_review/core.py b/tests/helpers/setup/final_review/core.py index 6c4a3498..58b1ae49 100644 --- a/tests/helpers/setup/final_review/core.py +++ b/tests/helpers/setup/final_review/core.py @@ -37,7 +37,7 @@ async def add_agency_suggestion() -> int: ) return agency_id - async def add_record_type_suggestion(record_type: RecordType): + async def add_record_type_suggestion(record_type: RecordType) -> None: await db_data_creator.user_record_type_suggestion( url_id=url_mapping.url_id, record_type=record_type diff --git a/tests/helpers/setup/wipe.py b/tests/helpers/setup/wipe.py index 630d0f71..e81c266d 100644 --- a/tests/helpers/setup/wipe.py +++ b/tests/helpers/setup/wipe.py @@ -8,5 +8,7 @@ def wipe_database(connection_string: str) -> None: engine = create_engine(connection_string) with engine.connect() as connection: for table in reversed(Base.metadata.sorted_tables): + if table.info == "view": + continue connection.execute(table.delete()) connection.commit() diff --git a/tests/helpers/simple_test_data_functions.py b/tests/helpers/simple_test_data_functions.py index df455e0e..7c42fd8d 100644 --- a/tests/helpers/simple_test_data_functions.py +++ b/tests/helpers/simple_test_data_functions.py @@ -13,16 +13,15 @@ def generate_test_urls(count: int) -> list[str]: return results -def generate_test_html() -> str: - return """ - - - -This is an example of HTML content.
- - - """ \ No newline at end of file + +def generate_test_url(i: int) -> str: + return f"https://test.com/{i}" + +def generate_test_name(i: int) -> str: + return f"Test Name {i}" + +def generate_test_description(i: int) -> str: + return f"Test description {i}" + +def generate_test_html(i: int) -> str: + return f"