diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..dfdb8b77 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.sh text eol=lf diff --git a/.github/workflows/common_crawler.yaml b/.github/workflows/common_crawler.yaml deleted file mode 100644 index 52b4007d..00000000 --- a/.github/workflows/common_crawler.yaml +++ /dev/null @@ -1,40 +0,0 @@ -name: Common Crawler - -# Pull request will run every day at 1AM. -on: - workflow_dispatch: -env: - # The access token enabling write access to the Huggingface Database - HUGGINGFACE_ACCESS_TOKEN: ${{ secrets.HUGGINGFACE_ACCESS_TOKEN }} - -jobs: - build-and-commit: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - with: - # This is necessary to push commits back to the repository - persist-credentials: true - fetch-depth: 0 # Fetch all history for all tags and branches - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: 3.11.8 - - name: Upgrade pip - run: python -m pip install --upgrade pip - - name: Install dependencies - run: pip install -r source_collectors/common_crawler/requirements_common_crawler_action.txt - - name: Run script - run: python source_collectors/common_crawler/main.py CC-MAIN-2024-10 *.gov police --config source_collectors/common_crawler/config.ini --pages 20 - - name: Configure Git - run: | - git config --local user.email "action@github.com" - git config --local user.name "GitHub Action" - - name: Add common_crawler cache and common_crawler batch_info - run: | - git add source_collectors/common_crawler/data/cache.json - git add source_collectors/common_crawler/data/batch_info.csv - - name: Commit changes - run: git commit -m "Update common_crawler cache and batch_info" - - name: Push changes - run: git push \ No newline at end of file diff --git a/.github/workflows/populate_labelstudio.yml b/.github/workflows/populate_labelstudio.yml deleted file mode 100644 index 09ca68b2..00000000 --- a/.github/workflows/populate_labelstudio.yml +++ /dev/null @@ -1,94 +0,0 @@ -name: Populate LabelStudio - -on: - workflow_dispatch: - inputs: - crawl_id: - description: 'Common Crawl Corpus' - required: true - default: 'CC-MAIN-2024-10' - url: - description: 'URL type' - required: true - default: '*.gov' - keyword: - description: 'keyword' - required: true - default: 'police' - pages: - description: 'num pages' - required: true - default: '2' - record_type: - description: 'record type' - required: false - - -jobs: - run-script: - runs-on: ubuntu-latest - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - with: - ref: main - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.x' - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r annotation_pipeline/requirements.txt - - - name: Run main script - env: - HUGGINGFACE_ACCESS_TOKEN: ${{ secrets.HUGGINGFACE_ACCESS_TOKEN }} - LABEL_STUDIO_ACCESS_TOKEN: ${{ secrets.LABEL_STUDIO_ACCESS_TOKEN }} - LABEL_STUDIO_PROJECT_ID: ${{ secrets.LABEL_STUDIO_PROJECT_ID }} - LABEL_STUDIO_ORGANIZATION: ${{ secrets.LABEL_STUDIO_ORGANIZATION }} - run: | - if [ -n "${{ github.event.inputs.record_type }}" ]; then - python annotation_pipeline/populate_labelstudio.py ${{ github.event.inputs.crawl_id }} "${{ github.event.inputs.url }}" ${{ github.event.inputs.keyword }} --pages ${{ github.event.inputs.pages }} --record_type "${{ github.event.inputs.record_type }}" - else - python annotation_pipeline/populate_labelstudio.py ${{ github.event.inputs.crawl_id }} 
"${{ github.event.inputs.url }}" ${{ github.event.inputs.keyword }} --pages ${{ github.event.inputs.pages }} - fi - - - name: Check created/modified files - run: | - echo "Checking files in annotation_pipeline/data/" - ls -R annotation_pipeline/data/ - - - name: Create new branch - run: | - BRANCH_NAME=bot-update-$(date +%Y%m%d%H%M%S) - git checkout -b $BRANCH_NAME - echo "BRANCH_NAME=$BRANCH_NAME" >> $GITHUB_ENV - - - name: Commit and push outputs - run: | - git config --global user.name "github-actions[bot]" - git config --global user.email "action@github.com" - git add annotation_pipeline/data/batch_info.csv - git add annotation_pipeline/data/cache.json - if [ -d "annotation_pipeline/data/tag_collector" ]; then - git add annotation_pipeline/data/tag_collector/* - fi - git commit -m "Update batch info, cache, and collected urls & tags" - git log -1 --stat - git push --set-upstream origin $BRANCH_NAME - - - name: Create pull request - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - BRANCH_NAME: ${{ env.BRANCH_NAME }} - run: | - PR_TITLE="Update batch info, cache, and collected urls & tags" - PR_BODY="This PR was created automatically by a GitHub Action." - echo "Creating PR from branch $BRANCH_NAME to main" - curl -X POST -H "Authorization: token $GITHUB_TOKEN" \ - -d "{\"title\":\"$PR_TITLE\",\"body\":\"$PR_BODY\",\"head\":\"$BRANCH_NAME\",\"base\":\"main\"}" \ - https://api.github.com/repos/${{ github.repository }}/pulls diff --git a/.github/workflows/test_app.yml b/.github/workflows/test_app.yml index c83608ac..64bf664e 100644 --- a/.github/workflows/test_app.yml +++ b/.github/workflows/test_app.yml @@ -1,27 +1,12 @@ -# This workflow will test the Source Collector App -# Utilizing the docker-compose file in the root directory name: Test Source Collector App -on: pull_request -#jobs: -# build: -# runs-on: ubuntu-latest -# steps: -# - name: Checkout repository -# uses: actions/checkout@v4 -# - name: Run docker-compose -# uses: hoverkraft-tech/compose-action@v2.0.1 -# with: -# compose-file: "docker-compose.yml" -# - name: Execute tests in the running service -# run: | -# docker ps -a && docker exec data-source-identification-app-1 pytest /app/tests/test_automated +on: pull_request jobs: container-job: runs-on: ubuntu-latest timeout-minutes: 20 - container: python:3.12.8 + container: python:3.11.9 services: postgres: @@ -34,22 +19,28 @@ jobs: --health-timeout 5s --health-retries 5 + env: + POSTGRES_PASSWORD: postgres + POSTGRES_USER: postgres + POSTGRES_DB: postgres + POSTGRES_HOST: postgres + POSTGRES_PORT: 5432 + GOOGLE_API_KEY: TEST + GOOGLE_CSE_ID: TEST + steps: - name: Checkout repository uses: actions/checkout@v4 - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt + + - name: Install uv and set the python version + uses: astral-sh/setup-uv@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install the project + run: uv sync --locked --all-extras --dev + - name: Run tests run: | - pytest tests/test_automated - pytest tests/test_alembic - env: - POSTGRES_PASSWORD: postgres - POSTGRES_USER: postgres - POSTGRES_DB: postgres - POSTGRES_HOST: postgres - POSTGRES_PORT: 5432 - GOOGLE_API_KEY: TEST - GOOGLE_CSE_ID: TEST + uv run pytest tests/test_automated + uv run pytest tests/test_alembic diff --git a/agency_identifier/__init__.py b/.project-root similarity index 100% rename from agency_identifier/__init__.py rename to .project-root diff --git a/Dockerfile b/Dockerfile index 8e64b85d..42736a8e 100644 --- 
a/Dockerfile +++ b/Dockerfile @@ -1,17 +1,52 @@ # Dockerfile for Source Collector FastAPI app -FROM python:3.12.8 +FROM python:3.11.9-slim +COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ # Set working directory WORKDIR /app -# Copy project files -COPY . . +COPY pyproject.toml uv.lock ./ # Install dependencies -RUN pip install --no-cache-dir -r requirements.txt +ENV UV_PROJECT_ENVIRONMENT="/usr/local/" +RUN uv sync --locked --no-dev +# Must call from the root directory because uv does not add playwright to path +RUN playwright install-deps chromium +RUN playwright install chromium + + +# Copy project files +COPY api ./api +COPY collector_db ./collector_db +COPY collector_manager ./collector_manager +COPY core ./core +COPY html_tag_collector ./html_tag_collector +COPY hugging_face/url_relevance ./hugging_face/url_relevance +COPY hugging_face/url_record_type_labeling ./hugging_face/url_record_type_labeling +COPY hugging_face/HuggingFaceInterface.py ./hugging_face/HuggingFaceInterface.py +COPY source_collectors ./source_collectors +COPY util ./util +COPY alembic.ini ./alembic.ini +COPY alembic ./alembic +COPY apply_migrations.py ./apply_migrations.py +COPY security_manager ./security_manager +COPY pdap_api_client ./pdap_api_client +COPY execute.sh ./execute.sh +COPY .project-root ./.project-root + +COPY tests/conftest.py ./tests/conftest.py +COPY tests/__init__.py ./tests/__init__.py +COPY tests/test_automated ./tests/test_automated +COPY tests/test_alembic ./tests/test_alembic +COPY tests/helpers ./tests/helpers + +COPY llm_api_logic ./llm_api_logic # Expose the application port EXPOSE 80 -RUN chmod +x execute.sh \ No newline at end of file +RUN chmod +x execute.sh +# Use the below for ease of local development, but remove when pushing to GitHub +# Because there is no .env file in the repository (for security reasons) +#COPY .env ./.env diff --git a/ENV.md b/ENV.md index a8210fb9..fdd7d029 100644 --- a/ENV.md +++ b/ENV.md @@ -2,17 +2,43 @@ This page provides a full list, with description, of all the environment variabl Please ensure these are properly defined in a `.env` file in the root directory. -| Name | Description | Example | -|--------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------| -| `LABEL_STUDIO_ACCESS_TOKEN` | The access token for the Label Studio API. The access token for the Label Studio API. This can be obtained by logging into Label Studio and navigating to the [user account section](https://app.heartex.com/user/account), where the access token can be copied. | `abc123` | -| `LABEL_STUDIO_PROJECT_ID` | The project ID for the Label Studio API. This can be obtained by logging into Label Studio and navigating to the relevant project, where the project id will be in the URL, as in `https://app.heartex.com/projects/58475/` | `58475` | -| `LABEL_STUDIO_ORGANIZATION_ID` | The organization ID for the Label Studio API. This can be obtained by logging into Label Studio and navigating to the [Organization section](https://app.heartex.com/organization?page=1), where the organization ID can be copied. 
| `6758` | -| `GOOGLE_API_KEY` | The API key required for accessing the Google Custom Search API | `abc123` | -| `GOOGLE_CSE_ID` | The CSE ID required for accessing the Google Custom Search API | `abc123` | -|`POSTGRES_USER` | The username for the test database | `test_source_collector_user` | -|`POSTGRES_PASSWORD` | The password for the test database | `HanviliciousHamiltonHilltops` | -|`POSTGRES_DB` | The database name for the test database | `source_collector_test_db` | -|`POSTGRES_HOST` | The host for the test database | `127.0.0.1` | -|`POSTGRES_PORT` | The port for the test database | `5432` | -|`DS_APP_SECRET_KEY`| The secret key used for decoding JWT tokens produced by the Data Sources App. Must match the secret token that is used in the Data Sources App for encoding. |`abc123`| -|`DEV`| Set to any value to run the application in development mode. |`true`| +| Name | Description | Example | +|----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------| +| `GOOGLE_API_KEY` | The API key required for accessing the Google Custom Search API | `abc123` | +| `GOOGLE_CSE_ID` | The CSE ID required for accessing the Google Custom Search API | `abc123` | +|`POSTGRES_USER` | The username for the test database | `test_source_collector_user` | +|`POSTGRES_PASSWORD` | The password for the test database | `HanviliciousHamiltonHilltops` | +|`POSTGRES_DB` | The database name for the test database | `source_collector_test_db` | +|`POSTGRES_HOST` | The host for the test database | `127.0.0.1` | +|`POSTGRES_PORT` | The port for the test database | `5432` | +|`DS_APP_SECRET_KEY`| The secret key used for decoding JWT tokens produced by the Data Sources App. Must match the secret token `JWT_SECRET_KEY` that is used in the Data Sources App for encoding. | `abc123` | +|`DEV`| Set to any value to run the application in development mode. | `true` | +|`DEEPSEEK_API_KEY`| The API key required for accessing the DeepSeek API. | `abc123` | +|`OPENAI_API_KEY`| The API key required for accessing the OpenAI API. | `abc123` | +|`PDAP_EMAIL`| An email address for accessing the PDAP API.[^1] | `abc123@test.com` | +|`PDAP_PASSWORD`| A password for accessing the PDAP API.[^1] | `abc123` | +|`PDAP_API_KEY`| An API key for accessing the PDAP API. | `abc123` | +|`PDAP_API_URL`| The URL for the PDAP API| `https://data-sources-v2.pdap.dev/api`| +|`DISCORD_WEBHOOK_URL`| The URL for the Discord webhook used for notifications| `abc123` | + +[^1]: The user account in question will require elevated permissions to access certain endpoints. At a minimum, the user will require the `source_collector` and `db_write` permissions.
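For local development it can help to confirm these variables are actually being picked up before running the app. A minimal sketch, assuming the `python-dotenv` package is available; the subset of names checked here is illustrative, not a definitive list of what the app requires:

```python
# Minimal sketch: load a local .env file and verify the documented variables are set.
# Assumes the python-dotenv package; the list of names checked is illustrative.
import os

from dotenv import load_dotenv

load_dotenv()  # reads .env from the current working directory

required = ["GOOGLE_API_KEY", "GOOGLE_CSE_ID", "POSTGRES_USER", "POSTGRES_PASSWORD", "POSTGRES_DB"]
missing = [name for name in required if not os.getenv(name)]
if missing:
    raise RuntimeError(f"Missing required environment variables: {missing}")
```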
+ +## Foreign Data Wrapper (FDW) +``` +FDW_DATA_SOURCES_HOST=127.0.0.1 # The host of the Data Sources Database, used for FDW setup +FDW_DATA_SOURCES_PORT=1234 # The port of the Data Sources Database, used for FDW setup +FDW_DATA_SOURCES_USER=fdw_user # The username for the Data Sources Database, used for FDW setup +FDW_DATA_SOURCES_PASSWORD=password # The password for the Data Sources Database, used for FDW setup +FDW_DATA_SOURCES_DB=db_name # The database name for the Data Sources Database, used for FDW setup + +``` + +## Data Dumper + +``` +PROD_DATA_SOURCES_HOST=127.0.0.1 # The host of the production Data Sources Database, used for Data Dumper +PROD_DATA_SOURCES_PORT=1234 # The port of the production Data Sources Database, used for Data Dumper +PROD_DATA_SOURCES_USER=dump_user # The username for the production Data Sources Database, used for Data Dumper +PROD_DATA_SOURCES_PASSWORD=password # The password for the production Data Sources Database, used for Data Dumper +PROD_DATA_SOURCES_DB=db_name # The database name for the production Data Sources Database, used for Data Dumper +``` \ No newline at end of file diff --git a/README.md b/README.md index 5a39d2bd..78b6fbfe 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,12 @@ core | A module which integrates other components, such as collector_manager and api | API for interacting with collector_manager, core, and collector_db local_database | Resources for setting up a test database for local development +## Installation + +``` +uv sync +``` + ## How to use 1. Create an .env file in this directory with these contents, or set the environment variable another way: `VUE_APP_PDAP_API_KEY=KeyGoesHere` diff --git a/agency_identifier/README.md b/agency_identifier/README.md deleted file mode 100644 index c1dadcf2..00000000 --- a/agency_identifier/README.md +++ /dev/null @@ -1,40 +0,0 @@ -# Agency Identifier - -The Agency Identifier is a Python application that matches URLs with an agency from the PDAP database. It takes a list of URLs as input, either from a CSV file or a DataFrame, and returns a DataFrame with the matched agencies. - -## How to use - -### Running from the command line - -1. Clone the repository. -2. Create a CSV file containing a list of URLs to be identified. The URLs should be listed one per line, and the file should have at least a "url" column. -3. Run the command `python3 identifier.py [url_file]`, replacing `[url_file]` with the path to your CSV file. -4. The results will be written to a file named `results.csv` in the same directory. - -### Using the "identifier_main" function - -If you're using the Agency Identifier in your own Python code, you can import the `process_and_write_data` function. This function takes a DataFrame as an argument and returns a DataFrame with the matched agencies. 
- -Here's an example of how to use it: - -```python -import polar as pl -from identifier import process_and_write_data - -# Create a DataFrame with the URLs to be identified -df = pl.DataFrame({"url": ["http://agency1.com/page1", "http://agency2.com/page2"]}) - -# Call the identifier_main function -result = process_and_write_data(df) - -# Print the resulting DataFrame -print(result) -``` - -# Requirements - -- Python 3 -- urllib -- re -- polars -- requests \ No newline at end of file diff --git a/agency_identifier/identifier.py b/agency_identifier/identifier.py deleted file mode 100644 index 786aeba6..00000000 --- a/agency_identifier/identifier.py +++ /dev/null @@ -1,234 +0,0 @@ -import os -import re -import sys -from urllib.parse import urlparse - -import polars -import requests - -API_URL = "https://data-sources.pdap.io/api/agencies/" - - -def get_page_data(page: int) -> dict: - """Fetches a page of data from the API. - - Args: - page (int): The page number to fetch. - - Returns: - dict: The data for the page. - """ - api_key = "Bearer " + os.getenv("VUE_APP_PDAP_API_KEY") - response = requests.get(f"{API_URL}{page}", headers={"Authorization": api_key}) - if response.status_code != 200: - raise Exception("Request to PDAP API failed. Response code:", response.status_code) - return response.json()["data"] - - -def get_agencies_data() -> polars.DataFrame: - """Retrives a list of agency dictionaries from file. - - Returns: - list: List of agency dictionaries. - """ - page = 1 - agencies_df = polars.DataFrame() - results = get_page_data(page) - - while results: - # Use list comprehension to clean results - clean_results = clean_page_data_results(results) - new_agencies_df = polars.DataFrame(clean_results) - if not new_agencies_df.is_empty(): - agencies_df = polars.concat([agencies_df, new_agencies_df]) - page += 1 - results = get_page_data(page) - - return agencies_df - - -def clean_page_data_results(results: list[dict[str, str]]) -> list[dict[str, str]]: - clean_results = [] - for result in results: - clean_result = {} - for k, v in result.items(): - if v is None: - clean_result[k] = "" - else: - clean_result[k] = v - clean_results.append(clean_result) - return clean_results - - -def parse_hostname(url: str) -> str: - """Retrieves the hostname (example.com) from a url string. - - Args: - url (str): Url to parse. - - Returns: - str: The url's hostname. - """ - try: - # Remove leading and trailing whitespaces and quotes - url = url.strip().strip('"') - - # Add "http://" to the url if it's not present - if not re.match(r'http(s)?://', url): - url = "http://" + url - - # Parse the url and retrieve the hostname - parsed_url = urlparse(url) - hostname = parsed_url.hostname - - # Remove "www." from the hostname - hostname = re.sub(r'^www\.', '', hostname) - except Exception as e: - print(f"An error occurred while parsing the URL: {e}") - raise e - return hostname - - -def remove_http(url: str) -> str: - """Removes http(s)://www. from a given url so that different protocols don't throw off the matcher. - - Args: - url (str): Url to remove http from. - - Returns: - str: The url without http(s)://www. - """ - try: - # Remove http(s)://www. and www. prefixes from the url - url = re.sub(r'^(http(s)?://)?(www\.)?', '', url) - # Ensure the url ends with a / - if not url.endswith('/'): - url += '/' - except Exception as e: - print(f"An error occurred while processing the URL: {e}") - raise e - return url - - -def match_agencies(agencies, agency_hostnames, url): - """Attempts to match a url with an agency. 
- - Args: - agencies (list): List of agency dictionaries. - agency_hostnames (list): List of corresponding agency hostnames. - url (str): Url to match. - - Returns: - dict: Dictionary of a match in the form {"url": url, "agency": matched_agency}. - """ - url = url.strip().strip('"') - url_hostname = parse_hostname(url) - - if url_hostname in agency_hostnames: - # All agencies with the same hostname as the url are found - matched_agency = [ - agencies[i] for i, agency_hostname in enumerate(agency_hostnames) if url_hostname == agency_hostname - ] - else: - return {"url": url, "agency": [], "status": "No match found"} - - # More than one agency was found - if len(matched_agency) > 1: - url_no_http = remove_http(url) - - for agency in matched_agency: - agency_homepage = remove_http(agency["homepage_url"]) - # It is assumed that if the url begins with the agency's url, then it belongs to that agency - if url_no_http.startswith(agency_homepage): - return {"url": url, "agency": agency, "status": "Match found"} - break - - return {"url": url, "agency": [], "status": "Contested match"} - - return {"url": url, "agency": matched_agency[0], "status": "Match found"} - - -def match_urls_to_agencies_and_clean_data(urls_df: polars.DataFrame) -> polars.DataFrame: - agencies_df = get_agencies_data() - # Filter out agencies without a homepage_url set - # Define column names as variables for flexibility - homepage_url_col = "homepage_url" - hostname_col = "hostname" - count_data_sources_col = "count_data_sources" - max_data_sources_col = "max_data_sources" - - # Perform operations on DataFrame - try: - agencies_df = ( - agencies_df - # Filter out rows without a homepage_url - .filter(polars.col(homepage_url_col).is_not_null()) - .filter(polars.col(homepage_url_col) != "") - # Add a new column 'hostname' by applying the parse_hostname function to 'homepage_url' - .with_columns(polars.col(homepage_url_col).map_elements(parse_hostname).alias(hostname_col), - polars.col(count_data_sources_col).fill_null(0)) - # Add a new column 'max_data_sources' which is the max of 'count_data_sources' over 'hostname' - .with_columns(polars.col(count_data_sources_col).max().over(hostname_col).alias(max_data_sources_col)) - # Filter rows where 'count_data_sources' equals 'max_data_sources' - .filter(polars.col(count_data_sources_col) == polars.col(max_data_sources_col)) - # Keep only unique rows based on 'homepage_url' - .unique(subset=[homepage_url_col]) - ) - print("Indentifying agencies...") - # Add a new column 'hostname' by applying the parse_hostname function to 'url' - urls_df = urls_df.with_columns(polars.col("url").map_elements(parse_hostname).alias("hostname")) - - # Join urls_df with agencies_df on 'hostname' - matched_agencies_df = urls_df.join(agencies_df, on="hostname", how="left") - - # Replace all null values with an empty string - matched_agencies_clean_df = matched_agencies_df.with_columns(polars.all().fill_null("")) - except Exception as e: - print(f"An error occurred while processing the data: {e}") - raise e - return matched_agencies_clean_df - - -def read_data(file_path: str) -> polars.DataFrame: - try: - return polars.read_csv(file_path) - except Exception as e: - print(f"An error occurred while reading the file: {e}") - raise e - - -def write_data(df: polars.DataFrame, file_path: str): - try: - df.write_csv(file_path) - print("Results written to results.csv") - except Exception as e: - print(f"An error occurred while writing to the file: {e}") - raise e - - -def process_data(urls_df: polars.DataFrame) -> 
polars.DataFrame: - matched_agencies_df = match_urls_to_agencies_and_clean_data(urls_df) - - # Filter out rows where the hostname is not null - matches_only = matched_agencies_df.filter(polars.col("hostname").is_not_null()) - num_matches = len(matches_only) - num_urls = len(urls_df) - percent_urls_matched = 100 * float(num_matches) / float(num_urls) - - # Print the number and percentage of URLs that were matched - print(f"\n{num_matches} / {num_urls} ({percent_urls_matched:0.1f}%) of urls identified") - - # Return the DataFrame containing only the matched URLs - return matches_only - - -def process_and_write_data(input_file: str, output_file: str): - urls_df = read_data(input_file) - matches_only = process_data(urls_df) - if not matches_only.is_empty(): - write_data(matches_only, output_file) - - -if __name__ == "__main__": - process_and_write_data(sys.argv[1], "results.csv") - print("Results written to results.csv") diff --git a/alembic.ini b/alembic.ini index 7cc1a0d5..cfa2db9a 100644 --- a/alembic.ini +++ b/alembic.ini @@ -3,13 +3,13 @@ [alembic] # path to migration scripts # Use forward slashes (/) also on windows to provide an os agnostic path -script_location = collector_db/alembic +script_location = alembic # template used to generate migration file names; The default value is %%(rev)s_%%(slug)s # Uncomment the line below if you want the files to be prepended with date and time # see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file # for all available tokens -# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s +file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s # sys.path path, will be prepended to sys.path if present. # defaults to the current working directory. diff --git a/collector_db/alembic/README.md b/alembic/README.md similarity index 100% rename from collector_db/alembic/README.md rename to alembic/README.md diff --git a/collector_db/alembic/env.py b/alembic/env.py similarity index 82% rename from collector_db/alembic/env.py rename to alembic/env.py index 69587988..7eaa1a8b 100644 --- a/collector_db/alembic/env.py +++ b/alembic/env.py @@ -1,3 +1,4 @@ +from datetime import datetime from logging.config import fileConfig from alembic import context @@ -59,6 +60,13 @@ def run_migrations_online() -> None: and associate a connection with the context. 
""" + + def process_revision_directives(context, revision, directives): + # 20210801211024 for a migration generated on Aug 1st, 2021 at 21:10:24 + rev_id = datetime.now().strftime("%Y%m%d%H%M%S") + for directive in directives: + directive.rev_id = rev_id + connectable = engine_from_config( config.get_section(config.config_ini_section, {}), prefix="sqlalchemy.", @@ -67,7 +75,9 @@ def run_migrations_online() -> None: with connectable.connect() as connection: context.configure( - connection=connection, target_metadata=target_metadata + connection=connection, + target_metadata=target_metadata, + process_revision_directives=process_revision_directives ) with context.begin_transaction(): diff --git a/collector_db/alembic/script.py.mako b/alembic/script.py.mako similarity index 100% rename from collector_db/alembic/script.py.mako rename to alembic/script.py.mako diff --git a/alembic/versions/072b32a45b1c_add_task_tables_and_linking_logic.py b/alembic/versions/072b32a45b1c_add_task_tables_and_linking_logic.py new file mode 100644 index 00000000..f408396f --- /dev/null +++ b/alembic/versions/072b32a45b1c_add_task_tables_and_linking_logic.py @@ -0,0 +1,79 @@ +"""Add Task Tables and linking logic + +Revision ID: 072b32a45b1c +Revises: dae00e5aa8dd +Create Date: 2025-01-27 15:48:02.713484 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + +from collector_db.enums import PGEnum + +# revision identifiers, used by Alembic. +revision: str = '072b32a45b1c' +down_revision: Union[str, None] = 'dae00e5aa8dd' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + +task_type = PGEnum( + 'HTML', + 'Relevancy', + 'Record Type', + name='task_type', +) + + +def upgrade() -> None: + op.create_table('tasks', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('task_type', task_type, nullable=False), + sa.Column( + 'task_status', + PGEnum( + 'complete', 'error', 'in-process', 'aborted', + name='batch_status', + create_type=False + ), + nullable=False + ), + sa.Column('updated_at', sa.TIMESTAMP(), server_default=sa.text('now()'), nullable=False), + sa.PrimaryKeyConstraint('id'), + ) + op.create_table('task_errors', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('task_id', sa.Integer(), nullable=False), + sa.Column('error', sa.Text(), nullable=False), + sa.Column('updated_at', sa.TIMESTAMP(), server_default=sa.text('now()'), nullable=False), + sa.ForeignKeyConstraint(['task_id'], ['tasks.id'], ondelete='CASCADE'), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('link_task_urls', + sa.Column('task_id', sa.Integer(), nullable=False), + sa.Column('url_id', sa.Integer(), nullable=False), + sa.ForeignKeyConstraint(['task_id'], ['tasks.id'], ondelete='CASCADE'), + sa.ForeignKeyConstraint(['url_id'], ['urls.id'], ondelete='CASCADE'), + sa.PrimaryKeyConstraint('task_id', 'url_id'), + sa.UniqueConstraint('task_id', 'url_id', name='uq_task_id_url_id') + ) + # Change to URL Error Info requires deleting prior data + op.execute("DELETE FROM url_error_info;") + + op.add_column('url_error_info', sa.Column('task_id', sa.Integer(), nullable=False)) + op.add_column("url_metadata", sa.Column('notes', sa.Text(), nullable=True)) + op.create_unique_constraint('uq_url_id_error', 'url_error_info', ['url_id', 'task_id']) + op.create_foreign_key("url_error_info_task_id_fkey", 'url_error_info', 'tasks', ['task_id'], ['id']) + + +def downgrade() -> None: + + op.drop_constraint("url_error_info_task_id_fkey", 
'url_error_info', type_='foreignkey') + op.drop_constraint('uq_url_id_error', 'url_error_info', type_='unique') + op.drop_column('url_error_info', 'task_id') + op.drop_table('link_task_urls') + op.drop_table('task_errors') + op.drop_table('tasks') + + task_type.drop(op.get_bind(), checkfirst=True) diff --git a/collector_db/alembic/versions/108dac321086_update_metadata_validation_status.py b/alembic/versions/108dac321086_update_metadata_validation_status.py similarity index 81% rename from collector_db/alembic/versions/108dac321086_update_metadata_validation_status.py rename to alembic/versions/108dac321086_update_metadata_validation_status.py index 5212865a..aa05ee1b 100644 --- a/collector_db/alembic/versions/108dac321086_update_metadata_validation_status.py +++ b/alembic/versions/108dac321086_update_metadata_validation_status.py @@ -43,13 +43,13 @@ def upgrade() -> None: def downgrade() -> None: validation_status.create(op.get_bind()) - - op.alter_column( - table_name="url_metadata", - column_name="validation_status", - existing_type=metadata_validation_status, - type_=validation_status, - postgresql_using="validation_status::text::validation_status" - ) + # + # op.alter_column( + # table_name="url_metadata", + # column_name="validation_status", + # existing_type=metadata_validation_status, + # type_=validation_status, + # postgresql_using="validation_status::text::validation_status" + # ) metadata_validation_status.drop(op.get_bind(), checkfirst=True) diff --git a/alembic/versions/19bf57df581a_add_url_agency_suggestions.py b/alembic/versions/19bf57df581a_add_url_agency_suggestions.py new file mode 100644 index 00000000..608fcd1b --- /dev/null +++ b/alembic/versions/19bf57df581a_add_url_agency_suggestions.py @@ -0,0 +1,69 @@ +"""Add url_agency_suggestions + +Revision ID: 19bf57df581a +Revises: 072b32a45b1c +Create Date: 2025-02-02 10:33:02.029875 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from collector_db.enums import PGEnum +# revision identifiers, used by Alembic. 
+revision: str = '19bf57df581a' +down_revision: Union[str, None] = '072b32a45b1c' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +suggestion_type_enum = PGEnum( + 'Auto Suggestion', + 'Manual Suggestion', + 'Unknown', + 'New Agency', + 'Confirmed', name='url_agency_suggestion_type' +) + +old_task_options = ( + 'HTML', + 'Relevancy', + 'Record Type', +) +new_task_options = old_task_options + ('Agency Identification',) + +old_task_type_enum = PGEnum( + *old_task_options, + name='task_type_old' +) + +new_task_type_enum = PGEnum( + *new_task_options, + name='task_type' +) + +def upgrade() -> None: + op.execute("ALTER TYPE task_type ADD VALUE 'Agency Identification';") + op.create_table('url_agency_suggestions', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('url_id', sa.Integer(), nullable=False), + sa.Column('suggestion_type', suggestion_type_enum, nullable=False), + sa.Column('agency_id', sa.Integer(), nullable=True), + sa.Column('agency_name', sa.String(), nullable=True), + sa.Column('state', sa.String(), nullable=True), + sa.Column('county', sa.String(), nullable=True), + sa.Column('locality', sa.String(), nullable=True), + sa.Column('updated_at', sa.TIMESTAMP(), server_default=sa.text('now()'), nullable=False), + sa.ForeignKeyConstraint(['url_id'], ['urls.id'], ), + sa.PrimaryKeyConstraint('id') + ) + + +def downgrade() -> None: + op.drop_table('url_agency_suggestions') + suggestion_type_enum.drop(op.get_bind(), checkfirst=True) + old_task_type_enum.create(op.get_bind()) + op.execute("DELETE FROM TASKS;") + op.execute("ALTER TABLE tasks ALTER COLUMN task_type TYPE task_type_old USING task_type::text::task_type_old;") + new_task_type_enum.drop(op.get_bind(), checkfirst=True) + op.execute("ALTER TYPE task_type_old RENAME TO task_type;") diff --git a/alembic/versions/2025_02_23_0855-0c6dc00806ce_update_confirmed_url_agency_unique_.py b/alembic/versions/2025_02_23_0855-0c6dc00806ce_update_confirmed_url_agency_unique_.py new file mode 100644 index 00000000..b081ec9d --- /dev/null +++ b/alembic/versions/2025_02_23_0855-0c6dc00806ce_update_confirmed_url_agency_unique_.py @@ -0,0 +1,44 @@ +"""Update confirmed_url_agency unique constraint to be only url_id + +Revision ID: 0c6dc00806ce +Revises: 76f902fe18cd +Create Date: 2025-02-23 08:55:07.046607 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision: str = '0c6dc00806ce' +down_revision: Union[str, None] = '76f902fe18cd' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.drop_constraint( + constraint_name="uq_confirmed_url_agency", + table_name="confirmed_url_agency", + ) + + op.create_unique_constraint( + constraint_name="uq_confirmed_url_agency", + table_name="confirmed_url_agency", + columns=["url_id"], + ) + + +def downgrade() -> None: + op.drop_constraint( + constraint_name="uq_confirmed_url_agency", + table_name="confirmed_url_agency", + ) + + op.create_unique_constraint( + constraint_name="uq_confirmed_url_agency", + table_name="confirmed_url_agency", + columns=["url_id", "agency_id"], + ) \ No newline at end of file diff --git a/alembic/versions/2025_02_23_1023-33421c0590bb_overhaul_annotation_organization.py b/alembic/versions/2025_02_23_1023-33421c0590bb_overhaul_annotation_organization.py new file mode 100644 index 00000000..55442f50 --- /dev/null +++ b/alembic/versions/2025_02_23_1023-33421c0590bb_overhaul_annotation_organization.py @@ -0,0 +1,294 @@ +"""Overhaul annotation organization + +New Tables +- AutoRelevantSuggestions +- AutoRecordTypeSuggestions +- UserRelevantSuggestions +- UserRecordTypeSuggestions + +New Columns for `URL` +- `agency_id` +- `record_type` +- `relevant` + +Removed Tables +- `URLMetadata` +- `ConfirmedURLAgency` +- `MetadataAnnotation` + +Update URL Status to just three enum value: +- VALIDATED +- SUBMITTED +- PENDING + +Revision ID: 33421c0590bb +Revises: 0c6dc00806ce +Create Date: 2025-02-23 10:23:19.696248 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy import UniqueConstraint + +from util.alembic_helpers import switch_enum_type + +# revision identifiers, used by Alembic. 
+revision: str = '33421c0590bb' +down_revision: Union[str, None] = '0c6dc00806ce' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + +record_type_values = [ + "Accident Reports", + "Arrest Records", + "Calls for Service", + "Car GPS", + "Citations", + "Dispatch Logs", + "Dispatch Recordings", + "Field Contacts", + "Incident Reports", + "Misc Police Activity", + "Officer Involved Shootings", + "Stops", + "Surveys", + "Use of Force Reports", + "Vehicle Pursuits", + "Complaints & Misconduct", + "Daily Activity Logs", + "Training & Hiring Info", + "Personnel Records", + "Annual & Monthly Reports", + "Budgets & Finances", + "Contact Info & Agency Meta", + "Geographic", + "List of Data Sources", + "Policies & Contracts", + "Crime Maps & Reports", + "Crime Statistics", + "Media Bulletins", + "Records Request Info", + "Resources", + "Sex Offender Registry", + "Wanted Persons", + "Booking Reports", + "Court Cases", + "Incarceration Records", + "Other" +] + + +record_type_enum = sa.Enum(*record_type_values, name='record_type') + +def run_data_migrations(): + + op.execute( + """ + INSERT INTO AUTO_RELEVANT_SUGGESTIONS (url_id, relevant) + SELECT url_id, LOWER(value)::boolean + FROM public.url_metadata + WHERE validation_source = 'Machine Learning' + and attribute = 'Relevant' + """ + ) + + op.execute( + """ + INSERT INTO AUTO_RECORD_TYPE_SUGGESTIONS(url_id, record_type) + SELECT url_id, value::record_type + FROM public.url_metadata + WHERE validation_source = 'Machine Learning' + and attribute = 'Record Type' + """ + ) + + op.execute( + """ + INSERT INTO USER_RELEVANT_SUGGESTIONS(url_id, relevant, user_id) + SELECT um.url_id, LOWER(um.value)::boolean, ma.user_id + FROM public.url_metadata um + INNER join metadata_annotations ma on um.id = ma.metadata_id + where um.attribute = 'Relevant' + """ + ) + + op.execute( + """ + INSERT INTO USER_RECORD_TYPE_SUGGESTIONS(url_id, record_type, user_id) + SELECT um.url_id, um.value::record_type, ma.user_id + FROM public.url_metadata um + INNER join metadata_annotations ma on um.id = ma.metadata_id + where um.attribute = 'Record Type' + + """ + ) + +def upgrade() -> None: + + # Create the new tables + op.create_table( + 'auto_relevant_suggestions', + sa.Column('id', sa.Integer(), primary_key=True), + sa.Column('url_id', sa.Integer(), sa.ForeignKey('urls.id', ondelete='CASCADE'), nullable=False), + sa.Column('relevant', sa.Boolean(), nullable=False), + sa.Column('created_at', sa.TIMESTAMP(), nullable=False, server_default=sa.text('now()')), + sa.Column('updated_at', sa.TIMESTAMP(), nullable=False, server_default=sa.text('now()'), onupdate=sa.text('now()')), + UniqueConstraint( + 'url_id', + name='auto_relevant_suggestions_uq_url_id' + ) + ) + + op.create_table( + 'auto_record_type_suggestions', + sa.Column('id', sa.Integer(), primary_key=True), + sa.Column( + 'url_id', + sa.Integer(), + sa.ForeignKey('urls.id', ondelete='CASCADE'), + nullable=False + ), + sa.Column('record_type', record_type_enum, nullable=False), + sa.Column('created_at', sa.TIMESTAMP(), nullable=False, server_default=sa.text('now()')), + sa.Column('updated_at', sa.TIMESTAMP(), nullable=False, server_default=sa.text('now()'), onupdate=sa.text('now()')), + UniqueConstraint( + 'url_id', + name='auto_record_type_suggestions_uq_url_id' + ) + ) + + op.create_table( + 'user_relevant_suggestions', + sa.Column('id', sa.Integer(), primary_key=True), + sa.Column( + 'url_id', + sa.Integer(), + sa.ForeignKey('urls.id', ondelete='CASCADE'), + 
nullable=False + ), + sa.Column('user_id', sa.Integer(), nullable=False), + sa.Column('relevant', sa.Boolean(), nullable=False), + sa.Column('created_at', sa.TIMESTAMP(), nullable=False, server_default=sa.text('now()')), + sa.Column('updated_at', sa.TIMESTAMP(), nullable=False, server_default=sa.text('now()'), onupdate=sa.text('now()')), + sa.UniqueConstraint("url_id", "user_id", name="uq_user_relevant_suggestions") + ) + + op.create_table( + 'user_record_type_suggestions', + sa.Column('id', sa.Integer(), primary_key=True), + sa.Column( + 'url_id', + sa.Integer(), + sa.ForeignKey('urls.id', ondelete='CASCADE'), + nullable=False + ), + sa.Column('user_id', sa.Integer(), nullable=False), + sa.Column('record_type', record_type_enum, nullable=False), + sa.Column('created_at', sa.TIMESTAMP(), nullable=False, server_default=sa.text('now()')), + sa.Column('updated_at', sa.TIMESTAMP(), nullable=False, server_default=sa.text('now()'), onupdate=sa.text('now()')), + sa.UniqueConstraint("url_id", "user_id", name="uq_user_record_type_suggestions") + ) + + # Add the new columns + op.add_column( + 'urls', + sa.Column('record_type', record_type_enum, nullable=True) + ) + + op.add_column( + 'urls', + sa.Column('relevant', sa.Boolean(), nullable=True) + ) + + op.add_column( + 'urls', + sa.Column( + 'agency_id', + sa.Integer(), + sa.ForeignKey('agencies.agency_id', ondelete='NO ACTION'), + nullable=True + ) + ) + + run_data_migrations() + + # Delete the old tables + op.drop_table('metadata_annotations') + op.drop_table('url_metadata') + op.drop_table('confirmed_url_agency') + + switch_enum_type( + table_name='urls', + column_name='outcome', + enum_name='url_status', + new_enum_values=['pending', 'submitted', 'validated', 'error', 'duplicate'] + ) + + + + + +def downgrade() -> None: + # Drop the new tables + op.drop_table('auto_relevant_suggestions') + op.drop_table('auto_record_type_suggestions') + op.drop_table('user_relevant_suggestions') + op.drop_table('user_record_type_suggestions') + + # Drop the new columns + op.drop_column('urls', 'record_type') + op.drop_column('urls', 'relevant') + op.drop_column('urls', 'agency_id') + + # Create the old tables + op.create_table( + 'url_metadata', + sa.Column('id', sa.Integer(), primary_key=True), + sa.Column('url_id', sa.Integer(), sa.ForeignKey('urls.id', ondelete='CASCADE'), nullable=False), + sa.Column('attribute', sa.String(), nullable=False), + sa.Column('value', sa.String(), nullable=False), + sa.Column('created_at', sa.TIMESTAMP(), nullable=False, server_default=sa.text('now()')), + sa.Column('updated_at', sa.TIMESTAMP(), nullable=False, server_default=sa.text('now()'), onupdate=sa.text('now()')), + sa.UniqueConstraint( + "url_id", + "attribute", + name="uq_url_id_attribute"), + ) + + op.create_table( + 'confirmed_url_agency', + sa.Column('id', sa.Integer(), primary_key=True), + sa.Column('url_id', sa.Integer(), sa.ForeignKey('urls.id', ondelete='CASCADE'), nullable=False), + sa.Column( + 'agency_id', + sa.Integer(), + sa.ForeignKey('agencies.agency_id', ondelete='CASCADE'), nullable=False), + sa.Column('created_at', sa.TIMESTAMP(), nullable=False, server_default=sa.text('now()')), + sa.Column('updated_at', sa.TIMESTAMP(), nullable=False, server_default=sa.text('now()'), onupdate=sa.text('now()')), + sa.UniqueConstraint("url_id", name="uq_confirmed_url_agency") + ) + + op.create_table( + 'metadata_annotations', + sa.Column('id', sa.Integer(), primary_key=True), + sa.Column('metadata_id', sa.Integer(), sa.ForeignKey('url_metadata.id', ondelete='CASCADE'), 
nullable=False), + sa.Column('user_id', sa.Integer(), nullable=False), + sa.Column('created_at', sa.TIMESTAMP(), nullable=False, server_default=sa.text('now()')), + sa.Column('updated_at', sa.TIMESTAMP(), nullable=False, server_default=sa.text('now()'), onupdate=sa.text('now()')), + sa.UniqueConstraint( + "user_id", + "metadata_id", + name="metadata_annotations_uq_user_id_metadata_id"), + ) + + switch_enum_type( + table_name='urls', + column_name='outcome', + enum_name='url_status', + new_enum_values=['pending', 'submitted', 'human_labeling', 'rejected', 'duplicate', 'error'] + ) + + # Drop enum + record_type_enum.drop(op.get_bind()) diff --git a/alembic/versions/2025_03_11_1539-69f7cc4f56d4_create_approving_user_url_table.py b/alembic/versions/2025_03_11_1539-69f7cc4f56d4_create_approving_user_url_table.py new file mode 100644 index 00000000..f38d33dc --- /dev/null +++ b/alembic/versions/2025_03_11_1539-69f7cc4f56d4_create_approving_user_url_table.py @@ -0,0 +1,34 @@ +"""Create approving_user_url table + +Revision ID: 69f7cc4f56d4 +Revises: 33421c0590bb +Create Date: 2025-03-11 15:39:27.563567 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = '69f7cc4f56d4' +down_revision: Union[str, None] = '33421c0590bb' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.create_table( + 'approving_user_url', + sa.Column('id', sa.Integer(), nullable=False, primary_key=True), + sa.Column('user_id', sa.Integer(), nullable=False), + sa.Column('url_id', sa.Integer(), nullable=False), + sa.Column('created_at', sa.TIMESTAMP(), nullable=False, server_default=sa.text('now()')), + sa.ForeignKeyConstraint(['url_id'], ['urls.id'], ), + sa.UniqueConstraint('url_id', name='approving_user_url_uq_user_id_url_id') + ) + + +def downgrade() -> None: + op.drop_table('approving_user_url') diff --git a/alembic/versions/2025_03_15_1745-6eb8084e2f48_add_name_description_and_url_optional_.py b/alembic/versions/2025_03_15_1745-6eb8084e2f48_add_name_description_and_url_optional_.py new file mode 100644 index 00000000..e8b542f9 --- /dev/null +++ b/alembic/versions/2025_03_15_1745-6eb8084e2f48_add_name_description_and_url_optional_.py @@ -0,0 +1,62 @@ +"""Add name, description, and url optional data source metadata + +Revision ID: 6eb8084e2f48 +Revises: 69f7cc4f56d4 +Create Date: 2025-03-15 17:45:46.619721 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + +from util.alembic_helpers import switch_enum_type + +# revision identifiers, used by Alembic. 
+revision: str = '6eb8084e2f48' +down_revision: Union[str, None] = '69f7cc4f56d4' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # Add name and description columns to URL table + op.add_column('urls', sa.Column('name', sa.String(), nullable=True)) + op.add_column('urls', sa.Column('description', sa.String(), nullable=True)) + + # Create URL_optional_data_source_metadata + op.create_table( + 'url_optional_data_source_metadata', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('url_id', sa.Integer(), nullable=False), + sa.Column('record_formats', sa.ARRAY(sa.String()), nullable=True), + sa.Column('data_portal_type', sa.String(), nullable=True), + sa.Column('supplying_entity', sa.String(), nullable=True), + sa.ForeignKeyConstraint(['url_id'], ['urls.id'], ), + sa.PrimaryKeyConstraint('id') + ) + + # Add 'Misc Metadata' to TaskType enum + switch_enum_type( + table_name='tasks', + column_name='task_type', + enum_name='task_type', + new_enum_values=['HTML', 'Relevancy', 'Record Type', 'Agency Identification', 'Misc Metadata'] + ) + + +def downgrade() -> None: + # Remove name and description columns from URL table + op.drop_column('urls', 'name') + op.drop_column('urls', 'description') + + # Drop URL_optional_data_source_metadata + op.drop_table('url_optional_data_source_metadata') + + # Remove 'Misc Metadata' from TaskType enum + switch_enum_type( + table_name='tasks', + column_name='task_type', + enum_name='task_type', + new_enum_values=['HTML', 'Relevancy', 'Record Type', 'Agency Identification'] + ) diff --git a/alembic/versions/2025_03_28_0807-5ea47dacd0ef_remove_agency_id_parameter_from_urls.py b/alembic/versions/2025_03_28_0807-5ea47dacd0ef_remove_agency_id_parameter_from_urls.py new file mode 100644 index 00000000..bc3f9bd3 --- /dev/null +++ b/alembic/versions/2025_03_28_0807-5ea47dacd0ef_remove_agency_id_parameter_from_urls.py @@ -0,0 +1,56 @@ +"""Remove agency_id parameter from URLs + +Revision ID: 5ea47dacd0ef +Revises: 6eb8084e2f48 +Create Date: 2025-03-28 08:07:24.442764 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision: str = '5ea47dacd0ef' +down_revision: Union[str, None] = '6eb8084e2f48' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # Remove agency ID column from URLs + op.drop_column( + 'urls', + 'agency_id' + ) + + op.create_table( + 'confirmed_url_agency', + sa.Column('id', sa.Integer(), primary_key=True), + sa.Column('url_id', sa.Integer(), sa.ForeignKey('urls.id', ondelete='CASCADE'), nullable=False), + sa.Column( + 'agency_id', + sa.Integer(), + sa.ForeignKey('agencies.agency_id', ondelete='CASCADE'), nullable=False), + sa.Column('created_at', sa.TIMESTAMP(), nullable=False, server_default=sa.text('now()')), + sa.Column('updated_at', sa.TIMESTAMP(), nullable=False, server_default=sa.text('now()'), onupdate=sa.text('now()')), + sa.UniqueConstraint( + 'url_id', 'agency_id', + name="uq_confirmed_url_agency" + ) + ) + + +def downgrade() -> None: + op.add_column( + 'urls', + sa.Column( + 'agency_id', + sa.Integer(), + sa.ForeignKey('agencies.agency_id', ondelete='NO ACTION'), + nullable=True + ) + ) + + op.drop_table('confirmed_url_agency') \ No newline at end of file diff --git a/alembic/versions/2025_03_29_1716-33a546c93441_add_data_source_id_column_to_url_table.py b/alembic/versions/2025_03_29_1716-33a546c93441_add_data_source_id_column_to_url_table.py new file mode 100644 index 00000000..b92fe1ef --- /dev/null +++ b/alembic/versions/2025_03_29_1716-33a546c93441_add_data_source_id_column_to_url_table.py @@ -0,0 +1,31 @@ +"""Add data source ID column to URL table + +Revision ID: 33a546c93441 +Revises: 45271f8fe75d +Create Date: 2025-03-29 17:16:11.863064 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = '33a546c93441' +down_revision: Union[str, None] = '45271f8fe75d' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.add_column( + 'urls', + sa.Column('data_source_id', sa.Integer(), nullable=True) + ) + # Add unique constraint to data_source_id column + op.create_unique_constraint('uq_data_source_id', 'urls', ['data_source_id']) + + +def downgrade() -> None: + op.drop_column('urls', 'data_source_id') diff --git a/alembic/versions/2025_04_02_2040-4c70177eba78_add_rejected_batch_status.py b/alembic/versions/2025_04_02_2040-4c70177eba78_add_rejected_batch_status.py new file mode 100644 index 00000000..fcb9821b --- /dev/null +++ b/alembic/versions/2025_04_02_2040-4c70177eba78_add_rejected_batch_status.py @@ -0,0 +1,47 @@ +"""Add rejected batch status + +Revision ID: 4c70177eba78 +Revises: 5ea47dacd0ef +Create Date: 2025-04-02 20:40:54.982954 + +""" +from typing import Sequence, Union + + +from util.alembic_helpers import switch_enum_type + +# revision identifiers, used by Alembic. 
+revision: str = '4c70177eba78' +down_revision: Union[str, None] = '5ea47dacd0ef' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + switch_enum_type( + table_name='urls', + column_name='outcome', + enum_name='url_status', + new_enum_values=[ + 'pending', + 'submitted', + 'validated', + 'duplicate', + 'rejected', + 'error' + ] + ) + +def downgrade() -> None: + switch_enum_type( + table_name='urls', + column_name='outcome', + enum_name='url_status', + new_enum_values=[ + 'pending', + 'submitted', + 'validated', + 'duplicate', + 'error', + ] + ) diff --git a/alembic/versions/2025_04_02_2051-e3fe6d099583_rename_approving_user_url_to_reviewing_.py b/alembic/versions/2025_04_02_2051-e3fe6d099583_rename_approving_user_url_to_reviewing_.py new file mode 100644 index 00000000..c9c4eec1 --- /dev/null +++ b/alembic/versions/2025_04_02_2051-e3fe6d099583_rename_approving_user_url_to_reviewing_.py @@ -0,0 +1,25 @@ +"""Rename approving_user_url to reviewing_user_url + +Revision ID: e3fe6d099583 +Revises: 4c70177eba78 +Create Date: 2025-04-02 20:51:10.738159 + +""" +from typing import Sequence, Union + +from alembic import op + + +# revision identifiers, used by Alembic. +revision: str = 'e3fe6d099583' +down_revision: Union[str, None] = '4c70177eba78' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.rename_table('approving_user_url', 'reviewing_user_url') + + +def downgrade() -> None: + op.rename_table('reviewing_user_url', 'approving_user_url') diff --git a/alembic/versions/2025_04_02_2114-45271f8fe75d_remove_relevant_column_from_urls.py b/alembic/versions/2025_04_02_2114-45271f8fe75d_remove_relevant_column_from_urls.py new file mode 100644 index 00000000..3f884391 --- /dev/null +++ b/alembic/versions/2025_04_02_2114-45271f8fe75d_remove_relevant_column_from_urls.py @@ -0,0 +1,27 @@ +"""Remove relevant column from urls + +Revision ID: 45271f8fe75d +Revises: e3fe6d099583 +Create Date: 2025-04-02 21:14:29.778488 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = '45271f8fe75d' +down_revision: Union[str, None] = 'e3fe6d099583' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.drop_column('urls', 'relevant') + + + +def downgrade() -> None: + op.add_column('urls', sa.Column('relevant', sa.BOOLEAN(), nullable=True)) diff --git a/alembic/versions/2025_04_15_1338-b363794fa4e9_add_submit_url_task_type_enum.py b/alembic/versions/2025_04_15_1338-b363794fa4e9_add_submit_url_task_type_enum.py new file mode 100644 index 00000000..e1d5b725 --- /dev/null +++ b/alembic/versions/2025_04_15_1338-b363794fa4e9_add_submit_url_task_type_enum.py @@ -0,0 +1,48 @@ +"""Add Submit URL Task Type Enum + +Revision ID: b363794fa4e9 +Revises: 33a546c93441 +Create Date: 2025-04-15 13:38:58.293627 + +""" +from typing import Sequence, Union + + +from util.alembic_helpers import switch_enum_type + +# revision identifiers, used by Alembic. 
+revision: str = 'b363794fa4e9' +down_revision: Union[str, None] = '33a546c93441' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + switch_enum_type( + table_name='tasks', + column_name='task_type', + enum_name='task_type', + new_enum_values=[ + "HTML", + "Relevancy", + "Record Type", + "Agency Identification", + "Misc Metadata", + "Submit Approved URLs" + ] + ) + + +def downgrade() -> None: + switch_enum_type( + table_name='tasks', + column_name='task_type', + enum_name='task_type', + new_enum_values=[ + "HTML", + "Relevancy", + "Record Type", + "Agency Identification", + "Misc Metadata", + ] + ) \ No newline at end of file diff --git a/alembic/versions/2025_04_15_1532-ed06a5633d2e_revert_to_pending_validated_urls_.py b/alembic/versions/2025_04_15_1532-ed06a5633d2e_revert_to_pending_validated_urls_.py new file mode 100644 index 00000000..82ce97a4 --- /dev/null +++ b/alembic/versions/2025_04_15_1532-ed06a5633d2e_revert_to_pending_validated_urls_.py @@ -0,0 +1,42 @@ +"""Revert to pending validated URLs without name and add constraint + +Revision ID: ed06a5633d2e +Revises: b363794fa4e9 +Create Date: 2025-04-15 15:32:26.465488 + +""" +from typing import Sequence, Union + +from alembic import op + + +# revision identifiers, used by Alembic. +revision: str = 'ed06a5633d2e' +down_revision: Union[str, None] = 'b363794fa4e9' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + + op.execute( + """ + UPDATE public.urls + SET OUTCOME = 'pending' + WHERE OUTCOME = 'validated' AND NAME IS NULL + """ + ) + + op.create_check_constraint( + 'url_name_not_null_when_validated', + 'urls', + "NAME IS NOT NULL OR OUTCOME != 'validated'" + ) + + +def downgrade() -> None: + op.drop_constraint( + 'url_name_not_null_when_validated', + 'urls', + type_='check' + ) diff --git a/alembic/versions/2025_04_16_1954-997f5bf53772_set_user_annotation_tables_to_allow_.py b/alembic/versions/2025_04_16_1954-997f5bf53772_set_user_annotation_tables_to_allow_.py new file mode 100644 index 00000000..775caddf --- /dev/null +++ b/alembic/versions/2025_04_16_1954-997f5bf53772_set_user_annotation_tables_to_allow_.py @@ -0,0 +1,61 @@ +"""Set user annotation tables to allow only one annotation per url + +Revision ID: 997f5bf53772 +Revises: ed06a5633d2e +Create Date: 2025-04-16 19:54:59.798580 + +""" +from typing import Sequence, Union + +from alembic import op + + +# revision identifiers, used by Alembic. 
+revision: str = '997f5bf53772' +down_revision: Union[str, None] = 'ed06a5633d2e' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # Delete entries with more than one annotation + # Relevance + op.execute(""" + with ranked as( + SELECT + id, + ROW_NUMBER() OVER (PARTITION BY URL_ID ORDER BY id) as rn + FROM + USER_RELEVANT_SUGGESTIONS + ) + DELETE FROM user_relevant_suggestions + USING ranked + WHERE USER_RELEVANT_SUGGESTIONS.id = ranked.id + and ranked.rn > 1 + """) + # Record Type + op.execute(""" + with ranked as( + SELECT + id, + ROW_NUMBER() OVER (PARTITION BY URL_ID ORDER BY id) as rn + FROM + USER_RECORD_TYPE_SUGGESTIONS + ) + DELETE FROM user_record_type_suggestions + USING ranked + WHERE USER_RECORD_TYPE_SUGGESTIONS.id = ranked.id + and ranked.rn > 1 + """) + + # Add unique constraint to url_id column + op.create_unique_constraint('uq_user_relevant_suggestions_url_id', 'user_relevant_suggestions', ['url_id']) + op.create_unique_constraint('uq_user_record_type_suggestions_url_id', 'user_record_type_suggestions', ['url_id']) + op.create_unique_constraint('uq_user_agency_suggestions_url_id', 'user_url_agency_suggestions', ['url_id']) + + + +def downgrade() -> None: + op.drop_constraint('uq_user_relevant_suggestions_url_id', 'user_relevant_suggestions', type_='unique') + op.drop_constraint('uq_user_record_type_suggestions_url_id', 'user_record_type_suggestions', type_='unique') + op.drop_constraint('uq_user_agency_suggestions_url_id', 'user_url_agency_suggestions', type_='unique') \ No newline at end of file diff --git a/alembic/versions/2025_04_17_0909-e285e6e7cf71_change_batch_completed_to_ready_to_label.py b/alembic/versions/2025_04_17_0909-e285e6e7cf71_change_batch_completed_to_ready_to_label.py new file mode 100644 index 00000000..882c2c5f --- /dev/null +++ b/alembic/versions/2025_04_17_0909-e285e6e7cf71_change_batch_completed_to_ready_to_label.py @@ -0,0 +1,36 @@ +"""Change batch completed to ready to label + +Revision ID: e285e6e7cf71 +Revises: 997f5bf53772 +Create Date: 2025-04-17 09:09:38.137131 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + +from util.alembic_helpers import switch_enum_type, alter_enum_value + +# revision identifiers, used by Alembic. +revision: str = 'e285e6e7cf71' +down_revision: Union[str, None] = '997f5bf53772' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + alter_enum_value( + enum_name="batch_status", + old_value="complete", + new_value="ready to label" + ) + + + +def downgrade() -> None: + alter_enum_value( + enum_name="batch_status", + old_value="ready to label", + new_value="complete" + ) diff --git a/alembic/versions/2025_05_03_0956-028565b77b9e_add_manual_strategy_to_batch_strategy_.py b/alembic/versions/2025_05_03_0956-028565b77b9e_add_manual_strategy_to_batch_strategy_.py new file mode 100644 index 00000000..9ec86fee --- /dev/null +++ b/alembic/versions/2025_05_03_0956-028565b77b9e_add_manual_strategy_to_batch_strategy_.py @@ -0,0 +1,60 @@ +"""Add manual strategy to Batch strategy enum + +Revision ID: 028565b77b9e +Revises: e285e6e7cf71 +Create Date: 2025-05-03 09:56:51.134406 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + +from util.alembic_helpers import switch_enum_type + +# revision identifiers, used by Alembic. 
+revision: str = '028565b77b9e' +down_revision: Union[str, None] = 'e285e6e7cf71' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + switch_enum_type( + table_name="batches", + column_name="strategy", + enum_name="batch_strategy", + new_enum_values=[ + "example", + "ckan", + "muckrock_county_search", + "auto_googler", + "muckrock_all_search", + "muckrock_simple_search", + "common_crawler", + "manual" + ], + ) + + +def downgrade() -> None: + # Delete all batches with manual strategy + op.execute(""" + DELETE FROM BATCHES + WHERE STRATEGY = 'manual' + """) + + switch_enum_type( + table_name="batches", + column_name="strategy", + enum_name="batch_strategy", + new_enum_values=[ + "example", + "ckan", + "muckrock_county_search", + "auto_googler", + "muckrock_all_search", + "muckrock_simple_search", + "common_crawler" + ], + ) diff --git a/alembic/versions/2025_05_06_0816-e55e16e0738f_create_backlogsnapshot_table.py b/alembic/versions/2025_05_06_0816-e55e16e0738f_create_backlogsnapshot_table.py new file mode 100644 index 00000000..4d2fe7c5 --- /dev/null +++ b/alembic/versions/2025_05_06_0816-e55e16e0738f_create_backlogsnapshot_table.py @@ -0,0 +1,31 @@ +"""Create BacklogSnapshot Table + +Revision ID: e55e16e0738f +Revises: 028565b77b9e +Create Date: 2025-05-06 08:16:29.385305 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = 'e55e16e0738f' +down_revision: Union[str, None] = '028565b77b9e' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.create_table( + 'backlog_snapshot', + sa.Column('id', sa.Integer(), nullable=False, primary_key=True), + sa.Column('count_pending_total', sa.Integer(), nullable=False), + sa.Column('created_at', sa.DateTime(), nullable=False), + ) + + +def downgrade() -> None: + op.drop_table('backlog_snapshot') diff --git a/alembic/versions/2025_05_06_0919-f25852e17c04_create_url_annotation_flags_view.py b/alembic/versions/2025_05_06_0919-f25852e17c04_create_url_annotation_flags_view.py new file mode 100644 index 00000000..09f8d825 --- /dev/null +++ b/alembic/versions/2025_05_06_0919-f25852e17c04_create_url_annotation_flags_view.py @@ -0,0 +1,47 @@ +"""Create URL Annotation Flags View + +Revision ID: f25852e17c04 +Revises: e55e16e0738f +Create Date: 2025-05-06 09:19:54.000410 + +""" +from typing import Sequence, Union + +from alembic import op + + +# revision identifiers, used by Alembic. 
+revision: str = 'f25852e17c04' +down_revision: Union[str, None] = 'e55e16e0738f' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.execute(""" + CREATE OR REPLACE VIEW url_annotation_flags AS + ( + SELECT u.id, + CASE WHEN arts.url_id IS NOT NULL THEN TRUE ELSE FALSE END AS has_auto_record_type_suggestion, + CASE WHEN ars.url_id IS NOT NULL THEN TRUE ELSE FALSE END AS has_auto_relevant_suggestion, + CASE WHEN auas.url_id IS NOT NULL THEN TRUE ELSE FALSE END AS has_auto_agency_suggestion, + CASE WHEN urts.url_id IS NOT NULL THEN TRUE ELSE FALSE END AS has_user_record_type_suggestion, + CASE WHEN urs.url_id IS NOT NULL THEN TRUE ELSE FALSE END AS has_user_relevant_suggestion, + CASE WHEN uuas.url_id IS NOT NULL THEN TRUE ELSE FALSE END AS has_user_agency_suggestion, + CASE WHEN cua.url_id IS NOT NULL THEN TRUE ELSE FALSE END AS has_confirmed_agency, + CASE WHEN ruu.url_id IS NOT NULL THEN TRUE ELSE FALSE END AS was_reviewed + FROM urls u + LEFT JOIN public.auto_record_type_suggestions arts ON u.id = arts.url_id + LEFT JOIN public.auto_relevant_suggestions ars ON u.id = ars.url_id + LEFT JOIN public.automated_url_agency_suggestions auas ON u.id = auas.url_id + LEFT JOIN public.user_record_type_suggestions urts ON u.id = urts.url_id + LEFT JOIN public.user_relevant_suggestions urs ON u.id = urs.url_id + LEFT JOIN public.user_url_agency_suggestions uuas ON u.id = uuas.url_id + LEFT JOIN public.reviewing_user_url ruu ON u.id = ruu.url_id + LEFT JOIN public.confirmed_url_agency cua on u.id = cua.url_id + ) + """) + + +def downgrade() -> None: + op.execute("DROP VIEW url_annotation_flags;") diff --git a/alembic/versions/2025_05_06_1115-6f2007bbcce3_create_url_data_sources_table.py b/alembic/versions/2025_05_06_1115-6f2007bbcce3_create_url_data_sources_table.py new file mode 100644 index 00000000..499de2e4 --- /dev/null +++ b/alembic/versions/2025_05_06_1115-6f2007bbcce3_create_url_data_sources_table.py @@ -0,0 +1,79 @@ +"""Create url_data_sources table + +Revision ID: 6f2007bbcce3 +Revises: f25852e17c04 +Create Date: 2025-05-06 11:15:24.485465 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. 
+revision: str = '6f2007bbcce3' +down_revision: Union[str, None] = 'f25852e17c04' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # Create url_data_sources_table table + op.create_table( + 'url_data_sources', + sa.Column( + 'id', + sa.Integer(), + primary_key=True + ), + sa.Column( + 'url_id', + sa.Integer(), + sa.ForeignKey( + 'urls.id', + ondelete='CASCADE' + ), + nullable=False + ), + sa.Column( + 'data_source_id', + sa.Integer(), + nullable=False + ), + sa.Column( + 'created_at', + sa.TIMESTAMP(), + nullable=False, + server_default=sa.text('now()') + ), + sa.UniqueConstraint('url_id', name='uq_url_data_sources_url_id'), + sa.UniqueConstraint('data_source_id', name='uq_url_data_sources_data_source_id') + ) + + # Migrate existing urls with a data source ID + op.execute(""" + INSERT INTO url_data_sources + (url_id, data_source_id) + SELECT id, data_source_id + FROM urls + WHERE data_source_id IS NOT NULL + """) + + # Drop existing data source ID column from urls table + op.drop_column('urls', 'data_source_id') + + +def downgrade() -> None: + + op.drop_table('url_data_sources') + + op.add_column( + 'urls', + sa.Column( + 'data_source_id', + sa.Integer(), + nullable=True + ) + ) + + diff --git a/alembic/versions/2025_05_11_1054-9d4002437ebe_set_default_created_at_for_backlog_.py b/alembic/versions/2025_05_11_1054-9d4002437ebe_set_default_created_at_for_backlog_.py new file mode 100644 index 00000000..fbdb5645 --- /dev/null +++ b/alembic/versions/2025_05_11_1054-9d4002437ebe_set_default_created_at_for_backlog_.py @@ -0,0 +1,38 @@ +"""Set default created_at for backlog_snapshot + +Revision ID: 9d4002437ebe +Revises: 6f2007bbcce3 +Create Date: 2025-05-11 10:54:22.797147 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = '9d4002437ebe' +down_revision: Union[str, None] = '6f2007bbcce3' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.alter_column( + table_name='backlog_snapshot', + column_name='created_at', + existing_type=sa.DateTime(), + nullable=False, + server_default=sa.text('now()') + ) + + +def downgrade() -> None: + op.alter_column( + table_name='backlog_snapshot', + column_name='created_at', + existing_type=sa.DateTime(), + nullable=False, + server_default=None + ) diff --git a/alembic/versions/2025_05_13_0704-864107b703ae_create_url_checked_for_duplicate_table.py b/alembic/versions/2025_05_13_0704-864107b703ae_create_url_checked_for_duplicate_table.py new file mode 100644 index 00000000..e2e5947f --- /dev/null +++ b/alembic/versions/2025_05_13_0704-864107b703ae_create_url_checked_for_duplicate_table.py @@ -0,0 +1,81 @@ +"""Create url_checked_for_duplicate table + +Revision ID: 864107b703ae +Revises: 9d4002437ebe +Create Date: 2025-05-13 07:04:22.592396 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + +from util.alembic_helpers import switch_enum_type + +# revision identifiers, used by Alembic. 
+revision: str = '864107b703ae' +down_revision: Union[str, None] = '9d4002437ebe' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.create_table( + 'url_checked_for_duplicate', + sa.Column( + 'id', + sa.Integer(), + primary_key=True + ), + sa.Column( + 'url_id', + sa.Integer(), + sa.ForeignKey( + 'urls.id', + ondelete='CASCADE' + ), + nullable=False + ), + sa.Column( + 'created_at', + sa.DateTime(), + nullable=False, + server_default=sa.text('now()') + ), + ) + + switch_enum_type( + table_name='tasks', + column_name='task_type', + enum_name='task_type', + new_enum_values=[ + "HTML", + "Relevancy", + "Record Type", + "Agency Identification", + "Misc Metadata", + "Submit Approved URLs", + "Duplicate Detection" + ] + ) + + +def downgrade() -> None: + op.drop_table('url_checked_for_duplicate') + + # Delete tasks of type "Duplicate Detection" + op.execute("DELETE FROM TASKS WHERE TASK_TYPE = 'Duplicate Detection';") + + switch_enum_type( + table_name='tasks', + column_name='task_type', + enum_name='task_type', + new_enum_values=[ + "HTML", + "Relevancy", + "Record Type", + "Agency Identification", + "Misc Metadata", + "Submit Approved URLs", + ] + ) diff --git a/collector_db/alembic/versions/5a5ca06f36fa_create_url_error_info_table_and_url_error_status.py b/alembic/versions/5a5ca06f36fa_create_url_error_info_table_and_url_error_status.py similarity index 100% rename from collector_db/alembic/versions/5a5ca06f36fa_create_url_error_info_table_and_url_error_status.py rename to alembic/versions/5a5ca06f36fa_create_url_error_info_table_and_url_error_status.py diff --git a/alembic/versions/76f902fe18cd_add_approved_enum_value_to_urlstatus.py b/alembic/versions/76f902fe18cd_add_approved_enum_value_to_urlstatus.py new file mode 100644 index 00000000..b548cc54 --- /dev/null +++ b/alembic/versions/76f902fe18cd_add_approved_enum_value_to_urlstatus.py @@ -0,0 +1,80 @@ +"""Add approved enum value to URLStatus + +Revision ID: 76f902fe18cd +Revises: d7eb670edaf0 +Create Date: 2025-02-21 13:46:00.621485 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. 
+revision: str = '76f902fe18cd' +down_revision: Union[str, None] = 'd7eb670edaf0' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + +old_enum_values = ('pending', 'submitted', 'human_labeling', 'rejected', 'duplicate', 'error') +new_enum_values = old_enum_values + ('approved',) + +old_outcome_enum = postgresql.ENUM( + *old_enum_values, + name='url_status' +) + +tmp_new_outcome_enum = postgresql.ENUM( + *new_enum_values, + name='tmp_url_status' +) +new_outcome_enum = postgresql.ENUM( + *new_enum_values, + name='url_status' +) + +common_args = { + "table_name": "urls", + "column_name": "outcome", +} + +def upgrade() -> None: + tmp_new_outcome_enum.create(op.get_bind(), checkfirst=True) + op.alter_column( + **common_args, + existing_type=old_outcome_enum, + type_=tmp_new_outcome_enum, + postgresql_using='outcome::text::tmp_url_status' + ) + old_outcome_enum.drop(op.get_bind(), checkfirst=True) + new_outcome_enum.create(op.get_bind(), checkfirst=True) + + op.alter_column( + **common_args, + existing_type=tmp_new_outcome_enum, + type_=new_outcome_enum, + postgresql_using='outcome::text::url_status' + ) + tmp_new_outcome_enum.drop(op.get_bind(), checkfirst=True) + +def downgrade() -> None: + tmp_new_outcome_enum.create(op.get_bind()) + op.alter_column( + **common_args, + existing_type=new_outcome_enum, + type_=tmp_new_outcome_enum, + postgresql_using='outcome::text::tmp_url_status' + ) + + new_outcome_enum.drop(op.get_bind(), checkfirst=True) + old_outcome_enum.create(op.get_bind(), checkfirst=True) + + op.alter_column( + **common_args, + existing_type=tmp_new_outcome_enum, + type_=old_outcome_enum, + postgresql_using='outcome::text::url_status' + ) + + tmp_new_outcome_enum.drop(op.get_bind(), checkfirst=True) \ No newline at end of file diff --git a/collector_db/alembic/versions/86692fc1d862_add_url_metadata_table.py b/alembic/versions/86692fc1d862_add_url_metadata_table.py similarity index 100% rename from collector_db/alembic/versions/86692fc1d862_add_url_metadata_table.py rename to alembic/versions/86692fc1d862_add_url_metadata_table.py diff --git a/alembic/versions/8c44e02733ae_add_user_url_agency_suggestions_and_.py b/alembic/versions/8c44e02733ae_add_user_url_agency_suggestions_and_.py new file mode 100644 index 00000000..87c069fa --- /dev/null +++ b/alembic/versions/8c44e02733ae_add_user_url_agency_suggestions_and_.py @@ -0,0 +1,59 @@ +"""Add user_url_agency_suggestions and trigger + +Revision ID: 8c44e02733ae +Revises: 19bf57df581a +Create Date: 2025-02-05 10:33:46.002025 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy import Column, Integer +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. 
+revision: str = '8c44e02733ae' +down_revision: Union[str, None] = '19bf57df581a' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.add_column( + table_name='url_agency_suggestions', + column=Column( + name="user_id", + type_=Integer, + nullable=True + ) + ) + + op.execute( + """ + CREATE OR REPLACE FUNCTION user_url_agency_suggestions_value() + RETURNS TRIGGER AS $$ + BEGIN + IF NEW.suggestion_type = 'Manual Suggestion' and NEW.user_id IS NULL THEN + RAISE EXCEPTION 'User ID must not be null when suggestion type is "Manual Suggestion"'; + END IF; + RETURN NEW; + END; + $$ LANGUAGE plpgsql; + + CREATE TRIGGER enforce_url_agency_suggestions_manual_suggestion_user_id + BEFORE INSERT ON url_agency_suggestions + FOR EACH ROW + EXECUTE FUNCTION user_url_agency_suggestions_value(); + + """ + ) + + +def downgrade() -> None: + op.execute("DROP TRIGGER IF EXISTS enforce_url_agency_suggestions_manual_suggestion_user_id ON url_agency_suggestions;") + op.execute( + """ + DROP FUNCTION IF EXISTS user_url_agency_suggestions_value(); + """ + ) diff --git a/collector_db/alembic/versions/9afd8a5633c9_create_htmlcontent_and_rooturl_tables.py b/alembic/versions/9afd8a5633c9_create_htmlcontent_and_rooturl_tables.py similarity index 100% rename from collector_db/alembic/versions/9afd8a5633c9_create_htmlcontent_and_rooturl_tables.py rename to alembic/versions/9afd8a5633c9_create_htmlcontent_and_rooturl_tables.py diff --git a/collector_db/alembic/versions/a4750e7ff8e7_add_updated_at_to_url_table.py b/alembic/versions/a4750e7ff8e7_add_updated_at_to_url_table.py similarity index 100% rename from collector_db/alembic/versions/a4750e7ff8e7_add_updated_at_to_url_table.py rename to alembic/versions/a4750e7ff8e7_add_updated_at_to_url_table.py diff --git a/collector_db/alembic/versions/d11f07224d1f_initial_creation.py b/alembic/versions/d11f07224d1f_initial_creation.py similarity index 100% rename from collector_db/alembic/versions/d11f07224d1f_initial_creation.py rename to alembic/versions/d11f07224d1f_initial_creation.py diff --git a/alembic/versions/d7eb670edaf0_revise_agency_identification_logic.py b/alembic/versions/d7eb670edaf0_revise_agency_identification_logic.py new file mode 100644 index 00000000..2bb7c157 --- /dev/null +++ b/alembic/versions/d7eb670edaf0_revise_agency_identification_logic.py @@ -0,0 +1,150 @@ +"""Revise agency identification logic + +Revision ID: d7eb670edaf0 +Revises: 8c44e02733ae +Create Date: 2025-02-07 13:10:41.181578 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + +from collector_db.enums import PGEnum + +# revision identifiers, used by Alembic. 
+revision: str = 'd7eb670edaf0' +down_revision: Union[str, None] = '8c44e02733ae' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + +suggestion_type_enum = PGEnum( + 'Auto Suggestion', + 'Manual Suggestion', + 'Unknown', + 'New Agency', + 'Confirmed', name='url_agency_suggestion_type' +) + +def upgrade(): + # Create agencies table + op.create_table( + "agencies", + sa.Column("agency_id", sa.Integer(), primary_key=True), + sa.Column("name", sa.String(), nullable=False), + sa.Column("state", sa.String(), nullable=True), + sa.Column("county", sa.String(), nullable=True), + sa.Column("locality", sa.String(), nullable=True), + sa.Column("updated_at", sa.DateTime(), nullable=False, server_default=sa.func.now()), + ) + + # Create confirmed_url_agency table + op.create_table( + "confirmed_url_agency", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column("agency_id", sa.Integer(), sa.ForeignKey("agencies.agency_id"), nullable=False), + sa.Column("url_id", sa.Integer(), sa.ForeignKey("urls.id"), nullable=False), + ) + op.create_unique_constraint( + "uq_confirmed_url_agency", "confirmed_url_agency", ["agency_id", "url_id"] + ) + + # Create automated_url_agency_suggestions table + op.create_table( + "automated_url_agency_suggestions", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column("agency_id", sa.Integer(), sa.ForeignKey("agencies.agency_id"), nullable=True), + sa.Column("url_id", sa.Integer(), sa.ForeignKey("urls.id"), nullable=False), + sa.Column("is_unknown", sa.Boolean(), nullable=True), + ) + op.create_unique_constraint( + "uq_automated_url_agency_suggestions", "automated_url_agency_suggestions", ["agency_id", "url_id"] + ) + op.execute(""" + CREATE OR REPLACE FUNCTION enforce_no_agency_id_if_unknown() + RETURNS TRIGGER AS $$ + BEGIN + IF NEW.is_unknown = TRUE AND NEW.agency_id IS NOT NULL THEN + RAISE EXCEPTION 'agency_id must be null when is_unknown is TRUE'; + END IF; + RETURN NEW; + END; + $$ LANGUAGE plpgsql; + """) + op.execute(""" + CREATE TRIGGER enforce_no_agency_id_if_unknown + BEFORE INSERT ON automated_url_agency_suggestions + FOR EACH ROW + EXECUTE FUNCTION enforce_no_agency_id_if_unknown(); + """) + # Create user_url_agency_suggestions table + op.create_table( + "user_url_agency_suggestions", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column("agency_id", sa.Integer(), sa.ForeignKey("agencies.agency_id"), nullable=True), + sa.Column("url_id", sa.Integer(), sa.ForeignKey("urls.id"), nullable=False), + sa.Column("user_id", sa.Integer(), nullable=False), + sa.Column("is_new", sa.Boolean(), nullable=True), + ) + op.create_unique_constraint( + "uq_user_url_agency_suggestions", "user_url_agency_suggestions", ["agency_id", "url_id", "user_id"] + ) + op.execute(""" + CREATE OR REPLACE FUNCTION enforce_no_agency_id_if_new() + RETURNS TRIGGER AS $$ + BEGIN + IF NEW.is_new = TRUE AND NEW.agency_id IS NOT NULL THEN + RAISE EXCEPTION 'agency_id must be null when is_new is TRUE'; + END IF; + RETURN NEW; + END; + $$ LANGUAGE plpgsql; + """) + op.execute(""" + CREATE TRIGGER enforce_no_agency_id_if_new + BEFORE INSERT ON user_url_agency_suggestions + FOR EACH ROW + EXECUTE FUNCTION enforce_no_agency_id_if_new(); + """) + + + + + op.drop_table('url_agency_suggestions') + suggestion_type_enum.drop(op.get_bind(), checkfirst=True) + + + +def downgrade(): + # Drop constraints first + op.drop_constraint("uq_confirmed_url_agency", 
"confirmed_url_agency", type_="unique") + op.drop_constraint("uq_automated_url_agency_suggestions", "automated_url_agency_suggestions", type_="unique") + op.drop_constraint("uq_user_url_agency_suggestions", "user_url_agency_suggestions", type_="unique") + + # Drop tables + op.drop_table("user_url_agency_suggestions") + op.drop_table("automated_url_agency_suggestions") + op.drop_table("confirmed_url_agency") + op.drop_table("agencies") + + op.create_table('url_agency_suggestions', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('url_id', sa.Integer(), nullable=False), + sa.Column('suggestion_type', suggestion_type_enum, nullable=False), + sa.Column('agency_id', sa.Integer(), nullable=True), + sa.Column('agency_name', sa.String(), nullable=True), + sa.Column('state', sa.String(), nullable=True), + sa.Column('county', sa.String(), nullable=True), + sa.Column('locality', sa.String(), nullable=True), + sa.Column('updated_at', sa.TIMESTAMP(), server_default=sa.text('now()'), nullable=False), + sa.ForeignKeyConstraint(['url_id'], ['urls.id'], ), + sa.PrimaryKeyConstraint('id') + ) + op.execute(""" + DROP TRIGGER IF EXISTS enforce_no_agency_id_if_unknown ON automated_url_agency_suggestions; + """) + op.execute(""" + DROP FUNCTION IF EXISTS enforce_no_agency_id_if_unknown; + """) + op.execute("DROP FUNCTION enforce_no_agency_id_if_new()") + diff --git a/alembic/versions/dae00e5aa8dd_create_rooturlcache.py b/alembic/versions/dae00e5aa8dd_create_rooturlcache.py new file mode 100644 index 00000000..c95b10e0 --- /dev/null +++ b/alembic/versions/dae00e5aa8dd_create_rooturlcache.py @@ -0,0 +1,34 @@ +"""Create RootURLCache + +Revision ID: dae00e5aa8dd +Revises: dcd158092de0 +Create Date: 2025-01-19 10:40:19.650982 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. 
+revision: str = 'dae00e5aa8dd' +down_revision: Union[str, None] = 'dcd158092de0' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.create_table('root_url_cache', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('url', sa.String(), nullable=False), + sa.Column('page_title', sa.String(), nullable=False), + sa.Column('page_description', sa.String(), nullable=True), + sa.Column('updated_at', sa.TIMESTAMP(), server_default=sa.text('now()'), nullable=False), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('url', name='root_url_cache_uq_url') + ) + + +def downgrade() -> None: + op.drop_table('root_url_cache') diff --git a/collector_db/alembic/versions/db6d60feda7d_convert_batch_strategy_status_to_enums.py b/alembic/versions/db6d60feda7d_convert_batch_strategy_status_to_enums.py similarity index 100% rename from collector_db/alembic/versions/db6d60feda7d_convert_batch_strategy_status_to_enums.py rename to alembic/versions/db6d60feda7d_convert_batch_strategy_status_to_enums.py diff --git a/collector_db/alembic/versions/dcd158092de0_create_metadata_annotation_table.py b/alembic/versions/dcd158092de0_create_metadata_annotation_table.py similarity index 100% rename from collector_db/alembic/versions/dcd158092de0_create_metadata_annotation_table.py rename to alembic/versions/dcd158092de0_create_metadata_annotation_table.py diff --git a/collector_db/alembic/versions/e27c5f8409a3_convert_url_outcome_to_enum.py b/alembic/versions/e27c5f8409a3_convert_url_outcome_to_enum.py similarity index 100% rename from collector_db/alembic/versions/e27c5f8409a3_convert_url_outcome_to_enum.py rename to alembic/versions/e27c5f8409a3_convert_url_outcome_to_enum.py diff --git a/annotation_pipeline/README.md b/annotation_pipeline/README.md deleted file mode 100644 index a6d7a1e4..00000000 --- a/annotation_pipeline/README.md +++ /dev/null @@ -1,43 +0,0 @@ -# Annotation Pipeline - -This Python script automates the process of crawling for relevant URLs, scraping HTML content from those pages, formatting the data as Label Studio tasks, and uploading them to Label Studio for annotation. - -## Features - -- **Common Crawl Integration**: Initiates the Common Crawl script to crawl for relevant URLs based on specified parameters such as Common Crawl ID, URL type, keyword, and number of pages to process. - -- **HTML Tag Collector**: Collects HTML tags from the crawled URLs using the tag collector script. - -- **Label Studio Tasks**: Formats the collected data into tasks suitable for Label Studio annotation, including pre-annotation support for assumed record types. - -- **Upload to Label Studio**: Uploads the tasks to Label Studio for review and annotation. - -## Setup - -1. Create venv and install Python dependencies (if not done previously) - (assuming these are run within the annotation_pipeline/ folder): - - `python -m venv annotation-pipeline-env` - - `source annotation-pipeline-env` - - `pip install -r requirements.txt` - -2. Setup Environment variables in data_source_identification/.env - - HUGGINGFACE_ACCESS_TOKEN=... - - LABEL_STUDIO_ACCESS_TOKEN=... - - LABEL_STUDIO_PROJECT_ID=... - - LABEL_STUDIO_ORGANIZATION_ID=... - -## Usage - -Run from the parent directory (data-source-identification/) - -The output logs from common crawl will be stored in `annotation_pipeline/data` by default. This can be modified by editing the `annotation_pipeline/config.ini` file. 
- -`python annotation_pipeline/populate_labelstudio.py common_crawl_id url keyword --pages num_pages [--record-type record_type]` - -- `common_crawl_id`: ID of the Common Crawl Corpus to search -- `url`: Type of URL to search for (e.g. *.gov for all .gov domains). -- `keyword`: Keyword that must be matched in the full URL -- `--pages num_pages`: Number of pages to search -- `--record-type record_type` (optional): Assumed record type for pre-annotation. - -e.g. `python annotation_pipeline/populate_labelstudio.py CC-MAIN-2024-10 '*.gov' arrest --pages 2 --record-type 'Arrest Records'` diff --git a/annotation_pipeline/config.ini b/annotation_pipeline/config.ini deleted file mode 100644 index 6f2deb96..00000000 --- a/annotation_pipeline/config.ini +++ /dev/null @@ -1,19 +0,0 @@ -# This configuration file contains default settings for the Common Crawler application. -# Settings can be modified to suit different environments or testing needs. - -[DEFAULT] -# Filename for the cache. Stores which pages have been crawled -# at which combinations of index, url search term, and keyword -# to avoid re-crawling them. -cache_filename = cache - -# Directory where data files (both cache and output) are stored. -# Change as needed for different environments. -# Path is relative from working directory that executes common_crawler/main.py -data_dir = annotation_pipeline/data - -# Filename for the output CSV containing crawled URLs. -output_filename = urls - -# Name of the huggingface repo -huggingface_repo_id = PDAP/unlabeled-urls diff --git a/annotation_pipeline/data/batch_info.csv b/annotation_pipeline/data/batch_info.csv deleted file mode 100644 index 35b2c19e..00000000 --- a/annotation_pipeline/data/batch_info.csv +++ /dev/null @@ -1,4 +0,0 @@ -Datetime,Source,Count,Keywords,Notes,Filename -2024-08-12 16:31:20.362180,Common Crawl,0,*.com - police,"CC-MAIN-2024-14, 10 pages, starting at 1",urls_2024-08-12_16-31-20 -2024-08-16 15:18:09.405734,Common Crawl,2,*.com - police,"CC-MAIN-2024-30, 2 pages, starting at 1",urls_2024-08-16_15-18-09 -2024-08-20 14:07:03.339044,Common Crawl,22,*.gov - police,"CC-MAIN-2024-30, 2 pages, starting at 1",urls_2024-08-20_14-07-03 diff --git a/annotation_pipeline/data/cache.json b/annotation_pipeline/data/cache.json deleted file mode 100644 index 066b4285..00000000 --- a/annotation_pipeline/data/cache.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "CC-MAIN-2024-14": { - "*.com": { - "police": 1 - } - }, - "CC-MAIN-2024-30": { - "*.com": { - "police": 2 - }, - "*.gov": { - "police": 2 - } - } -} \ No newline at end of file diff --git a/annotation_pipeline/data/tag_collector/urls_2024-08-16_15-18-09.csv b/annotation_pipeline/data/tag_collector/urls_2024-08-16_15-18-09.csv deleted file mode 100644 index b7b488cd..00000000 --- a/annotation_pipeline/data/tag_collector/urls_2024-08-16_15-18-09.csv +++ /dev/null @@ -1,3 +0,0 @@ -url,url_path,html_title,meta_description,root_page_title,http_response,h1,h2,h3,h4,h5,h6,div_text,batch_id -https://001-adult-toys-n-sex-dolls.com/video/3879/stoya-gets-investigated-by-the-police/,video/3879/stoya-gets-investigated-by-the-police,Stoya gets investigated by the police | One truly amazing adult page with everything included,"Stoya tweeted her accusations, and neither porn star James.",Sex dolls porn clips | One truly amazing adult page with everything included,200,"[""Stoya gets investigated by the police""]","[""Related Videos""]",[],[],[],[],Sex dolls Home Models Categories Sex dolls Home Models Categories Home Models Categories ,2024-08-16 
15:18:09 -https://001-adult-toys-n-sex-dolls.com/video/39592/policeman-helps-out-jasmine-jae-glaze-up-her-filth/,video/39592/policeman-helps-out-jasmine-jae-glaze-up-her-filth,Policeman helps out Jasmine Jae glaze up her filth | One truly amazing adult page with everything included,Please rest assured that we are working hard to reach out to.,Sex dolls porn clips | One truly amazing adult page with everything included,200,"[""Policeman helps out Jasmine Jae glaze up her filth""]","[""Related Videos""]",[],[],[],[],Sex dolls Home Models Categories Sex dolls Home Models Categories Home Models Categories ,2024-08-16 15:18:09 diff --git a/annotation_pipeline/data/tag_collector/urls_2024-08-20_14-07-03.csv b/annotation_pipeline/data/tag_collector/urls_2024-08-20_14-07-03.csv deleted file mode 100644 index 66108893..00000000 --- a/annotation_pipeline/data/tag_collector/urls_2024-08-20_14-07-03.csv +++ /dev/null @@ -1,23 +0,0 @@ -url,url_path,html_title,meta_description,root_page_title,http_response,h1,h2,h3,h4,h5,h6,div_text,batch_id -https://origin-www.acquisition.gov/dfars/252.225-7029-acquisition-uniform-components-afghan-military-or-afghan-national-police.,dfars/252.225-7029-acquisition-uniform-components-afghan-military-or-afghan-national-police.,252.225-7029 Acquisition of Uniform Components for Afghan Military or Afghan National Police. | Acquisition.GOV,,Home | Acquisition.GOV,200,"[""DFARS"", ""252.225-7029 Acquisition of Uniform Components for Afghan Military or Afghan National Police.""]","[""Main navigation"", ""Breadcrumb"", ""DFARS Parts"", ""Regulations DFARS Menu"", ""DFARS Appendix"", ""Regulations DFARS Appendix Menu"", ""Upper Footer Menu""]","[""FAR""]","[""Favorite"", ""X""]",[],[],,2024-08-20 14:07:03 -https://acworth-ga.gov/events/category/police-event/,events/category/police-event,"Events from November 16 – November 16 › police-event › – City of Acworth, GA",,"Home - City of Acworth, GA",200,"[""police-event""]","[""Events Search and Views Navigation"", ""November 2024""]","[""Event Views Navigation""]",[],[],[],Open toolbar Accessibility Tools Accessibility Tools Increase Text Increase Text Decrease Text Decrease Text Grayscale Grayscale High Contrast High Contrast Negative Contrast Negative Contrast Light Background Light Background Links Underline Links Underline Readable Font Readable Font Reset Reset Accessibility Tools Increase Text Increase Text Decrease Text Decrease Text Grayscale Grayscale High Contrast High Contrast Negative Contrast Negative Contrast Light Background Light Background Links Underline Links Underline Readable Font Readable Font Reset Reset ,2024-08-20 14:07:03 -https://acworth-ga.gov/events/tag/police-department/,events/tag/police-department,"Events from November 16 – November 16 – City of Acworth, GA",,"Home - City of Acworth, GA",200,"[""police-department""]","[""Events Search and Views Navigation"", ""November 2024""]","[""Event Views Navigation""]",[],[],[],Open toolbar Accessibility Tools Accessibility Tools Increase Text Increase Text Decrease Text Decrease Text Grayscale Grayscale High Contrast High Contrast Negative Contrast Negative Contrast Light Background Light Background Links Underline Links Underline Readable Font Readable Font Reset Reset Accessibility Tools Increase Text Increase Text Decrease Text Decrease Text Grayscale Grayscale High Contrast High Contrast Negative Contrast Negative Contrast Light Background Light Background Links Underline Links Underline Readable Font Readable Font Reset Reset ,2024-08-20 14:07:03 
-https://acworth-ga.gov/faq-items/i-got-a-ticket-from-an-acworth-police-officer-what-do-i-do/,faq-items/i-got-a-ticket-from-an-acworth-police-officer-what-do-i-do,"I got a ticket from an Acworth Police officer. What do I do? - City of Acworth, GA",There is a court date listed on the citation. This is an arraignment date. You will be asked to enter a plea at this time. Please be on time for your appointed court date as the judge will give you a lot of valuable information at the opening of the court session.,"Home - City of Acworth, GA",200,"[""I got a ticket from an Acworth Police officer. What do I do?""]",[],[],"[""Please Feel Free to Share This Story:""]",[],[],"Open toolbar Accessibility Tools Accessibility Tools Increase Text Increase Text Decrease Text Decrease Text Grayscale Grayscale High Contrast High Contrast Negative Contrast Negative Contrast Light Background Light Background Links Underline Links Underline Readable Font Readable Font Reset Reset Accessibility Tools Increase Text Increase Text Decrease Text Decrease Text Grayscale Grayscale High Contrast High Contrast Negative Contrast Negative Contrast Light Background Light Background Links Underline Links Underline Readable Font Readable Font Reset Reset Previous Next I got a ticket from an Acworth Police officer. What do I do? There is a court date listed on the citation. This is an arraignment date. You will be asked to enter a plea at this time. Please be on time for your appointed court date as the judge will give you a lot of valuable information at the opening of the court session. Mike Brooks 2024-07-29T13:52:05-04:00 September 15, 2023 | Please Feel Free to Share This Story: Facebook X LinkedIn Pinterest Email 4415 Center Street, Acworth GA 30101 Phone Directory Contact Us © 2023 City of Acworth Acworth is located in the foothills of the North Georgia mountains and is nestled along the banks of Lake Acworth and Lake Allatoona, hence its nickname “The Lake City.” The city boasts a rich history, a charming downtown, abundant outdoor recreational activities, a vibrant restaurant scene, and an active festival and events calendar. Acworth is one of the best, family-friendly destinations in the Atlanta region. Come discover why You’re Welcome in Acworth! ESS | Webmail | Handbook | Laserfiche | Login Previous Next I got a ticket from an Acworth Police officer. What do I do? There is a court date listed on the citation. This is an arraignment date. You will be asked to enter a plea at this time. Please be on time for your appointed court date as the judge will give you a lot of valuable information at the opening of the court session. Mike Brooks 2024-07-29T13:52:05-04:00 September 15, 2023 | Please Feel Free to Share This Story: Facebook X LinkedIn Pinterest Email 4415 Center Street, Acworth GA 30101 Phone Directory Contact Us © 2023 City of Acworth Acworth is located in the foothills of the North Georgia mountains and is nestled along the banks of Lake Acworth and Lake Allatoona, hence its nickname “The Lake City.” The city boasts a rich history, a charming downtown, abundant outdoor recreational activities, a vibrant restaurant scene, and an active festival and events calendar. Acworth is one of the best, family-friendly destinations in the Atlanta region. Come discover why You’re Welcome in Acworth! 
ESS | Webmail | Handbook | Laserfiche | Login ",2024-08-20 14:07:03 -https://acworth-ga.gov/presentation-introducing-three-new-civilian-members-of-the-acworth-police-department/,presentation-introducing-three-new-civilian-members-of-the-acworth-police-department,"Presentation Introducing Three New Civilian Members of the Acworth Police Department - City of Acworth, GA","At the Thursday, May 18 regular city council meeting, Chief Evans introduced three new civilian members of the Acworth Police Department. Macey Williams serves as Crime Analyst, Madison Harrison serves as Evidence Tech, and Emily Hall serves as Victim Advocate. These employees play an integral role in assisting officers with solving cases, and Chief Evans","Home - City of Acworth, GA",200,[],[],"[""Presentation Introducing Three New Civilian Members of the Acworth Police Department""]","[""Share this on Social Media""]",[],[],"Open toolbar Accessibility Tools Accessibility Tools Increase Text Increase Text Decrease Text Decrease Text Grayscale Grayscale High Contrast High Contrast Negative Contrast Negative Contrast Light Background Light Background Links Underline Links Underline Readable Font Readable Font Reset Reset Accessibility Tools Increase Text Increase Text Decrease Text Decrease Text Grayscale Grayscale High Contrast High Contrast Negative Contrast Negative Contrast Light Background Light Background Links Underline Links Underline Readable Font Readable Font Reset Reset PHONE DIRECTORY RESOURCES Search for: Search Button PHONE DIRECTORY RESOURCES Search for: Search Button NEWS DEPARTMENTS GOVERNANCE & DEVELOPMENT Development Clerks Office Court Services DDA, Tourism, and Historic Preservation OPERATIONS Parks, Recreation, and Community Resources Power, Public Works, and Stormwater SUPPORT SERVICES Administration Customer Service Human Resources Finance Information Technology PUBLIC SAFETY Acworth Police RESIDENTS Public Art Master Plan Application for Boards & Commissions Board of Aldermen Customer Service Parks, Recreation, and Community Resources Historic Acworth Master Fee Schedule E-News Sign Up Online Payments BUSINESS Bids & Projects E-Verify Permits, Applications, & Ordinances City Code of Ordinances Master Fee Schedule Start a Business EVENTS VISIT ACWORTH NEWS DEPARTMENTS GOVERNANCE & DEVELOPMENT Development Clerks Office Court Services DDA, Tourism, and Historic Preservation OPERATIONS Parks, Recreation, and Community Resources Power, Public Works, and Stormwater SUPPORT SERVICES Administration Customer Service Human Resources Finance Information Technology PUBLIC SAFETY Acworth Police RESIDENTS Public Art Master Plan Application for Boards & Commissions Board of Aldermen Customer Service Parks, Recreation, and Community Resources Historic Acworth Master Fee Schedule E-News Sign Up Online Payments BUSINESS Bids & Projects E-Verify Permits, Applications, & Ordinances City Code of Ordinances Master Fee Schedule Start a Business EVENTS VISIT ACWORTH Presentation Introducing Three New Civilian Members of the Acworth Police Department Published On: May 18, 2023 At the Thursday, May 18 regular city council meeting, Chief Evans introduced three new civilian members of the Acworth Police Department. Macey Williams serves as Crime Analyst, Madison Harrison serves as Evidence Tech, and Emily Hall serves as Victim Advocate. 
These employees play an integral role in assisting officers with solving cases, and Chief Evans was pleased to share with Mayor Allegood and Acworth’s Aldermen how important their new positions are in supporting both the Acworth Police Department and the community as a whole. Share this on Social Media 4415 Center Street, Acworth GA 30101 Phone Directory Contact Us © 2023 City of Acworth Acworth is located in the foothills of the North Georgia mountains and is nestled along the banks of Lake Acworth and Lake Allatoona, hence its nickname “The Lake City.” The city boasts a rich history, a charming downtown, abundant outdoor recreational activities, a vibrant restaurant scene, and an active festival and events calendar. Acworth is one of the best, family-friendly destinations in the Atlanta region. Come discover why You’re Welcome in Acworth! ESS | Webmail | Handbook | Laserfiche | Login ",2024-08-20 14:07:03 -https://acworth-ga.gov/team_member/police-department-records/,team_member/police-department-records,"Police Department Records - City of Acworth, GA",,"Home - City of Acworth, GA",200,"[""Police Department Records""]",[],[],"[""Please Feel Free to Share This Story:""]",[],[],"Open toolbar Accessibility Tools Accessibility Tools Increase Text Increase Text Decrease Text Decrease Text Grayscale Grayscale High Contrast High Contrast Negative Contrast Negative Contrast Light Background Light Background Links Underline Links Underline Readable Font Readable Font Reset Reset Accessibility Tools Increase Text Increase Text Decrease Text Decrease Text Grayscale Grayscale High Contrast High Contrast Negative Contrast Negative Contrast Light Background Light Background Links Underline Links Underline Readable Font Readable Font Reset Reset PHONE DIRECTORY RESOURCES Search for: Search Button PHONE DIRECTORY RESOURCES Search for: Search Button NEWS DEPARTMENTS GOVERNANCE & DEVELOPMENT Development Clerks Office Court Services DDA, Tourism, and Historic Preservation OPERATIONS Parks, Recreation, and Community Resources Power, Public Works, and Stormwater SUPPORT SERVICES Administration Customer Service Human Resources Finance Information Technology PUBLIC SAFETY Acworth Police RESIDENTS Public Art Master Plan Application for Boards & Commissions Board of Aldermen Customer Service Parks, Recreation, and Community Resources Historic Acworth Master Fee Schedule E-News Sign Up Online Payments BUSINESS Bids & Projects E-Verify Permits, Applications, & Ordinances City Code of Ordinances Master Fee Schedule Start a Business EVENTS VISIT ACWORTH NEWS DEPARTMENTS GOVERNANCE & DEVELOPMENT Development Clerks Office Court Services DDA, Tourism, and Historic Preservation OPERATIONS Parks, Recreation, and Community Resources Power, Public Works, and Stormwater SUPPORT SERVICES Administration Customer Service Human Resources Finance Information Technology PUBLIC SAFETY Acworth Police RESIDENTS Public Art Master Plan Application for Boards & Commissions Board of Aldermen Customer Service Parks, Recreation, and Community Resources Historic Acworth Master Fee Schedule E-News Sign Up Online Payments BUSINESS Bids & Projects E-Verify Permits, Applications, & Ordinances City Code of Ordinances Master Fee Schedule Start a Business EVENTS VISIT ACWORTH Previous Next Police Department Records Mike Brooks 2023-12-18T10:02:20-05:00 December 18, 2023 | Please Feel Free to Share This Story: Facebook X LinkedIn Pinterest Email 4415 Center Street, Acworth GA 30101 Phone Directory Contact Us © 2023 City of Acworth Acworth is 
located in the foothills of the North Georgia mountains and is nestled along the banks of Lake Acworth and Lake Allatoona, hence its nickname “The Lake City.” The city boasts a rich history, a charming downtown, abundant outdoor recreational activities, a vibrant restaurant scene, and an active festival and events calendar. Acworth is one of the best, family-friendly destinations in the Atlanta region. Come discover why You’re Welcome in Acworth! ESS | Webmail | Handbook | Laserfiche | Login ",2024-08-20 14:07:03 -https://www.ada.gov/_pages/redirects/illinois_state_police/,_pages/redirects/illinois_state_police,SETTLEMENT AGREEMENT BETWEEN THE UNITED STATES AND ILLINOIS STATE POLICE,"The ADA Home Page provides access to Americans with Disabilities Act (ADA) regulations for businesses and State and local governments, technical assistance materials, ADA Standards for Accessible Design, links to Federal agencies with ADA responsibilities and information, updates on new ADA requirements, streaming video, information about Department of Justice ADA settlement agreements, consent decrees, and enforcement activities and access to Freedom of Information Act (FOIA) ADA material",The Americans with Disabilities Act | ADA.gov,200,"[""SETTLEMENT AGREEMENT BETWEEN THE UNITED STATES \n OF AMERICA AND AND ILLINOIS STATE POLICE""]","[""I. BACKGROUND"", ""II. GENERAL AGREEMENT"", ""III. SPECIFIC REMEDIAL RELIEF"", ""IV. IMPLEMENTATION AND ENFORCEMENT""]",[],[],[],[],,2024-08-20 14:07:03 -https://www.ada.gov/policeinfo.htm,policeinfo.htm,American's with Disabilities Act: Information for Law Enforcement,"The ADA Home Page provides access to Americans with Disabilities Act (ADA) regulations for businesses and State and local governments, technical assistance materials, ADA Standards for Accessible Design, links to Federal agencies with ADA responsibilities and information, updates on new ADA requirements, streaming video, information about Department of Justice ADA settlement agreements, consent decrees, and enforcement activities and access to Freedom of Information Act (FOIA) ADA material",The Americans with Disabilities Act | ADA.gov,200,"[""Americans with Disabilites Act Information for Law Enforcement""]","[""PUBLICATIONS""]",[],[],[],[],"Americans with Disabilites Act Information for Law Enforcement How do you interview a witness who is deaf?  How do you assist a person who is having a seizure?  How do you transport a suspect who uses a wheelchair?  Under the Americans with Disabilities Act (ADA), people who have disabilities are entitled to the same services law enforcement provides to anyone else. They may not be excluded or segregated from services, be denied services, or otherwise be treated differently than other people.  The following compliance assistance materials will help state and local law enforcement officers understand how to interact with victims, witnesses, suspects, and others who have disabilities. PUBLICATIONS Communicating with People Who Are Deaf or Hard of Hearing:  ADA Guide for Law Enforcement Officers - This 8-panel pocket guide provides basic information for officers about communicating effectively with people who are deaf or hard of hearing. Guide for Officers Model Policy for Law Enforcement on Communicating with People Who Are Deaf or Hard of Hearing - This 4-page document serves as a model for law enforcement agencies when adopting a policy on effective communication with people who are deaf or hard of hearing.  
Agencies are encouraged to download and adapt the policy to suit their needs. Model Policy (PDF) | Model Policy (HTML) Commonly Asked Questions about the Americans with Disabilities Act and Law Enforcement - This 12-page fact sheet answers frequent questions about the ADA and its effect on law enforcement services involving people with disabilities. Commonly Asked Questions (PDF) | Commonly Asked Questions (HTML) Questions and Answers: The Americans with Disabilities Act and Hiring Police Officers - This 5-page fact sheet answers frequent questions about the ADA and its impact on law enforcement officers with disabilities. Questions and Answers (PDF) | Questions and Answers (HTML) Additional ADA information for state and local government agencies including law enforcement ADA Regulations | Other Publications December 1, 2008 ",2024-08-20 14:07:03 -https://www.ada.gov/policevideo/policedialupgallery.htm,policevideo/policedialupgallery.htm,,,The Americans with Disabilities Act | ADA.gov,404,,,,,,,,2024-08-20 14:07:03 -https://www.adamscountypa.gov/departments/victimwitness/police-departments,departments/victimwitness/police-departments,Adams County PA - Police Departments,,Adams County PA - Official Website,200,"[""Police Departments"", ""Police Departments""]",[],[],[],[],[],[Skip to Content] ,2024-08-20 14:07:03 -https://www.adamscountypa.gov/police-14c9658f036316bb91289647492de2ae/narema/contact-us,police-14c9658f036316bb91289647492de2ae/narema/contact-us,Adams County PA - Contact Us,,Adams County PA - Official Website,200,"[""Contact Us"", ""Contact Us""]","[""Emergency Dial 9-1-1  -   Non-Emergency 717-334-8603""]",[],[],[],[],[Skip to Content] ,2024-08-20 14:07:03 -https://www.adamscountypa.gov/police/earpd/calendar/police-department-commission-meeting,police/earpd/calendar/police-department-commission-meeting,Adams County PA - Police Department Commission Meeting,,Adams County PA - Official Website,200,"[""Police Department Commission Meeting""]",[],[],[],[],[],"[Skip to Content] Search ✖ Home Services Locations Powered by Translate Log in Register ✖ Commissioners Board of Commissioners Commissioners Office Elections and Voters Registration Human Resources Solicitor Tax Services Veterans Affairs County Services Board of Commissioners Meetings County Budget Employment Opportunites Open Records Right to Know Parcel Locator - Interactive Mapping Pay Delinquent Taxes Register to Vote Veterans Services Controller's Fraud Hotline About Adams County Adams County Broadband Taskforce Adams County Profile Adams County School Districts Adams County Plans, Studies, and Publications Adams County Tax Collectors Adams Economic Alliance Land Conservancy of Adams County Penn State Extension Office of Adams County Courts 51st Judicial District Court Court of Common Pleas Court Administration Magisterial District Judges Magisterial District Judges Home District Court 51-3-01 District Court 51-3-02 District Court 51-3-03 District Court 51-3-04 Court Departments Criminal Justice Advisory Board (CJAB) Domestic Relations Section Law Library Operational Services Probation Services County Government County Administration Adult Correction Complex Building and Maintenance​​ Children and Youth Services​​ Conservation District​ Department of Emergency Services​ Elections and Voter Registration​ Human Resources Information Technology Office of Budget and Purchasing Office of Planning and Development​ ​ Protective Services Public Defender Security Solicitor Tax Services​​​ Veterans Affairs Victim Witness Elected 
Officials Clerk of Court​ ​ Clerk of Orphans' Court ​ ​ Controller​​ ​ Coroner​​ ​ District Attorney ​ Prothonotary Recorder of Deeds ​ Register of Wills​ ​ Sheriff ​ Treasurer ​ Municipalities Boroughs Abbottstown Borough Arendtsville Borough Bendersville Borough Biglerville Borough Bonneauville Borough Carroll Valley Borough East Berlin Borough Fairfield Borough Gettysburg Borough Littlestown Borough New Oxford Borough McSherrystown Borough York Springs Borough Townships Berwick Township Butler Township Conewago Township Cumberland Township Franklin Township Freedom Township Germany Township Hamiltonban Township Hamilton Township Highland Township Huntington Township Latimore Township Liberty Township Menallen Township Mt. Joy Township Mt. Pleasant Township Oxford Township Reading Township Straban Township Tyrone Township Union Township ​Associations Council of Government Association of Borough Officials Association of Township Officials York/Adams MH IDD Program Adams County Volunteer Emergency Services Northern Adams Regional Emergency Management Agency Police Department (EARPD) Search Search Police Department Commission Meeting Start Date: Sunday, January 1, 2023 End Date: Sunday, December 31, 2023 Location: Eastern Adams Regional Police Department - 110 N Berlin Road Start Time: 4:00 PM Back to previous page Resources County by Location County Coat of Arms Privacy Statement Terms of Use Navigation Commissioners County Government Courts Municipalities Services Courts Self-Help Center Election Resources Employment Office of Open Records Tax Services​​​ Copyright 2024 ",2024-08-20 14:07:03 -https://www.adamscountypa.gov/police/earpd/calendar/police-department-commission-meeting/07-18-2023-04-00-00-pm-police-department-commission-meeting,police/earpd/calendar/police-department-commission-meeting/07-18-2023-04-00-00-pm-police-department-commission-meeting,Adams County PA - 07/18/2023 04:00:00 PM Police Department Commission Meeting,,Adams County PA - Official Website,200,"[""Police Department Commission Meeting""]",[],[],[],[],[],"[Skip to Content] Search ✖ Home Services Locations Powered by Translate Log in Register ✖ Commissioners Board of Commissioners Commissioners Office Elections and Voters Registration Human Resources Solicitor Tax Services Veterans Affairs County Services Board of Commissioners Meetings County Budget Employment Opportunites Open Records Right to Know Parcel Locator - Interactive Mapping Pay Delinquent Taxes Register to Vote Veterans Services Controller's Fraud Hotline About Adams County Adams County Broadband Taskforce Adams County Profile Adams County School Districts Adams County Plans, Studies, and Publications Adams County Tax Collectors Adams Economic Alliance Land Conservancy of Adams County Penn State Extension Office of Adams County Courts 51st Judicial District Court Court of Common Pleas Court Administration Magisterial District Judges Magisterial District Judges Home District Court 51-3-01 District Court 51-3-02 District Court 51-3-03 District Court 51-3-04 Court Departments Criminal Justice Advisory Board (CJAB) Domestic Relations Section Law Library Operational Services Probation Services County Government County Administration Adult Correction Complex Building and Maintenance​​ Children and Youth Services​​ Conservation District​ Department of Emergency Services​ Elections and Voter Registration​ Human Resources Information Technology Office of Budget and Purchasing Office of Planning and Development​ ​ Protective Services Public Defender Security 
Solicitor Tax Services​​​ Veterans Affairs Victim Witness Elected Officials Clerk of Court​ ​ Clerk of Orphans' Court ​ ​ Controller​​ ​ Coroner​​ ​ District Attorney ​ Prothonotary Recorder of Deeds ​ Register of Wills​ ​ Sheriff ​ Treasurer ​ Municipalities Boroughs Abbottstown Borough Arendtsville Borough Bendersville Borough Biglerville Borough Bonneauville Borough Carroll Valley Borough East Berlin Borough Fairfield Borough Gettysburg Borough Littlestown Borough New Oxford Borough McSherrystown Borough York Springs Borough Townships Berwick Township Butler Township Conewago Township Cumberland Township Franklin Township Freedom Township Germany Township Hamiltonban Township Hamilton Township Highland Township Huntington Township Latimore Township Liberty Township Menallen Township Mt. Joy Township Mt. Pleasant Township Oxford Township Reading Township Straban Township Tyrone Township Union Township ​Associations Council of Government Association of Borough Officials Association of Township Officials York/Adams MH IDD Program Adams County Volunteer Emergency Services Northern Adams Regional Emergency Management Agency Police Department (EARPD) Search Search Police Department Commission Meeting Start Date: Tuesday, July 18, 2023 End Date: Tuesday, July 18, 2023 Location: Eastern Adams Regional Police Department - 110 N Berlin Road Start Time: 4:00 PM Back to previous page Resources County by Location County Coat of Arms Privacy Statement Terms of Use Navigation Commissioners County Government Courts Municipalities Services Courts Self-Help Center Election Resources Employment Office of Open Records Tax Services​​​ Copyright 2024 ",2024-08-20 14:07:03 -https://cumberland.adamscountypa.gov/departments/contactuspolice/policecontacts/brian-weikert,departments/contactuspolice/policecontacts/brian-weikert,Adams County Municipality Cumberland Township - Brian Weikert,,Adams County Municipality Cumberland Township - Official Website,200,[],[],[],[],[],[],"[Skip to Content] Search ✖ Log in Register ✖ Home Township Meetings Township Services Township Land Use Search Search Brian Weikert Patrolman Email: bweikert@cumberlandtwppa.gov Extension: 450 Direct Phone: 717-334-6485 Ext. 450 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Search ✖ Log in Register ✖ Home Township Meetings Township Services Township Land Use Search Search Search ✖ Log in Register ✖ Log in Register Home Township Meetings Township Services Township Land Use Search Search Search Brian Weikert Patrolman Email: bweikert@cumberlandtwppa.gov Extension: 450 Direct Phone: 717-334-6485 Ext. 450 Brian Weikert Patrolman Email: bweikert@cumberlandtwppa.gov Extension: 450 Direct Phone: 717-334-6485 Ext. 450 Brian Weikert Patrolman Email: bweikert@cumberlandtwppa.gov Extension: 450 Direct Phone: 717-334-6485 Ext. 450 Brian Weikert Patrolman Email: bweikert@cumberlandtwppa.gov Extension: 450 Direct Phone: 717-334-6485 Ext. 450 Brian Weikert Patrolman Email: bweikert@cumberlandtwppa.gov Extension: 450 Direct Phone: 717-334-6485 Ext. 450 Brian Weikert Patrolman Email: bweikert@cumberlandtwppa.gov Email: Extension: 450 Extension: Direct Phone: 717-334-6485 Ext. 
450 Direct Phone: Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Copyright 2024 Copyright 2024 Copyright 2024 ",2024-08-20 14:07:03 -https://cumberland.adamscountypa.gov/departments/contactuspolice/policecontacts/daniel-barbagelle,departments/contactuspolice/policecontacts/daniel-barbagelle,Adams County Municipality Cumberland Township - Daniel Barbagello,,Adams County Municipality Cumberland Township - Official Website,200,[],[],[],[],[],[],"[Skip to Content] Search ✖ Log in Register ✖ Home Township Meetings Township Services Township Land Use Search Search Daniel Barbagello Patrolman First Class Email: dbarbagello@cumberlandtwppa.gov Extension: 404 Direct Phone: 717-334-6485 Ext. 404 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Search ✖ Log in Register ✖ Home Township Meetings Township Services Township Land Use Search Search Search ✖ Log in Register ✖ Log in Register Home Township Meetings Township Services Township Land Use Search Search Search Daniel Barbagello Patrolman First Class Email: dbarbagello@cumberlandtwppa.gov Extension: 404 Direct Phone: 717-334-6485 Ext. 404 Daniel Barbagello Patrolman First Class Email: dbarbagello@cumberlandtwppa.gov Extension: 404 Direct Phone: 717-334-6485 Ext. 404 Daniel Barbagello Patrolman First Class Email: dbarbagello@cumberlandtwppa.gov Extension: 404 Direct Phone: 717-334-6485 Ext. 404 Daniel Barbagello Patrolman First Class Email: dbarbagello@cumberlandtwppa.gov Extension: 404 Direct Phone: 717-334-6485 Ext. 404 Daniel Barbagello Patrolman First Class Email: dbarbagello@cumberlandtwppa.gov Extension: 404 Direct Phone: 717-334-6485 Ext. 404 Daniel Barbagello Patrolman First Class Email: dbarbagello@cumberlandtwppa.gov Email: Extension: 404 Extension: Direct Phone: 717-334-6485 Ext. 
404 Direct Phone: Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Copyright 2024 Copyright 2024 Copyright 2024 ",2024-08-20 14:07:03 -https://cumberland.adamscountypa.gov/departments/contactuspolice/policecontacts/eric-yost,departments/contactuspolice/policecontacts/eric-yost,Adams County Municipality Cumberland Township - Eric Yost,,Adams County Municipality Cumberland Township - Official Website,200,[],[],[],[],[],[],"[Skip to Content] Search ✖ Log in Register ✖ Home Township Meetings Township Services Township Land Use Search Search Eric Yost Patrolman Email: eyost@cumberlandtwppa.gov Extension: 4400 Direct Phone: 717-334-6485 Ext. 4400 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Search ✖ Log in Register ✖ Home Township Meetings Township Services Township Land Use Search Search Search ✖ Log in Register ✖ Log in Register Home Township Meetings Township Services Township Land Use Search Search Search Eric Yost Patrolman Email: eyost@cumberlandtwppa.gov Extension: 4400 Direct Phone: 717-334-6485 Ext. 4400 Eric Yost Patrolman Email: eyost@cumberlandtwppa.gov Extension: 4400 Direct Phone: 717-334-6485 Ext. 4400 Eric Yost Patrolman Email: eyost@cumberlandtwppa.gov Extension: 4400 Direct Phone: 717-334-6485 Ext. 4400 Eric Yost Patrolman Email: eyost@cumberlandtwppa.gov Extension: 4400 Direct Phone: 717-334-6485 Ext. 4400 Eric Yost Patrolman Email: eyost@cumberlandtwppa.gov Extension: 4400 Direct Phone: 717-334-6485 Ext. 4400 Eric Yost Patrolman Email: eyost@cumberlandtwppa.gov Email: Extension: 4400 Extension: Direct Phone: 717-334-6485 Ext. 
4400 Direct Phone: Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Copyright 2024 Copyright 2024 Copyright 2024 ",2024-08-20 14:07:03 -https://cumberland.adamscountypa.gov/departments/contactuspolice/policecontacts/josh-goodling,departments/contactuspolice/policecontacts/josh-goodling,Adams County Municipality Cumberland Township - Josh Goodling,,Adams County Municipality Cumberland Township - Official Website,200,[],[],[],[],[],[],"[Skip to Content] Search ✖ Log in Register ✖ Home Township Meetings Township Services Township Land Use Search Search Josh Goodling Sergeant Email: jgoodling@cumberlandtwppa.gov Extension: 407 Direct Phone: 717-334-6485 Ext. 407 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Search ✖ Log in Register ✖ Home Township Meetings Township Services Township Land Use Search Search Search ✖ Log in Register ✖ Log in Register Home Township Meetings Township Services Township Land Use Search Search Search Josh Goodling Sergeant Email: jgoodling@cumberlandtwppa.gov Extension: 407 Direct Phone: 717-334-6485 Ext. 407 Josh Goodling Sergeant Email: jgoodling@cumberlandtwppa.gov Extension: 407 Direct Phone: 717-334-6485 Ext. 407 Josh Goodling Sergeant Email: jgoodling@cumberlandtwppa.gov Extension: 407 Direct Phone: 717-334-6485 Ext. 407 Josh Goodling Sergeant Email: jgoodling@cumberlandtwppa.gov Extension: 407 Direct Phone: 717-334-6485 Ext. 407 Josh Goodling Sergeant Email: jgoodling@cumberlandtwppa.gov Extension: 407 Direct Phone: 717-334-6485 Ext. 407 Josh Goodling Sergeant Email: jgoodling@cumberlandtwppa.gov Email: Extension: 407 Extension: Direct Phone: 717-334-6485 Ext. 
407 Direct Phone: Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Copyright 2024 Copyright 2024 Copyright 2024 ",2024-08-20 14:07:03 -https://cumberland.adamscountypa.gov/departments/contactuspolice/policecontacts/joshua-rosenberger,departments/contactuspolice/policecontacts/joshua-rosenberger,Adams County Municipality Cumberland Township - Joshua Rosenberger,,Adams County Municipality Cumberland Township - Official Website,200,[],[],[],[],[],[],"[Skip to Content] Search ✖ Log in Register ✖ Home Township Meetings Township Services Township Land Use Search Search Joshua Rosenberger Sergeant Email: jrosenberger@cumberlandtwppa.gov Extension: 402 Direct Phone: 717-334-6485 Ext. 402 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Search ✖ Log in Register ✖ Home Township Meetings Township Services Township Land Use Search Search Search ✖ Log in Register ✖ Log in Register Home Township Meetings Township Services Township Land Use Search Search Search Joshua Rosenberger Sergeant Email: jrosenberger@cumberlandtwppa.gov Extension: 402 Direct Phone: 717-334-6485 Ext. 402 Joshua Rosenberger Sergeant Email: jrosenberger@cumberlandtwppa.gov Extension: 402 Direct Phone: 717-334-6485 Ext. 402 Joshua Rosenberger Sergeant Email: jrosenberger@cumberlandtwppa.gov Extension: 402 Direct Phone: 717-334-6485 Ext. 402 Joshua Rosenberger Sergeant Email: jrosenberger@cumberlandtwppa.gov Extension: 402 Direct Phone: 717-334-6485 Ext. 402 Joshua Rosenberger Sergeant Email: jrosenberger@cumberlandtwppa.gov Extension: 402 Direct Phone: 717-334-6485 Ext. 402 Joshua Rosenberger Sergeant Email: jrosenberger@cumberlandtwppa.gov Email: Extension: 402 Extension: Direct Phone: 717-334-6485 Ext. 
402 Direct Phone: Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Copyright 2024 Copyright 2024 Copyright 2024 ",2024-08-20 14:07:03 -https://cumberland.adamscountypa.gov/departments/contactuspolice/policecontacts/lane-hartley,departments/contactuspolice/policecontacts/lane-hartley,Adams County Municipality Cumberland Township - Lane Hartley,,Adams County Municipality Cumberland Township - Official Website,200,[],[],[],[],[],[],"[Skip to Content] Search ✖ Log in Register ✖ Home Township Meetings Township Services Township Land Use Search Search Lane Hartley Patrolman Email: lhartley@cumberlandtwppa.gov Extension: 408 Direct Phone: 717-334-6485 Ext. 408 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Search ✖ Log in Register ✖ Home Township Meetings Township Services Township Land Use Search Search Search ✖ Log in Register ✖ Log in Register Home Township Meetings Township Services Township Land Use Search Search Search Lane Hartley Patrolman Email: lhartley@cumberlandtwppa.gov Extension: 408 Direct Phone: 717-334-6485 Ext. 408 Lane Hartley Patrolman Email: lhartley@cumberlandtwppa.gov Extension: 408 Direct Phone: 717-334-6485 Ext. 408 Lane Hartley Patrolman Email: lhartley@cumberlandtwppa.gov Extension: 408 Direct Phone: 717-334-6485 Ext. 408 Lane Hartley Patrolman Email: lhartley@cumberlandtwppa.gov Extension: 408 Direct Phone: 717-334-6485 Ext. 408 Lane Hartley Patrolman Email: lhartley@cumberlandtwppa.gov Extension: 408 Direct Phone: 717-334-6485 Ext. 408 Lane Hartley Patrolman Email: lhartley@cumberlandtwppa.gov Email: Extension: 408 Extension: Direct Phone: 717-334-6485 Ext. 
408 Direct Phone: Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Copyright 2024 Copyright 2024 Copyright 2024 ",2024-08-20 14:07:03 -https://cumberland.adamscountypa.gov/departments/contactuspolice/policecontacts/ryan-eiker,departments/contactuspolice/policecontacts/ryan-eiker,Adams County Municipality Cumberland Township - Ryan Eiker,,Adams County Municipality Cumberland Township - Official Website,200,[],[],[],[],[],[],"[Skip to Content] Search ✖ Log in Register ✖ Home Township Meetings Township Services Township Land Use Search Search Ryan Eiker Patrolman First Class Email: reiker@cumberlandtwppa.gov Extension: 403 Direct Phone: 717-334-6485 Ext. 403 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Search ✖ Log in Register ✖ Home Township Meetings Township Services Township Land Use Search Search Search ✖ Log in Register ✖ Log in Register Home Township Meetings Township Services Township Land Use Search Search Search Ryan Eiker Patrolman First Class Email: reiker@cumberlandtwppa.gov Extension: 403 Direct Phone: 717-334-6485 Ext. 403 Ryan Eiker Patrolman First Class Email: reiker@cumberlandtwppa.gov Extension: 403 Direct Phone: 717-334-6485 Ext. 403 Ryan Eiker Patrolman First Class Email: reiker@cumberlandtwppa.gov Extension: 403 Direct Phone: 717-334-6485 Ext. 403 Ryan Eiker Patrolman First Class Email: reiker@cumberlandtwppa.gov Extension: 403 Direct Phone: 717-334-6485 Ext. 403 Ryan Eiker Patrolman First Class Email: reiker@cumberlandtwppa.gov Extension: 403 Direct Phone: 717-334-6485 Ext. 403 Ryan Eiker Patrolman First Class Email: reiker@cumberlandtwppa.gov Email: Extension: 403 Extension: Direct Phone: 717-334-6485 Ext. 
403 Direct Phone: Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Copyright 2024 Copyright 2024 Copyright 2024 ",2024-08-20 14:07:03 -https://cumberland.adamscountypa.gov/home/newsinformation/now-hiring-police-department,home/newsinformation/now-hiring-police-department,Adams County Municipality Cumberland Township - Official Website,Official Website,Adams County Municipality Cumberland Township - Official Website,200,"[""Now Hiring - Police Department""]",[],[],[],[],[],"[Skip to Content] Search ✖ Log in Register ✖ Home Township Meetings Township Services Township Land Use Search Search Now Hiring - Police Department News Date: Tuesday, June 18, 2024 Full Time Police Cadet Full Time Officer Application Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Search ✖ Log in Register ✖ Home Township Meetings Township Services Township Land Use Search Search Search ✖ Log in Register ✖ Log in Register Home Township Meetings Township Services Township Land Use Search Search Search Now Hiring - Police Department News Date: Tuesday, June 18, 2024 Full Time Police Cadet Full Time Officer Application Now Hiring - Police Department Now Hiring - Police Department News Date: Tuesday, June 18, 2024 News Date: Tuesday, June 18, 2024 News Date: Tuesday, June 18, 2024 Full Time Police Cadet Full Time Officer Application Full Time Police Cadet Full Time Officer Application Full Time Police Cadet Full Time Officer Application Full Time Police Cadet Full Time Officer Application Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Cumberland Township 1370 Fairfield Road Gettysburg, PA 17325 Phone - 717-334-6485 Fax - 717-334-3632 Copyright 2024 Copyright 2024 Copyright 2024 Copyright 2024 ",2024-08-20 14:07:03 -https://cumberland.adamscountypa.gov/home/newsinformation/press-release-for-police-department-donations,home/newsinformation/press-release-for-police-department-donations,,,Adams County Municipality Cumberland Township - Official 
Website,404,,,,,,,,2024-08-20 14:07:03 diff --git a/annotation_pipeline/populate_labelstudio.py b/annotation_pipeline/populate_labelstudio.py deleted file mode 100644 index 49c673c2..00000000 --- a/annotation_pipeline/populate_labelstudio.py +++ /dev/null @@ -1,256 +0,0 @@ -""" -This Python script automates the process of crawling Common Crawl Corpus for relevant URLs, -scraping HTML content from those pages, -formatting the data as Label Studio tasks, -and uploading them to Label Studio for annotation. -""" - -import argparse -import configparser -import os -import subprocess -import sys -from http import HTTPStatus - -import pandas as pd -from huggingface_hub import hf_hub_download - -# The below code sets the working directory to be the root of the entire repository for module imports -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) - -from label_studio_interface.LabelStudioConfig import LabelStudioConfig -from label_studio_interface.LabelStudioAPIManager import LabelStudioAPIManager - -def run_subprocess(terminal_command: str): - """ - Runs subprocesses (e.g. common crawl and html tag collector) and handles their outputs and errors - """ - - process = subprocess.Popen(terminal_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, bufsize=1) - - with process.stdout, process.stderr: - for line in process.stdout: - print(line, end='') - for line in process.stderr: - print(line, end='') - - return_code = process.wait() - - stdout, stderr = process.communicate() - - return return_code, stdout, stderr - -def run_common_crawl(common_crawl_id: str, url: str, search_term: str, num_pages: str): - """ - Prompts terminal to run the common crawl script given the following: - Args: see process_crawl() - - See Common Crawl Documentation @ https://github.com/Police-Data-Accessibility-Project/data-source-identification/blob/main/common_crawler/README.md - - CSV of crawled URLs uploaded to HuggingFace - """ - - common_crawl = f"python common_crawler/main.py {common_crawl_id} '{url}' {search_term} --config annotation_pipeline/config.ini --pages {num_pages}" - - return_code, stdout, stderr = run_subprocess(common_crawl) - - return return_code, stdout, stderr - -def run_tag_collector(filename: str): - """ - Prompts terminal to run the tag collector on crawled URLs - filename: name of csv containing crawled URLs - - CSV of URLs + collected tags saved in ./labeled-source-text.csv - """ - tag_collector = f"python3 html_tag_collector/collector.py annotation_pipeline/data/{filename} --render-javascript" - - return_code, stdout, stderr = run_subprocess(tag_collector) - - return return_code, stdout, stderr - -def csv_to_label_studio_tasks(csv_file_path: str, batch_id: str, output_name: str, record_type: str = None) -> list[dict]: - """ - Formats CSV into list[dict] with "data" key as Label Studio expects - csv_file_path: path to csv with labeled source text - batch_id: timestamp to append to all URLs in batch - output_name: saves tag_collected CSV + batch_info in data/tag_collector/{output_name} - """ - df = pd.read_csv(csv_file_path) - df['batch_id'] = [batch_id] * len(df) - df = df.fillna('') - os.makedirs("annotation_pipeline/data/tag_collector/", exist_ok=True) - df.to_csv("annotation_pipeline/data/tag_collector/" + output_name.replace("urls/", "", 1), index=False) - - #remove labeled-source-text.csv (updated and written to data/tag_collector) - if os.path.exists(csv_file_path): - os.remove(csv_file_path) - - tasks = [] - - if record_type: - for _, 
row in df.iterrows(): - task_data = row.to_dict() - task_predictions = { - "model_version": "record-type prediction", - "result": [ - { - "from_name": "record-type", - "to_name": "url", - "type": "choices", - "value": { - "choices": [record_type] - } - } - ] - } - - tasks.append({"data": task_data, "predictions": [task_predictions]}) - else: - tasks = [{"data": row.to_dict()} for _, row in df.iterrows()] - - return tasks - -def get_valid_record_types(file_path: str) -> set: - """ Load the file containing valid record types and return them as a set""" - with open(file_path, 'r') as file: - valid_record_types = {line.strip() for line in file} - return valid_record_types - -def get_huggingface_repo_id(config_file: str) -> str: - """ Returns HuggingFace REPO_ID (where unlabeled URLs are stashed) from config.ini file""" - - config = configparser.ConfigParser() - config.read(config_file) - - # Retrieve the huggingface_repo_id from the DEFAULT section - huggingface_repo_id = config['DEFAULT'].get('huggingface_repo_id') - - if huggingface_repo_id is None: - raise ValueError("huggingface_repo_id not found in the config file.") - - return huggingface_repo_id - -def process_crawl(common_crawl_id: str, url: str, search_term: str, num_pages: str) -> pd.Series: - """Initiates the common crawl script and handles its output for further processing - - Args: - common_crawl_id: string to specify which common crawl corpus to search - url: specify type of url to search for (e.g. *.gov for all .gov domains) - search_term: further refine search with keyword that must be matched in full URL - num_pages: number of pages to search (15,000 records per page) - - Returns: - batch_info (pd.Series): summary info of crawl, including filename of csv containing relevant URLs - """ - #run common crawl - crawl_return_code, crawl_stdout, crawl_stderr = run_common_crawl(common_crawl_id, url, search_term, num_pages) - - print(f"from populate label studio crawl error: crawl return {crawl_return_code}, crawl stdout {crawl_stdout}, crawl stderr {crawl_stderr}") - - #check success - if crawl_return_code != 0: - raise ValueError(f"Common crawl script failed:\n{crawl_stderr}") - - #print batch info to verify before continuing - batch_info = pd.read_csv("annotation_pipeline/data/batch_info.csv").iloc[-1] - print("Batch Info:\n" + f"{batch_info}") - - if(batch_info["Count"] == 0): - raise ValueError("Batch count is 0. 
Rerun to crawl more pages.") - - return batch_info - -def process_tag_collector(batch_info: pd.Series, FILENAME: str) -> str: - """ - Initiates tag collector script and creates a batch id for all samples - - Args: - batch_info (pd.Series): summary info for crawl - FILENAME (str): filename of csv to collect tags on - - Returns: - batch_id (str): a datetime stamp to track batches - """ - - #run tag collector - tag_collector_return_code, tag_collector_stdout, tag_collector_stderr = run_tag_collector(FILENAME) - - #check success - if tag_collector_return_code != 0: - raise ValueError(f"Tag collector script failed:\n{tag_collector_stderr}") - - #create batch_id from datetime (removes milliseconds) - datetime = batch_info["Datetime"] - batch_id = datetime[:datetime.find('.')] - - return batch_id - -def label_studio_upload(batch_id: str, FILENAME: str, record_type: str): - """ - Handles label studio task formatting and upload - """ - - #convert to label studio task format - data = csv_to_label_studio_tasks("labeled-source-text.csv", batch_id, FILENAME, record_type) - - # Load the configuration for the Label Studio API - config = LabelStudioConfig(".env") - if "REPLACE_WITH_YOUR_TOKEN" in config.authorization_token: - raise ValueError("Please replace the access token in .env with your own access token") - - # Create an API manager - api_manager = LabelStudioAPIManager(config) - - #import tasks - label_studio_response = api_manager.export_tasks_into_project(data) - - #check import success - if label_studio_response.status_code == HTTPStatus.CREATED: - labelstudio_url = api_manager.api_url_constructor.get_import_url().rstrip('/import') - print(f"Tasks successfully imported. Please access the project at {labelstudio_url} to perform review and annotation tasks") - else: - raise ValueError(f"Failed to import tasks. Response code: {label_studio_response.status_code}\n{label_studio_response.text}") - -def main(): - """ - This script automates the process of crawling for relevant URL's, - scraping HTML content from those pages, formatting the data as label studio tasks, - and uploading to label studio - """ - - parser = argparse.ArgumentParser(description='Process crawl arguments') - parser.add_argument('common_crawl_id', type=str, help='common crawl ID') - parser.add_argument('url', type=str, help='URL type to search for') - parser.add_argument('keyword', type=str, help='require this keyword in URL results') - parser.add_argument('--pages', type=str, required=True, help='number of pages to process') - parser.add_argument('--record-type', type=str, required=False, help='assumed record type for pre-annotation') - args = parser.parse_args() - - if args.record_type is not None: - valid_record_types = get_valid_record_types("annotation_pipeline/record_types.txt") - if args.record_type not in valid_record_types: - raise ValueError(f"Invalid record type: {args.record_type}. 
Must be one of {valid_record_types}") - return - - try: - # COMMON CRAWL - batch_info = process_crawl(args.common_crawl_id, args.url, args.keyword, args.pages) - #get urls from hugging face - REPO_ID = get_huggingface_repo_id("annotation_pipeline/config.ini") - FILENAME = "urls/" + batch_info["Filename"] + ".csv" - hf_hub_download(repo_id=REPO_ID, filename=FILENAME, repo_type="dataset", local_dir="annotation_pipeline/data/") - - # TAG COLLECTOR - batch_id = process_tag_collector(batch_info, FILENAME) - - # LABEL STUDIO UPLOAD - label_studio_upload(batch_id, FILENAME, args.record_type) - except ValueError as e: - print(f"Error: {e}") - return - -if __name__ == "__main__": - print("Running Annotation Pipeline...") - main() - diff --git a/annotation_pipeline/record_types.txt b/annotation_pipeline/record_types.txt deleted file mode 100644 index b12931d6..00000000 --- a/annotation_pipeline/record_types.txt +++ /dev/null @@ -1,36 +0,0 @@ -Accident Reports -Arrest Records -Calls for Service -Car GPS -Citations -Dispatch Logs -Dispatch Recordings -Field Contacts -Incident Reports -Misc Police Activity -Officer Involved Shootings -Stops -Surveys -Use of Force Reports -Vehicle Pursuits -Complaints & Misconduct -Daily Activity Logs -Training & Hiring Info -Personnel Records -Annual & Monthly Reports -Budgets & Finances -Contact Info & Agency Meta -Geographic -List of Data Sources -Policies & Contracts -Crime Maps & Reports -Crime Statistics -Media Bulletins -Records Request Info -Resources -Sex Offender Registry -Wanted Persons -Booking Reports -Court Cases -Incarceration Records -Poor Data Source \ No newline at end of file diff --git a/annotation_pipeline/requirements.txt b/annotation_pipeline/requirements.txt deleted file mode 100644 index 5ff85815..00000000 --- a/annotation_pipeline/requirements.txt +++ /dev/null @@ -1,13 +0,0 @@ -pandas==2.1.4 -python-dotenv~=1.0.1 -argparse~=1.1 -huggingface-hub~=0.22.2 -requests~=2.31.0 -requests_html>=0.10.0 -lxml~=5.1.0 -pyppeteer>=2.0.0 -beautifulsoup4>=4.12.3 -bs4~=0.0.2 -tqdm>=4.64.1 -polars~=0.20.10 -urllib3~=1.26.18 diff --git a/api/main.py b/api/main.py index 356467af..eeb3e8a8 100644 --- a/api/main.py +++ b/api/main.py @@ -1,64 +1,109 @@ from contextlib import asynccontextmanager +import aiohttp +import uvicorn from fastapi import FastAPI +from starlette.responses import RedirectResponse from api.routes.annotate import annotate_router from api.routes.batch import batch_router from api.routes.collector import collector_router +from api.routes.metrics import metrics_router +from api.routes.review import review_router from api.routes.root import root_router +from api.routes.search import search_router +from api.routes.task import task_router from api.routes.url import url_router from collector_db.AsyncDatabaseClient import AsyncDatabaseClient from collector_db.DatabaseClient import DatabaseClient +from collector_manager.AsyncCollectorManager import AsyncCollectorManager from core.AsyncCore import AsyncCore -from core.CoreLogger import CoreLogger +from core.AsyncCoreLogger import AsyncCoreLogger +from core.EnvVarManager import EnvVarManager from core.ScheduledTaskManager import AsyncScheduledTaskManager from core.SourceCollectorCore import SourceCollectorCore +from core.TaskManager import TaskManager from html_tag_collector.ResponseParser import HTMLResponseParser from html_tag_collector.RootURLCache import RootURLCache from html_tag_collector.URLRequestInterface import URLRequestInterface from hugging_face.HuggingFaceInterface import HuggingFaceInterface 
-from util.helper_functions import get_from_env +from pdap_access_manager import AccessManager +from pdap_api_client.PDAPClient import PDAPClient +from util.DiscordNotifier import DiscordPoster + @asynccontextmanager async def lifespan(app: FastAPI): + env_var_manager = EnvVarManager.get() + # Initialize shared dependencies - db_client = DatabaseClient() + db_client = DatabaseClient( + db_url=env_var_manager.get_postgres_connection_string() + ) + adb_client = AsyncDatabaseClient( + db_url=env_var_manager.get_postgres_connection_string(is_async=True) + ) await setup_database(db_client) + core_logger = AsyncCoreLogger(adb_client=adb_client) + + session = aiohttp.ClientSession() + source_collector_core = SourceCollectorCore( - core_logger=CoreLogger( - db_client=db_client - ), db_client=DatabaseClient(), ) - async_core = AsyncCore( - adb_client=AsyncDatabaseClient(), + task_manager = TaskManager( + adb_client=adb_client, huggingface_interface=HuggingFaceInterface(), url_request_interface=URLRequestInterface(), html_parser=HTMLResponseParser( root_url_cache=RootURLCache() + ), + discord_poster=DiscordPoster( + webhook_url=env_var_manager.discord_webhook_url + ), + pdap_client=PDAPClient( + access_manager=AccessManager( + email=env_var_manager.pdap_email, + password=env_var_manager.pdap_password, + api_key=env_var_manager.pdap_api_key, + session=session + ) ) ) + async_collector_manager = AsyncCollectorManager( + logger=core_logger, + adb_client=adb_client, + post_collection_function_trigger=task_manager.task_trigger + ) + + async_core = AsyncCore( + adb_client=adb_client, + task_manager=task_manager, + collector_manager=async_collector_manager + ) async_scheduled_task_manager = AsyncScheduledTaskManager(async_core=async_core) # Pass dependencies into the app state app.state.core = source_collector_core app.state.async_core = async_core app.state.async_scheduled_task_manager = async_scheduled_task_manager + app.state.logger = core_logger # Startup logic yield # Code here runs before shutdown # Shutdown logic (if needed) - app.state.core.shutdown() # Clean up resources, close connections, etc. 
+ await core_logger.shutdown() + await async_core.shutdown() + await session.close() pass async def setup_database(db_client): # Initialize database if dev environment, otherwise apply migrations try: - get_from_env("DEV") db_client.init_db() except Exception as e: return @@ -67,12 +112,31 @@ async def setup_database(db_client): app = FastAPI( title="Source Collector API", description="API for collecting data sources", + docs_url='/api', version="0.1.0", lifespan=lifespan ) -app.include_router(root_router) -app.include_router(collector_router) -app.include_router(batch_router) -app.include_router(annotate_router) -app.include_router(url_router) \ No newline at end of file +@app.get("/docs", include_in_schema=False) +async def redirect_docs(): + return RedirectResponse(url="/api") + + +routers = [ + root_router, + collector_router, + batch_router, + annotate_router, + url_router, + task_router, + review_router, + search_router, + metrics_router +] + +for router in routers: + app.include_router(router) + + +if __name__ == "__main__": + uvicorn.run(app, host="0.0.0.0", port=8000) \ No newline at end of file diff --git a/api/routes/annotate.py b/api/routes/annotate.py index 25eab1d3..95512a0b 100644 --- a/api/routes/annotate.py +++ b/api/routes/annotate.py @@ -1,9 +1,17 @@ -from fastapi import APIRouter, Depends, Path +from typing import Optional + +from fastapi import APIRouter, Depends, Path, Query from api.dependencies import get_async_core from core.AsyncCore import AsyncCore -from core.DTOs.GetNextURLForRelevanceAnnotationResponse import GetNextURLForRelevanceAnnotationResponse -from core.DTOs.RelevanceAnnotationInfo import RelevanceAnnotationPostInfo +from core.DTOs.AllAnnotationPostInfo import AllAnnotationPostInfo +from core.DTOs.GetNextRecordTypeAnnotationResponseInfo import GetNextRecordTypeAnnotationResponseOuterInfo +from core.DTOs.GetNextRelevanceAnnotationResponseInfo import GetNextRelevanceAnnotationResponseOuterInfo +from core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAnnotationResponse, \ + URLAgencyAnnotationPostInfo +from core.DTOs.GetNextURLForAllAnnotationResponse import GetNextURLForAllAnnotationResponse +from core.DTOs.RecordTypeAnnotationPostInfo import RecordTypeAnnotationPostInfo +from core.DTOs.RelevanceAnnotationPostInfo import RelevanceAnnotationPostInfo from security_manager.SecurityManager import get_access_info, AccessInfo annotate_router = APIRouter( @@ -12,30 +20,138 @@ responses={404: {"description": "Not found"}}, ) +batch_query = Query( + description="The batch id of the next URL to get. " + "If not specified, defaults to first qualifying URL", + default=None +) @annotate_router.get("/relevance") async def get_next_url_for_relevance_annotation( access_info: AccessInfo = Depends(get_access_info), async_core: AsyncCore = Depends(get_async_core), -) -> GetNextURLForRelevanceAnnotationResponse: - result = await async_core.get_next_url_for_relevance_annotation(user_id=access_info.user_id) - return result + batch_id: Optional[int] = Query( + description="The batch id of the next URL to get. 
" + "If not specified, defaults to first qualifying URL", + default=None), +) -> GetNextRelevanceAnnotationResponseOuterInfo: + return await async_core.get_next_url_for_relevance_annotation( + user_id=access_info.user_id, + batch_id=batch_id + ) -@annotate_router.post("/relevance/{metadata_id}") +@annotate_router.post("/relevance/{url_id}") async def annotate_url_for_relevance_and_get_next_url( relevance_annotation_post_info: RelevanceAnnotationPostInfo, - metadata_id: int = Path(description="The metadata id for the associated URL metadata"), + url_id: int = Path(description="The URL id to annotate"), async_core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info) -) -> GetNextURLForRelevanceAnnotationResponse: + access_info: AccessInfo = Depends(get_access_info), + batch_id: Optional[int] = batch_query +) -> GetNextRelevanceAnnotationResponseOuterInfo: """ Post URL annotation and get next URL to annotate """ await async_core.submit_url_relevance_annotation( user_id=access_info.user_id, - metadata_id=metadata_id, - annotation=relevance_annotation_post_info + url_id=url_id, + relevant=relevance_annotation_post_info.is_relevant + ) + return await async_core.get_next_url_for_relevance_annotation( + user_id=access_info.user_id, + batch_id=batch_id + ) + +@annotate_router.get("/record-type") +async def get_next_url_for_record_type_annotation( + access_info: AccessInfo = Depends(get_access_info), + async_core: AsyncCore = Depends(get_async_core), + batch_id: Optional[int] = batch_query +) -> GetNextRecordTypeAnnotationResponseOuterInfo: + return await async_core.get_next_url_for_record_type_annotation( + user_id=access_info.user_id, + batch_id=batch_id + ) + +@annotate_router.post("/record-type/{url_id}") +async def annotate_url_for_record_type_and_get_next_url( + record_type_annotation_post_info: RecordTypeAnnotationPostInfo, + url_id: int = Path(description="The URL id to annotate"), + async_core: AsyncCore = Depends(get_async_core), + access_info: AccessInfo = Depends(get_access_info), + batch_id: Optional[int] = batch_query +) -> GetNextRecordTypeAnnotationResponseOuterInfo: + """ + Post URL annotation and get next URL to annotate + """ + await async_core.submit_url_record_type_annotation( + user_id=access_info.user_id, + url_id=url_id, + record_type=record_type_annotation_post_info.record_type, + ) + return await async_core.get_next_url_for_record_type_annotation( + user_id=access_info.user_id, + batch_id=batch_id + ) + +@annotate_router.get("/agency") +async def get_next_url_for_agency_annotation( + access_info: AccessInfo = Depends(get_access_info), + async_core: AsyncCore = Depends(get_async_core), + batch_id: Optional[int] = batch_query +) -> GetNextURLForAgencyAnnotationResponse: + return await async_core.get_next_url_agency_for_annotation( + user_id=access_info.user_id, + batch_id=batch_id + ) + +@annotate_router.post("/agency/{url_id}") +async def annotate_url_for_agency_and_get_next_url( + url_id: int, + agency_annotation_post_info: URLAgencyAnnotationPostInfo, + async_core: AsyncCore = Depends(get_async_core), + access_info: AccessInfo = Depends(get_access_info), + batch_id: Optional[int] = batch_query +) -> GetNextURLForAgencyAnnotationResponse: + """ + Post URL annotation and get next URL to annotate + """ + await async_core.submit_url_agency_annotation( + user_id=access_info.user_id, + url_id=url_id, + agency_post_info=agency_annotation_post_info + ) + return await async_core.get_next_url_agency_for_annotation( + user_id=access_info.user_id, 
+ batch_id=batch_id + ) + +@annotate_router.get("/all") +async def get_next_url_for_all_annotations( + access_info: AccessInfo = Depends(get_access_info), + async_core: AsyncCore = Depends(get_async_core), + batch_id: Optional[int] = batch_query +) -> GetNextURLForAllAnnotationResponse: + return await async_core.get_next_url_for_all_annotations( + batch_id=batch_id + ) + +@annotate_router.post("/all/{url_id}") +async def annotate_url_for_all_annotations_and_get_next_url( + url_id: int, + all_annotation_post_info: AllAnnotationPostInfo, + async_core: AsyncCore = Depends(get_async_core), + access_info: AccessInfo = Depends(get_access_info), + batch_id: Optional[int] = batch_query +) -> GetNextURLForAllAnnotationResponse: + """ + Post URL annotation and get next URL to annotate + """ + await async_core.submit_url_for_all_annotations( + user_id=access_info.user_id, + url_id=url_id, + post_info=all_annotation_post_info ) - result = await async_core.get_next_url_for_relevance_annotation(user_id=access_info.user_id) - return result + return await async_core.get_next_url_for_all_annotations( + batch_id=batch_id + ) \ No newline at end of file diff --git a/api/routes/batch.py b/api/routes/batch.py index 9405fec6..7ba0a2a4 100644 --- a/api/routes/batch.py +++ b/api/routes/batch.py @@ -3,9 +3,10 @@ from fastapi import Path, APIRouter from fastapi.params import Query, Depends -from api.dependencies import get_core +from api.dependencies import get_core, get_async_core from collector_db.DTOs.BatchInfo import BatchInfo from collector_manager.enums import CollectorType +from core.AsyncCore import AsyncCore from core.DTOs.GetBatchLogsResponse import GetBatchLogsResponse from core.DTOs.GetBatchStatusResponse import GetBatchStatusResponse from core.DTOs.GetDuplicatesByBatchResponse import GetDuplicatesByBatchResponse @@ -23,7 +24,7 @@ @batch_router.get("") -def get_batch_status( +async def get_batch_status( collector_type: Optional[CollectorType] = Query( description="Filter by collector type", default=None @@ -32,67 +33,77 @@ def get_batch_status( description="Filter by status", default=None ), + has_pending_urls: Optional[bool] = Query( + description="Filter by whether the batch has pending URLs", + default=None + ), page: int = Query( description="The page number", default=1 ), - core: SourceCollectorCore = Depends(get_core), + core: AsyncCore = Depends(get_async_core), access_info: AccessInfo = Depends(get_access_info), ) -> GetBatchStatusResponse: """ Get the status of recent batches """ - return core.get_batch_statuses(collector_type=collector_type, status=status, page=page) + return await core.get_batch_statuses( + collector_type=collector_type, + status=status, + has_pending_urls=has_pending_urls, + page=page + ) @batch_router.get("/{batch_id}") -def get_batch_info( +async def get_batch_info( batch_id: int = Path(description="The batch id"), - core: SourceCollectorCore = Depends(get_core), + core: AsyncCore = Depends(get_async_core), access_info: AccessInfo = Depends(get_access_info), ) -> BatchInfo: - return core.get_batch_info(batch_id) + result = await core.get_batch_info(batch_id) + return result @batch_router.get("/{batch_id}/urls") -def get_urls_by_batch( +async def get_urls_by_batch( batch_id: int = Path(description="The batch id"), page: int = Query( description="The page number", default=1 ), - core: SourceCollectorCore = Depends(get_core), + core: AsyncCore = Depends(get_async_core), access_info: AccessInfo = Depends(get_access_info), ) -> GetURLsByBatchResponse: - return 
core.get_urls_by_batch(batch_id, page=page) + return await core.get_urls_by_batch(batch_id, page=page) @batch_router.get("/{batch_id}/duplicates") -def get_duplicates_by_batch( +async def get_duplicates_by_batch( batch_id: int = Path(description="The batch id"), page: int = Query( description="The page number", default=1 ), - core: SourceCollectorCore = Depends(get_core), + core: AsyncCore = Depends(get_async_core), access_info: AccessInfo = Depends(get_access_info), ) -> GetDuplicatesByBatchResponse: - return core.get_duplicate_urls_by_batch(batch_id, page=page) + return await core.get_duplicate_urls_by_batch(batch_id, page=page) @batch_router.get("/{batch_id}/logs") -def get_batch_logs( +async def get_batch_logs( batch_id: int = Path(description="The batch id"), - core: SourceCollectorCore = Depends(get_core), + async_core: AsyncCore = Depends(get_async_core), access_info: AccessInfo = Depends(get_access_info), ) -> GetBatchLogsResponse: """ Retrieve the logs for a recent batch. Note that for later batches, the logs may not be available. """ - return core.get_batch_logs(batch_id) + return await async_core.get_batch_logs(batch_id) @batch_router.post("/{batch_id}/abort") -def abort_batch( +async def abort_batch( batch_id: int = Path(description="The batch id"), - core: SourceCollectorCore = Depends(get_core), + async_core: AsyncCore = Depends(get_async_core), access_info: AccessInfo = Depends(get_access_info), ) -> MessageResponse: - return core.abort_batch(batch_id) \ No newline at end of file + return await async_core.abort_batch(batch_id) \ No newline at end of file diff --git a/api/routes/collector.py b/api/routes/collector.py index b49d569c..16f5a900 100644 --- a/api/routes/collector.py +++ b/api/routes/collector.py @@ -1,11 +1,13 @@ from fastapi import APIRouter from fastapi.params import Depends -from api.dependencies import get_core +from api.dependencies import get_async_core from collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO from collector_manager.enums import CollectorType +from core.AsyncCore import AsyncCore from core.DTOs.CollectorStartInfo import CollectorStartInfo -from core.SourceCollectorCore import SourceCollectorCore +from core.DTOs.ManualBatchInputDTO import ManualBatchInputDTO +from core.DTOs.ManualBatchResponseDTO import ManualBatchResponseDTO from security_manager.SecurityManager import AccessInfo, get_access_info from source_collectors.auto_googler.DTOs import AutoGooglerInputDTO from source_collectors.ckan.DTOs import CKANInputDTO @@ -22,13 +24,13 @@ @collector_router.post("/example") async def start_example_collector( dto: ExampleInputDTO, - core: SourceCollectorCore = Depends(get_core), + core: AsyncCore = Depends(get_async_core), access_info: AccessInfo = Depends(get_access_info), ) -> CollectorStartInfo: """ Start the example collector """ - return core.initiate_collector( + return await core.initiate_collector( collector_type=CollectorType.EXAMPLE, dto=dto, user_id=access_info.user_id @@ -37,13 +39,13 @@ async def start_example_collector( @collector_router.post("/ckan") async def start_ckan_collector( dto: CKANInputDTO, - core: SourceCollectorCore = Depends(get_core), + core: AsyncCore = Depends(get_async_core), access_info: AccessInfo = Depends(get_access_info), ) -> CollectorStartInfo: """ Start the ckan collector """ - return core.initiate_collector( + return await core.initiate_collector( collector_type=CollectorType.CKAN, dto=dto, user_id=access_info.user_id @@ -52,13 +54,13 @@ async def start_ckan_collector( 
@collector_router.post("/common-crawler") async def start_common_crawler_collector( dto: CommonCrawlerInputDTO, - core: SourceCollectorCore = Depends(get_core), + core: AsyncCore = Depends(get_async_core), access_info: AccessInfo = Depends(get_access_info), ) -> CollectorStartInfo: """ Start the common crawler collector """ - return core.initiate_collector( + return await core.initiate_collector( collector_type=CollectorType.COMMON_CRAWLER, dto=dto, user_id=access_info.user_id @@ -67,13 +69,13 @@ async def start_common_crawler_collector( @collector_router.post("/auto-googler") async def start_auto_googler_collector( dto: AutoGooglerInputDTO, - core: SourceCollectorCore = Depends(get_core), + core: AsyncCore = Depends(get_async_core), access_info: AccessInfo = Depends(get_access_info), ) -> CollectorStartInfo: """ Start the auto googler collector """ - return core.initiate_collector( + return await core.initiate_collector( collector_type=CollectorType.AUTO_GOOGLER, dto=dto, user_id=access_info.user_id @@ -82,13 +84,13 @@ async def start_auto_googler_collector( @collector_router.post("/muckrock-simple") async def start_muckrock_collector( dto: MuckrockSimpleSearchCollectorInputDTO, - core: SourceCollectorCore = Depends(get_core), + core: AsyncCore = Depends(get_async_core), access_info: AccessInfo = Depends(get_access_info), ) -> CollectorStartInfo: """ Start the muckrock collector """ - return core.initiate_collector( + return await core.initiate_collector( collector_type=CollectorType.MUCKROCK_SIMPLE_SEARCH, dto=dto, user_id=access_info.user_id @@ -97,13 +99,13 @@ async def start_muckrock_collector( @collector_router.post("/muckrock-county") async def start_muckrock_county_collector( dto: MuckrockCountySearchCollectorInputDTO, - core: SourceCollectorCore = Depends(get_core), + core: AsyncCore = Depends(get_async_core), access_info: AccessInfo = Depends(get_access_info), ) -> CollectorStartInfo: """ Start the muckrock county level collector """ - return core.initiate_collector( + return await core.initiate_collector( collector_type=CollectorType.MUCKROCK_COUNTY_SEARCH, dto=dto, user_id=access_info.user_id @@ -112,14 +114,28 @@ async def start_muckrock_county_collector( @collector_router.post("/muckrock-all") async def start_muckrock_all_foia_collector( dto: MuckrockAllFOIARequestsCollectorInputDTO, - core: SourceCollectorCore = Depends(get_core), + core: AsyncCore = Depends(get_async_core), access_info: AccessInfo = Depends(get_access_info), ) -> CollectorStartInfo: """ Start the muckrock collector for all FOIA requests """ - return core.initiate_collector( + return await core.initiate_collector( collector_type=CollectorType.MUCKROCK_ALL_SEARCH, dto=dto, user_id=access_info.user_id + ) + +@collector_router.post("/manual") +async def upload_manual_collector( + dto: ManualBatchInputDTO, + core: AsyncCore = Depends(get_async_core), + access_info: AccessInfo = Depends(get_access_info), +) -> ManualBatchResponseDTO: + """ + Uploads a manual "collector" with existing data + """ + return await core.upload_manual_batch( + dto=dto, + user_id=access_info.user_id ) \ No newline at end of file diff --git a/api/routes/metrics.py b/api/routes/metrics.py new file mode 100644 index 00000000..d81aa2e6 --- /dev/null +++ b/api/routes/metrics.py @@ -0,0 +1,64 @@ +from fastapi import APIRouter +from fastapi.params import Query, Depends + +from api.dependencies import get_async_core +from core.AsyncCore import AsyncCore +from core.DTOs.GetMetricsBacklogResponse import GetMetricsBacklogResponseDTO +from 
core.DTOs.GetMetricsBatchesAggregatedResponseDTO import GetMetricsBatchesAggregatedResponseDTO +from core.DTOs.GetMetricsBatchesBreakdownResponseDTO import GetMetricsBatchesBreakdownResponseDTO +from core.DTOs.GetMetricsURLsAggregatedResponseDTO import GetMetricsURLsAggregatedResponseDTO +from core.DTOs.GetMetricsURLsBreakdownPendingResponseDTO import GetMetricsURLsBreakdownPendingResponseDTO +from core.DTOs.GetMetricsURLsBreakdownSubmittedResponseDTO import GetMetricsURLsBreakdownSubmittedResponseDTO +from security_manager.SecurityManager import AccessInfo, get_access_info + +metrics_router = APIRouter( + prefix="/metrics", + tags=["Metrics"], +) + + +@metrics_router.get("/batches/aggregated") +async def get_batches_aggregated_metrics( + core: AsyncCore = Depends(get_async_core), + access_info: AccessInfo = Depends(get_access_info) +) -> GetMetricsBatchesAggregatedResponseDTO: + return await core.get_batches_aggregated_metrics() + +@metrics_router.get("/batches/breakdown") +async def get_batches_breakdown_metrics( + core: AsyncCore = Depends(get_async_core), + access_info: AccessInfo = Depends(get_access_info), + page: int = Query( + description="The page number", + default=1 + ) +) -> GetMetricsBatchesBreakdownResponseDTO: + return await core.get_batches_breakdown_metrics(page=page) + +@metrics_router.get("/urls/aggregate") +async def get_urls_aggregated_metrics( + core: AsyncCore = Depends(get_async_core), + access_info: AccessInfo = Depends(get_access_info) +) -> GetMetricsURLsAggregatedResponseDTO: + return await core.get_urls_aggregated_metrics() + +@metrics_router.get("/urls/breakdown/submitted") +async def get_urls_breakdown_submitted_metrics( + core: AsyncCore = Depends(get_async_core), + access_info: AccessInfo = Depends(get_access_info) +) -> GetMetricsURLsBreakdownSubmittedResponseDTO: + return await core.get_urls_breakdown_submitted_metrics() + +@metrics_router.get("/urls/breakdown/pending") +async def get_urls_breakdown_pending_metrics( + core: AsyncCore = Depends(get_async_core), + access_info: AccessInfo = Depends(get_access_info) +) -> GetMetricsURLsBreakdownPendingResponseDTO: + return await core.get_urls_breakdown_pending_metrics() + +@metrics_router.get("/backlog") +async def get_backlog_metrics( + core: AsyncCore = Depends(get_async_core), + access_info: AccessInfo = Depends(get_access_info) +) -> GetMetricsBacklogResponseDTO: + return await core.get_backlog_metrics() \ No newline at end of file diff --git a/api/routes/review.py b/api/routes/review.py new file mode 100644 index 00000000..62bf5de6 --- /dev/null +++ b/api/routes/review.py @@ -0,0 +1,64 @@ +from typing import Optional + +from fastapi import APIRouter, Depends, Query + +from api.dependencies import get_async_core +from core.AsyncCore import AsyncCore +from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo, FinalReviewBaseInfo +from core.DTOs.GetNextURLForFinalReviewResponse import GetNextURLForFinalReviewResponse, \ + GetNextURLForFinalReviewOuterResponse +from security_manager.SecurityManager import AccessInfo, get_access_info, require_permission, Permissions + +review_router = APIRouter( + prefix="/review", + tags=["Review"], + responses={404: {"description": "Not found"}}, +) + +requires_final_review_permission = require_permission(Permissions.SOURCE_COLLECTOR_FINAL_REVIEW) + +@review_router.get("/next-source") +async def get_next_source( + core: AsyncCore = Depends(get_async_core), + access_info: AccessInfo = Depends(requires_final_review_permission), + batch_id: Optional[int] = Query( + 
description="The batch id of the next URL to get. " + "If not specified, defaults to first qualifying URL", + default=None), +) -> GetNextURLForFinalReviewOuterResponse: + next_source = await core.get_next_source_for_review(batch_id=batch_id) + return GetNextURLForFinalReviewOuterResponse(next_source=next_source) + +@review_router.post("/approve-source") +async def approve_source( + core: AsyncCore = Depends(get_async_core), + access_info: AccessInfo = Depends(requires_final_review_permission), + approval_info: FinalReviewApprovalInfo = FinalReviewApprovalInfo, + batch_id: Optional[int] = Query( + description="The batch id of the next URL to get. " + "If not specified, defaults to first qualifying URL", + default=None), +) -> GetNextURLForFinalReviewOuterResponse: + await core.approve_url( + approval_info, + access_info=access_info, + ) + next_source = await core.get_next_source_for_review(batch_id=batch_id) + return GetNextURLForFinalReviewOuterResponse(next_source=next_source) + +@review_router.post("/reject-source") +async def reject_source( + core: AsyncCore = Depends(get_async_core), + access_info: AccessInfo = Depends(requires_final_review_permission), + review_info: FinalReviewBaseInfo = FinalReviewBaseInfo, + batch_id: Optional[int] = Query( + description="The batch id of the next URL to get. " + "If not specified, defaults to first qualifying URL", + default=None), +) -> GetNextURLForFinalReviewOuterResponse: + await core.reject_url( + url_id=review_info.url_id, + access_info=access_info, + ) + next_source = await core.get_next_source_for_review(batch_id=batch_id) + return GetNextURLForFinalReviewOuterResponse(next_source=next_source) \ No newline at end of file diff --git a/api/routes/search.py b/api/routes/search.py new file mode 100644 index 00000000..4513bb2f --- /dev/null +++ b/api/routes/search.py @@ -0,0 +1,20 @@ +from fastapi import APIRouter, Query, Depends + +from api.dependencies import get_async_core +from core.AsyncCore import AsyncCore +from core.DTOs.SearchURLResponse import SearchURLResponse +from security_manager.SecurityManager import get_access_info, AccessInfo + +search_router = APIRouter(prefix="/search", tags=["search"]) + + +@search_router.get("/url") +async def search_url( + url: str = Query(description="The URL to search for"), + access_info: AccessInfo = Depends(get_access_info), + async_core: AsyncCore = Depends(get_async_core), +) -> SearchURLResponse: + """ + Search for a URL in the database + """ + return await async_core.search_for_url(url) \ No newline at end of file diff --git a/api/routes/task.py b/api/routes/task.py new file mode 100644 index 00000000..44971959 --- /dev/null +++ b/api/routes/task.py @@ -0,0 +1,58 @@ +from typing import Optional + +from fastapi import APIRouter, Depends, Query, Path + +from api.dependencies import get_async_core +from collector_db.DTOs.GetTaskStatusResponseInfo import GetTaskStatusResponseInfo +from collector_db.DTOs.TaskInfo import TaskInfo +from collector_db.enums import TaskType +from core.AsyncCore import AsyncCore +from core.enums import BatchStatus +from security_manager.SecurityManager import AccessInfo, get_access_info + +task_router = APIRouter( + prefix="/task", + tags=["Task"], + responses={404: {"description": "Not found"}}, +) + + +@task_router.get("") +async def get_tasks( + page: int = Query( + description="The page number", + default=1 + ), + task_status: Optional[BatchStatus] = Query( + description="Filter by task status", + default=None + ), + task_type: Optional[TaskType] = Query( + 
description="Filter by task type", + default=None + ), + async_core: AsyncCore = Depends(get_async_core), + access_info: AccessInfo = Depends(get_access_info) +): + return await async_core.get_tasks( + page=page, + task_type=task_type, + task_status=task_status + ) + +@task_router.get("/status") +async def get_task_status( + async_core: AsyncCore = Depends(get_async_core), + access_info: AccessInfo = Depends(get_access_info) +) -> GetTaskStatusResponseInfo: + return await async_core.get_current_task_status() + +@task_router.get("/{task_id}") +async def get_task_info( + task_id: int = Path(description="The task id"), + async_core: AsyncCore = Depends(get_async_core), + access_info: AccessInfo = Depends(get_access_info) +) -> TaskInfo: + return await async_core.get_task_info(task_id) + + diff --git a/apply_migrations.py b/apply_migrations.py index 5be4cd99..183e7d11 100644 --- a/apply_migrations.py +++ b/apply_migrations.py @@ -3,7 +3,7 @@ from collector_db.helper_functions import get_postgres_connection_string -if __name__ == "__main__": +def apply_migrations(): print("Applying migrations...") alembic_config = Config("alembic.ini") alembic_config.set_main_option( @@ -11,4 +11,7 @@ get_postgres_connection_string() ) command.upgrade(alembic_config, "head") - print("Migrations applied.") \ No newline at end of file + print("Migrations applied.") + +if __name__ == "__main__": + apply_migrations() \ No newline at end of file diff --git a/collector_db/AsyncDatabaseClient.py b/collector_db/AsyncDatabaseClient.py index db94a8d5..ac6216d6 100644 --- a/collector_db/AsyncDatabaseClient.py +++ b/collector_db/AsyncDatabaseClient.py @@ -1,40 +1,104 @@ +from datetime import datetime, timedelta from functools import wraps +from typing import Optional, Type, Any, List -from sqlalchemy import select, exists +from fastapi import HTTPException +from sqlalchemy import select, exists, func, case, desc, Select, not_, and_, update, asc, delete, insert, CTE, literal +from sqlalchemy.dialects import postgresql +from sqlalchemy.exc import IntegrityError from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker -from sqlalchemy.orm import selectinload +from sqlalchemy.orm import selectinload, joinedload, QueryableAttribute, aliased +from sqlalchemy.sql.functions import coalesce +from starlette import status from collector_db.ConfigManager import ConfigManager -from collector_db.DTOs.MetadataAnnotationInfo import MetadataAnnotationInfo -from collector_db.DTOs.URLAnnotationInfo import URLAnnotationInfo +from collector_db.DTOConverter import DTOConverter +from collector_db.DTOs.BatchInfo import BatchInfo +from collector_db.DTOs.DuplicateInfo import DuplicateInsertInfo, DuplicateInfo +from collector_db.DTOs.InsertURLsInfo import InsertURLsInfo +from collector_db.DTOs.LogInfo import LogInfo, LogOutputInfo +from collector_db.DTOs.TaskInfo import TaskInfo from collector_db.DTOs.URLErrorInfos import URLErrorPydanticInfo -from collector_db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo -from collector_db.DTOs.URLMetadataInfo import URLMetadataInfo -from collector_db.DTOs.URLWithHTML import URLWithHTML -from collector_db.enums import URLMetadataAttributeType, ValidationStatus, ValidationSource -from collector_db.helper_functions import get_postgres_connection_string -from collector_db.models import URLMetadata, URL, URLErrorInfo, URLHTMLContent, Base, MetadataAnnotation, \ - RootURL -from collector_manager.enums import URLStatus -from core.DTOs.GetURLsResponseInfo import GetURLsResponseInfo, 
GetURLsResponseMetadataInfo, GetURLsResponseErrorInfo, \ +from collector_db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo, HTMLContentType +from collector_db.DTOs.URLInfo import URLInfo +from collector_db.DTOs.URLMapping import URLMapping +from collector_db.StatementComposer import StatementComposer +from collector_db.constants import PLACEHOLDER_AGENCY_NAME +from collector_db.enums import TaskType +from collector_db.models import URL, URLErrorInfo, URLHTMLContent, Base, \ + RootURL, Task, TaskError, LinkTaskURL, Batch, Agency, AutomatedUrlAgencySuggestion, \ + UserUrlAgencySuggestion, AutoRelevantSuggestion, AutoRecordTypeSuggestion, UserRelevantSuggestion, \ + UserRecordTypeSuggestion, ReviewingUserURL, URLOptionalDataSourceMetadata, ConfirmedURLAgency, Duplicate, Log, \ + BacklogSnapshot, URLDataSource, URLCheckedForDuplicate +from collector_manager.enums import URLStatus, CollectorType +from core.DTOs.AllAnnotationPostInfo import AllAnnotationPostInfo +from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo +from core.DTOs.GetMetricsBacklogResponse import GetMetricsBacklogResponseDTO, GetMetricsBacklogResponseInnerDTO +from core.DTOs.GetMetricsBatchesAggregatedResponseDTO import GetMetricsBatchesAggregatedResponseDTO, \ + GetMetricsBatchesAggregatedInnerResponseDTO +from core.DTOs.GetMetricsBatchesBreakdownResponseDTO import GetMetricsBatchesBreakdownResponseDTO, \ + GetMetricsBatchesBreakdownInnerResponseDTO +from core.DTOs.GetMetricsURLsAggregatedResponseDTO import GetMetricsURLsAggregatedResponseDTO +from core.DTOs.GetMetricsURLsBreakdownPendingResponseDTO import GetMetricsURLsBreakdownPendingResponseDTO, \ + GetMetricsURLsBreakdownPendingResponseInnerDTO +from core.DTOs.GetMetricsURLsBreakdownSubmittedResponseDTO import GetMetricsURLsBreakdownSubmittedResponseDTO, \ + GetMetricsURLsBreakdownSubmittedInnerDTO +from core.DTOs.GetNextRecordTypeAnnotationResponseInfo import GetNextRecordTypeAnnotationResponseInfo +from core.DTOs.GetNextRelevanceAnnotationResponseInfo import GetNextRelevanceAnnotationResponseInfo +from core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAnnotationResponse, \ + GetNextURLForAgencyAgencyInfo, GetNextURLForAgencyAnnotationInnerResponse +from core.DTOs.GetNextURLForAllAnnotationResponse import GetNextURLForAllAnnotationResponse, \ + GetNextURLForAllAnnotationInnerResponse +from core.DTOs.GetNextURLForFinalReviewResponse import GetNextURLForFinalReviewResponse, FinalReviewAnnotationInfo, \ + FinalReviewOptionalMetadata +from core.DTOs.GetTasksResponse import GetTasksResponse, GetTasksResponseTaskInfo +from core.DTOs.GetURLsResponseInfo import GetURLsResponseInfo, GetURLsResponseErrorInfo, \ GetURLsResponseInnerInfo -from core.DTOs.RelevanceAnnotationInfo import RelevanceAnnotationPostInfo +from core.DTOs.ManualBatchInputDTO import ManualBatchInputDTO +from core.DTOs.ManualBatchResponseDTO import ManualBatchResponseDTO +from core.DTOs.SearchURLResponse import SearchURLResponse +from core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo +from core.DTOs.task_data_objects.AgencyIdentificationTDO import AgencyIdentificationTDO +from core.DTOs.task_data_objects.SubmitApprovedURLTDO import SubmitApprovedURLTDO, SubmittedURLInfo +from core.DTOs.task_data_objects.URLDuplicateTDO import URLDuplicateTDO +from core.DTOs.task_data_objects.URLMiscellaneousMetadataTDO import URLMiscellaneousMetadataTDO, URLHTMLMetadataInfo +from core.EnvVarManager import EnvVarManager +from core.enums import BatchStatus, SuggestionType, 
RecordType +from html_tag_collector.DataClassTags import convert_to_response_html_info + +# Type Hints + +UserSuggestionModel = UserRelevantSuggestion or UserRecordTypeSuggestion or UserUrlAgencySuggestion +AutoSuggestionModel = AutoRelevantSuggestion or AutoRecordTypeSuggestion or AutomatedUrlAgencySuggestion + def add_standard_limit_and_offset(statement, page, limit=100): offset = (page - 1) * limit return statement.limit(limit).offset(offset) + class AsyncDatabaseClient: - def __init__(self, db_url: str = get_postgres_connection_string(is_async=True)): + def __init__(self, db_url: Optional[str] = None): + if db_url is None: + db_url = EnvVarManager.get().get_postgres_connection_string(is_async=True) self.engine = create_async_engine( url=db_url, echo=ConfigManager.get_sqlalchemy_echo(), ) self.session_maker = async_sessionmaker(bind=self.engine, expire_on_commit=False) + self.statement_composer = StatementComposer() + + @staticmethod + async def _add_models(session: AsyncSession, model_class, models) -> list[int]: + instances = [model_class(**model.model_dump()) for model in models] + session.add_all(instances) + await session.flush() + return [instance.id for instance in instances] @staticmethod def session_manager(method): """Decorator to manage async session lifecycle.""" + @wraps(method) async def wrapper(self, *args, **kwargs): async with self.session_maker() as session: @@ -45,35 +109,252 @@ async def wrapper(self, *args, **kwargs): except Exception as e: await session.rollback() raise e + return wrapper + # region relevant @session_manager - async def get_url_metadata_by_status( + async def add_auto_relevant_suggestion( self, session: AsyncSession, - url_status: URLStatus, - offset: int = 0 + url_id: int, + relevant: bool ): - statement = (select(URLMetadata) - .join(URL) - .where(URL.outcome == url_status.value) - .limit(100) - .offset(offset) - .order_by(URLMetadata.id)) - scalar_result = await session.scalars(statement) - model_result = scalar_result.all() - return [URLMetadataInfo(**url_metadata.__dict__) for url_metadata in model_result] + suggestion = AutoRelevantSuggestion( + url_id=url_id, + relevant=relevant + ) + session.add(suggestion) + + @staticmethod + async def get_user_suggestion( + session: AsyncSession, + model: UserSuggestionModel, + user_id: int, + url_id: int + ) -> Optional[UserSuggestionModel]: + statement = Select(model).where( + and_( + model.url_id == url_id, + model.user_id == user_id + ) + ) + result = await session.execute(statement) + return result.unique().scalar_one_or_none() + + @staticmethod + async def get_next_url_for_user_annotation( + session: AsyncSession, + user_suggestion_model_to_exclude: UserSuggestionModel, + auto_suggestion_relationship: QueryableAttribute, + batch_id: Optional[int], + check_if_annotated_not_relevant: bool = False + ) -> URL: + url_query = ( + select( + URL, + ) + .where(URL.outcome == URLStatus.PENDING.value) + # URL must not have user suggestion + .where( + StatementComposer.user_suggestion_not_exists(user_suggestion_model_to_exclude) + ) + ) + + if check_if_annotated_not_relevant: + url_query = url_query.where( + not_( + exists( + select(UserRelevantSuggestion) + .where( + UserRelevantSuggestion.url_id == URL.id, + UserRelevantSuggestion.relevant == False + ) + ) + ) + ) + + if batch_id is not None: + url_query = url_query.where(URL.batch_id == batch_id) + + url_query = url_query.options( + joinedload(auto_suggestion_relationship), + joinedload(URL.html_content) + ).limit(1) + + raw_result = await 
session.execute(url_query) + + return raw_result.unique().scalars().one_or_none() + + @session_manager + async def add_user_relevant_suggestion( + self, + session: AsyncSession, + url_id: int, + user_id: int, + relevant: bool + ): + prior_suggestion = await self.get_user_suggestion( + session, + model=UserRelevantSuggestion, + user_id=user_id, + url_id=url_id + ) + if prior_suggestion is not None: + prior_suggestion.relevant = relevant + return + + suggestion = UserRelevantSuggestion( + url_id=url_id, + user_id=user_id, + relevant=relevant + ) + session.add(suggestion) + + @session_manager + async def get_next_url_for_relevance_annotation( + self, + session: AsyncSession, + user_id: int, + batch_id: Optional[int] + ) -> Optional[GetNextRelevanceAnnotationResponseInfo]: + + url = await self.get_next_url_for_user_annotation( + session, + user_suggestion_model_to_exclude=UserRelevantSuggestion, + auto_suggestion_relationship=URL.auto_relevant_suggestion, + batch_id=batch_id + ) + if url is None: + return None + + # Next, get all HTML content for the URL + html_response_info = DTOConverter.html_content_list_to_html_response_info( + url.html_content + ) + + if url.auto_relevant_suggestion is not None: + suggestion = url.auto_relevant_suggestion.relevant + else: + suggestion = None + + return GetNextRelevanceAnnotationResponseInfo( + url_info=URLMapping( + url=url.url, + url_id=url.id + ), + suggested_relevant=suggestion, + html_info=html_response_info + ) + + #endregion relevant + + #region record_type + + @session_manager + async def get_next_url_for_record_type_annotation( + self, + session: AsyncSession, + user_id: int, + batch_id: Optional[int] + ) -> Optional[GetNextRecordTypeAnnotationResponseInfo]: + + url = await self.get_next_url_for_user_annotation( + session, + user_suggestion_model_to_exclude=UserRecordTypeSuggestion, + auto_suggestion_relationship=URL.auto_record_type_suggestion, + batch_id=batch_id, + check_if_annotated_not_relevant=True + ) + if url is None: + return None + + # Next, get all HTML content for the URL + html_response_info = DTOConverter.html_content_list_to_html_response_info( + url.html_content + ) + + if url.auto_record_type_suggestion is not None: + suggestion = url.auto_record_type_suggestion.record_type + else: + suggestion = None + + return GetNextRecordTypeAnnotationResponseInfo( + url_info=URLMapping( + url=url.url, + url_id=url.id + ), + suggested_record_type=suggestion, + html_info=html_response_info + ) + + + @session_manager + async def add_auto_record_type_suggestions( + self, + session: AsyncSession, + url_and_record_type_list: list[tuple[int, RecordType]] + ): + for url_id, record_type in url_and_record_type_list: + suggestion = AutoRecordTypeSuggestion( + url_id=url_id, + record_type=record_type.value + ) + session.add(suggestion) + + @session_manager + async def add_auto_record_type_suggestion( + self, + session: AsyncSession, + url_id: int, + record_type: RecordType + ): + + suggestion = AutoRecordTypeSuggestion( + url_id=url_id, + record_type=record_type.value + ) + session.add(suggestion) @session_manager - async def add_url_metadata(self, session: AsyncSession, url_metadata_info: URLMetadataInfo): - url_metadata = URLMetadata(**url_metadata_info.model_dump()) - session.add(url_metadata) + async def add_auto_relevance_suggestions( + self, + session: AsyncSession, + url_and_relevance_type_list: list[tuple[int, bool]] + ): + for url_id, relevant in url_and_relevance_type_list: + suggestion = AutoRelevantSuggestion( + url_id=url_id, + 
relevant=relevant + ) + session.add(suggestion) @session_manager - async def add_url_metadatas(self, session: AsyncSession, url_metadata_infos: list[URLMetadataInfo]): - for url_metadata_info in url_metadata_infos: - url_metadata = URLMetadata(**url_metadata_info.model_dump()) - session.add(url_metadata) + async def add_user_record_type_suggestion( + self, + session: AsyncSession, + url_id: int, + user_id: int, + record_type: RecordType + ): + prior_suggestion = await self.get_user_suggestion( + session, + model=UserRecordTypeSuggestion, + user_id=user_id, + url_id=url_id + ) + if prior_suggestion is not None: + prior_suggestion.record_type = record_type.value + return + + suggestion = UserRecordTypeSuggestion( + url_id=url_id, + user_id=user_id, + record_type=record_type.value + ) + session.add(suggestion) + + #endregion record_type @session_manager async def add_url_error_infos(self, session: AsyncSession, url_error_infos: list[URLErrorPydanticInfo]): @@ -88,221 +369,193 @@ async def add_url_error_infos(self, session: AsyncSession, url_error_infos: list @session_manager async def get_urls_with_errors(self, session: AsyncSession) -> list[URLErrorPydanticInfo]: - statement = (select(URL, URLErrorInfo.error, URLErrorInfo.updated_at) + statement = (select(URL, URLErrorInfo.error, URLErrorInfo.updated_at, URLErrorInfo.task_id) .join(URLErrorInfo) .where(URL.outcome == URLStatus.ERROR.value) .order_by(URL.id)) scalar_result = await session.execute(statement) results = scalar_result.all() final_results = [] - for url, error, updated_at in results: - final_results.append(URLErrorPydanticInfo(url_id=url.id, error=error, updated_at=updated_at)) + for url, error, updated_at, task_id in results: + final_results.append(URLErrorPydanticInfo( + url_id=url.id, + error=error, + updated_at=updated_at, + task_id=task_id + )) return final_results @session_manager async def add_html_content_infos(self, session: AsyncSession, html_content_infos: list[URLHTMLContentInfo]): - for html_content_info in html_content_infos: - # Add HTML Content Info to database - db_html_content_info = URLHTMLContent(**html_content_info.model_dump()) - session.add(db_html_content_info) + await self._add_models(session, URLHTMLContent, html_content_infos) @session_manager - async def get_pending_urls_without_html_data(self, session: AsyncSession): - # TODO: Add test that includes some urls WITH html data. Check they're not returned - statement = (select(URL). - outerjoin(URLHTMLContent). - where(URLHTMLContent.id == None). - where(URL.outcome == URLStatus.PENDING.value). - limit(100). - order_by(URL.id)) + async def has_pending_urls_without_html_data(self, session: AsyncSession) -> bool: + statement = self.statement_composer.pending_urls_without_html_data() + statement = statement.limit(1) scalar_result = await session.scalars(statement) - return scalar_result.all() + return bool(scalar_result.first()) + + @session_manager + async def has_pending_urls_missing_miscellaneous_metadata(self, session: AsyncSession) -> bool: + query = StatementComposer.pending_urls_missing_miscellaneous_metadata_query() + query = query.limit(1) + + scalar_result = await session.scalars(query) + return bool(scalar_result.first()) @session_manager - async def get_urls_with_html_data_and_no_relevancy_metadata( + async def get_pending_urls_missing_miscellaneous_metadata( self, session: AsyncSession - ) -> list[URLWithHTML]: - # Get URLs with no relevancy metadata - statement = (select(URL.id, URL.url, URLHTMLContent). - join(URLHTMLContent). 
- where(URL.outcome == URLStatus.PENDING.value) - # No relevancy metadata - .where( - ~exists( - select(URLMetadata.id). - where( - URLMetadata.url_id == URL.id, - URLMetadata.attribute == URLMetadataAttributeType.RELEVANT.value - ) - ) - ) - .limit(100) - .order_by(URL.id) + ) -> list[URLMiscellaneousMetadataTDO]: + query = StatementComposer.pending_urls_missing_miscellaneous_metadata_query() + query = ( + query.options( + selectinload(URL.batch), + selectinload(URL.html_content) + ).limit(100).order_by(URL.id) ) - raw_result = await session.execute(statement) - result = raw_result.all() - url_ids_to_urls = {url_id: url for url_id, url, _ in result} - url_ids_to_html_info = {url_id: [] for url_id, _, _ in result} - for url_id, _, html_info in result: - url_ids_to_html_info[url_id].append( - URLHTMLContentInfo(**html_info.__dict__) + scalar_result = await session.scalars(query) + all_results = scalar_result.all() + final_results = [] + for result in all_results: + tdo = URLMiscellaneousMetadataTDO( + url_id=result.id, + collector_metadata=result.collector_metadata or {}, + collector_type=CollectorType(result.batch.strategy), ) + html_info = URLHTMLMetadataInfo() + for html_content in result.html_content: + if html_content.content_type == HTMLContentType.TITLE.value: + html_info.title = html_content.content + elif html_content.content_type == HTMLContentType.DESCRIPTION.value: + html_info.description = html_content.content + tdo.html_metadata_info = html_info + final_results.append(tdo) + return final_results - final_results = [] - for url_id, url in url_ids_to_urls.items(): - url_with_html = URLWithHTML( - url_id=url_id, - url=url, - html_infos=url_ids_to_html_info[url_id] + @session_manager + async def add_miscellaneous_metadata(self, session: AsyncSession, tdos: list[URLMiscellaneousMetadataTDO]): + updates = [] + + for tdo in tdos: + update_query = update( + URL + ).where( + URL.id == tdo.url_id + ).values( + name=tdo.name, + description=tdo.description, ) - final_results.append(url_with_html) + + updates.append(update_query) + + for stmt in updates: + await session.execute(stmt) + + for tdo in tdos: + metadata_object = URLOptionalDataSourceMetadata( + url_id=tdo.url_id, + record_formats=tdo.record_formats, + data_portal_type=tdo.data_portal_type, + supplying_entity=tdo.supplying_entity + ) + session.add(metadata_object) - return final_results @session_manager - async def get_urls_with_metadata( + async def get_pending_urls_without_html_data(self, session: AsyncSession): + # TODO: Add test that includes some urls WITH html data. Check they're not returned + statement = self.statement_composer.pending_urls_without_html_data() + statement = statement.limit(100).order_by(URL.id) + scalar_result = await session.scalars(statement) + return scalar_result.all() + + async def get_urls_with_html_data_and_without_models( self, session: AsyncSession, - attribute: URLMetadataAttributeType, - validation_status: ValidationStatus, - offset: int = 0 - ) -> list[URLMetadataInfo]: - statement = (select(URL.id, URLMetadata.id). - join(URLMetadata). - where(URLMetadata.attribute == attribute.value). - where(URLMetadata.validation_status == validation_status.value). - limit(100). - offset(offset). 
- order_by(URL.id) - ) - + model: Type[Base] + ): + statement = (select(URL) + .options(selectinload(URL.html_content)) + .where(URL.outcome == URLStatus.PENDING.value)) + statement = self.statement_composer.exclude_urls_with_extant_model( + statement=statement, + model=model + ) + statement = statement.limit(100).order_by(URL.id) raw_result = await session.execute(statement) - result = raw_result.all() - final_results = [] - for url_id, url_metadata_id in result: - info = URLMetadataInfo( - url_id=url_id, - id=url_metadata_id, - ) - final_results.append(info) + urls: list[URL] = raw_result.unique().scalars().all() + final_results = DTOConverter.url_list_to_url_with_html_list(urls) return final_results @session_manager - async def update_url_metadata_status(self, session: AsyncSession, metadata_ids: list[int], validation_status: ValidationStatus): - for metadata_id in metadata_ids: - statement = select(URLMetadata).where(URLMetadata.id == metadata_id) - scalar_result = await session.scalars(statement) - url_metadata = scalar_result.first() - url_metadata.validation_status = validation_status + async def get_urls_with_html_data_and_without_auto_record_type_suggestion( + self, + session: AsyncSession + ): + return await self.get_urls_with_html_data_and_without_models( + session=session, + model=AutoRecordTypeSuggestion + ) @session_manager - async def get_next_url_for_relevance_annotation( + async def get_urls_with_html_data_and_without_auto_relevant_suggestion( self, - session: AsyncSession, - user_id: int - ) -> URLAnnotationInfo: - # Get a URL, its relevancy metadata ID, and HTML data - # For a URL which has not yet been annotated by this user id - # First, subquery retrieving URL and its metadata ID where its relevant metadata - # does not have an annotation for that user - subquery = ( - select( - URL.id.label("url_id"), - URL.url, - URLMetadata.id.label("metadata_id"), - ) - .join(URLMetadata) - # Metadata must be relevant - .where(URLMetadata.attribute == URLMetadataAttributeType.RELEVANT.value) - # Metadata must not be validated - .where(URLMetadata.validation_status == ValidationStatus.PENDING_VALIDATION.value) - # URL must have HTML content entries - .where(exists(select(URLHTMLContent).where(URLHTMLContent.url_id == URL.id))) - # URL must not have been annotated by the user - .where(~exists( - select(MetadataAnnotation). 
- where( - MetadataAnnotation.metadata_id == URLMetadata.id, - MetadataAnnotation.user_id == user_id - ) - )) - .limit(1) + session: AsyncSession + ): + return await self.get_urls_with_html_data_and_without_models( + session=session, + model=AutoRelevantSuggestion ) - raw_result = await session.execute(subquery) - result = raw_result.all() - - # Next, get all HTML content for the URL - - statement = ( - select( - subquery.c.url, - subquery.c.metadata_id, - URLHTMLContent.content_type, - URLHTMLContent.content, - ) - .join(URLHTMLContent) - .where(subquery.c.url_id == URLHTMLContent.url_id) + async def has_urls_with_html_data_and_without_models( + self, + session: AsyncSession, + model: Type[Base] + ) -> bool: + statement = (select(URL) + .join(URLHTMLContent) + .where(URL.outcome == URLStatus.PENDING.value)) + # Exclude URLs with auto suggested record types + statement = self.statement_composer.exclude_urls_with_extant_model( + statement=statement, + model=model ) + statement = statement.limit(1) + scalar_result = await session.scalars(statement) + return bool(scalar_result.first()) - raw_result = await session.execute(statement) - result = raw_result.all() - - if len(result) == 0: - # No available URLs to annotate - return None - annotation_info = URLAnnotationInfo( - url=result[0][0], - metadata_id=result[0][1], - html_infos=[] + @session_manager + async def has_urls_with_html_data_and_without_auto_record_type_suggestion(self, session: AsyncSession) -> bool: + return await self.has_urls_with_html_data_and_without_models( + session=session, + model=AutoRecordTypeSuggestion ) - for _, _, content_type, content in result: - html_info = URLHTMLContentInfo( - content_type=content_type, - content=content - ) - annotation_info.html_infos.append(html_info) - return annotation_info @session_manager - async def add_relevance_annotation( - self, - session: AsyncSession, - user_id: int, - metadata_id: int, - annotation_info: RelevanceAnnotationPostInfo): - annotation = MetadataAnnotation( - metadata_id=metadata_id, - user_id=user_id, - value=str(annotation_info.is_relevant) + async def has_urls_with_html_data_and_without_auto_relevant_suggestion(self, session: AsyncSession) -> bool: + return await self.has_urls_with_html_data_and_without_models( + session=session, + model=AutoRelevantSuggestion ) - session.add(annotation) - @session_manager - async def get_annotations_for_metadata_id( - self, - session: AsyncSession, - metadata_id: int - ) -> list[MetadataAnnotation]: - statement = (select(MetadataAnnotation). 
- where(MetadataAnnotation.metadata_id == metadata_id)) - scalar_result = await session.scalars(statement) - all_results = scalar_result.all() - return [MetadataAnnotationInfo(**result.__dict__) for result in all_results] @session_manager - async def get_all(self, session, model: Base): + async def get_all(self, session, model: Base, order_by_attribute: Optional[str] = None) -> list[Base]: """ Get all records of a model Used primarily in testing """ statement = select(model) + if order_by_attribute: + statement = statement.order_by(getattr(model, order_by_attribute)) result = await session.execute(statement) return result.scalars().all() @@ -324,8 +577,8 @@ async def add_to_root_url_cache(self, session: AsyncSession, url: str, page_titl @session_manager async def get_urls(self, session: AsyncSession, page: int, errors: bool) -> GetURLsResponseInfo: statement = select(URL).options( - selectinload(URL.url_metadata), selectinload(URL.error_info) - ) + selectinload(URL.error_info) + ).order_by(URL.id) if errors: # Only return URLs with errors statement = statement.where( @@ -338,18 +591,6 @@ async def get_urls(self, session: AsyncSession, page: int, errors: bool) -> GetU all_results = execute_result.scalars().all() final_results = [] for result in all_results: - metadata_results = [] - for metadata in result.url_metadata: - metadata_result = GetURLsResponseMetadataInfo( - id=metadata.id, - attribute=URLMetadataAttributeType(metadata.attribute), - value=metadata.value, - validation_status=ValidationStatus(metadata.validation_status), - validation_source=ValidationSource(metadata.validation_source), - created_at=metadata.created_at, - updated_at=metadata.updated_at - ) - metadata_results.append(metadata_result) error_results = [] for error in result.error_info: error_result = GetURLsResponseErrorInfo( @@ -368,7 +609,6 @@ async def get_urls(self, session: AsyncSession, page: int, errors: bool) -> GetU updated_at=result.updated_at, created_at=result.created_at, errors=error_results, - metadata=metadata_results ) ) @@ -377,5 +617,1658 @@ async def get_urls(self, session: AsyncSession, page: int, errors: bool) -> GetU count=len(final_results) ) + @session_manager + async def initiate_task( + self, + session: AsyncSession, + task_type: TaskType + ) -> int: + # Create Task + task = Task( + task_type=task_type, + task_status=BatchStatus.IN_PROCESS.value + ) + session.add(task) + # Return Task ID + await session.flush() + await session.refresh(task) + return task.id + + @session_manager + async def update_task_status(self, session: AsyncSession, task_id: int, status: BatchStatus): + task = await session.get(Task, task_id) + task.task_status = status.value + await session.commit() + + @session_manager + async def add_task_error(self, session: AsyncSession, task_id: int, error: str): + task_error = TaskError( + task_id=task_id, + error=error + ) + session.add(task_error) + await session.commit() + + @session_manager + async def get_task_info(self, session: AsyncSession, task_id: int) -> TaskInfo: + # Get Task + result = await session.execute( + select(Task) + .where(Task.id == task_id) + .options( + selectinload(Task.urls), + selectinload(Task.error), + selectinload(Task.errored_urls) + ) + ) + task = result.scalars().first() + error = task.error[0].error if len(task.error) > 0 else None + # Get error info if any + # Get URLs + urls = task.urls + url_infos = [] + for url in urls: + url_info = URLInfo( + id=url.id, + batch_id=url.batch_id, + url=url.url, + collector_metadata=url.collector_metadata, + 
outcome=URLStatus(url.outcome), + updated_at=url.updated_at + ) + url_infos.append(url_info) + + errored_urls = [] + for url in task.errored_urls: + url_error_info = URLErrorPydanticInfo( + task_id=url.task_id, + url_id=url.url_id, + error=url.error, + updated_at=url.updated_at + ) + errored_urls.append(url_error_info) + return TaskInfo( + task_type=TaskType(task.task_type), + task_status=BatchStatus(task.task_status), + error_info=error, + updated_at=task.updated_at, + urls=url_infos, + url_errors=errored_urls + ) + + @session_manager + async def get_html_content_info(self, session: AsyncSession, url_id: int) -> list[URLHTMLContentInfo]: + session_result = await session.execute( + select(URLHTMLContent) + .where(URLHTMLContent.url_id == url_id) + ) + results = session_result.scalars().all() + return [URLHTMLContentInfo(**result.__dict__) for result in results] + + @session_manager + async def link_urls_to_task(self, session: AsyncSession, task_id: int, url_ids: list[int]): + for url_id in url_ids: + link = LinkTaskURL( + url_id=url_id, + task_id=task_id + ) + session.add(link) + + @session_manager + async def get_tasks( + self, + session: AsyncSession, + task_type: Optional[TaskType] = None, + task_status: Optional[BatchStatus] = None, + page: int = 1 + ) -> GetTasksResponse: + url_count_subquery = self.statement_composer.simple_count_subquery( + LinkTaskURL, + 'task_id', + 'url_count' + ) + + url_error_count_subquery = self.statement_composer.simple_count_subquery( + URLErrorInfo, + 'task_id', + 'url_error_count' + ) + + statement = select( + Task, + url_count_subquery.c.url_count, + url_error_count_subquery.c.url_error_count + ).outerjoin( + url_count_subquery, + Task.id == url_count_subquery.c.task_id + ).outerjoin( + url_error_count_subquery, + Task.id == url_error_count_subquery.c.task_id + ) + if task_type is not None: + statement = statement.where(Task.task_type == task_type.value) + if task_status is not None: + statement = statement.where(Task.task_status == task_status.value) + add_standard_limit_and_offset(statement, page) + + execute_result = await session.execute(statement) + all_results = execute_result.all() + final_results = [] + for task, url_count, url_error_count in all_results: + final_results.append( + GetTasksResponseTaskInfo( + task_id=task.id, + type=TaskType(task.task_type), + status=BatchStatus(task.task_status), + url_count=url_count if url_count is not None else 0, + url_error_count=url_error_count if url_error_count is not None else 0, + updated_at=task.updated_at + ) + ) + return GetTasksResponse( + tasks=final_results + ) + + @session_manager + async def has_urls_without_agency_suggestions( + self, + session: AsyncSession + ) -> bool: + statement = ( + select( + URL.id + ).where( + URL.outcome == URLStatus.PENDING.value + ) + ) + + statement = self.statement_composer.exclude_urls_with_agency_suggestions(statement) + raw_result = await session.execute(statement) + result = raw_result.all() + return len(result) != 0 + + @session_manager + async def get_urls_without_agency_suggestions(self, session: AsyncSession) -> list[AgencyIdentificationTDO]: + """ + Retrieve URLs without confirmed or suggested agencies + Args: + session: + + Returns: + + """ + + statement = ( + select(URL.id, URL.collector_metadata, Batch.strategy) + .where(URL.outcome == URLStatus.PENDING.value) + .join(Batch) + ) + statement = self.statement_composer.exclude_urls_with_agency_suggestions(statement) + statement = statement.limit(100) + raw_results = await session.execute(statement) + 
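+ # Wrap each returned row (URL.id, collector_metadata, Batch.strategy) as an AgencyIdentificationTDO before returning it to the caller.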
return [ + AgencyIdentificationTDO( + url_id=raw_result[0], + collector_metadata=raw_result[1], + collector_type=CollectorType(raw_result[2]) + ) + for raw_result in raw_results + ] + + @session_manager + async def get_next_url_agency_for_annotation( + self, + session: AsyncSession, + user_id: int, + batch_id: Optional[int] + ) -> GetNextURLForAgencyAnnotationResponse: + """ + Retrieve URL for annotation + The URL must + not be a confirmed URL + not have been annotated by this user + have extant autosuggestions + """ + # Select statement + statement = ( + select(URL.id, URL.url) + # Must not have confirmed agencies + .where( + URL.outcome == URLStatus.PENDING.value + ) + ) + if batch_id is not None: + statement = statement.where(URL.batch_id == batch_id) + # Must not have been annotated by a user + statement = ( + statement.join(UserUrlAgencySuggestion, isouter=True) + .where( + ~exists( + select(UserUrlAgencySuggestion). + where(UserUrlAgencySuggestion.url_id == URL.id). + correlate(URL) + ) + ) + # Must have extant autosuggestions + .join(AutomatedUrlAgencySuggestion, isouter=True) + .where( + exists( + select(AutomatedUrlAgencySuggestion). + where(AutomatedUrlAgencySuggestion.url_id == URL.id). + correlate(URL) + ) + ) + # Must not have confirmed agencies + .join(ConfirmedURLAgency, isouter=True) + .where( + ~exists( + select(ConfirmedURLAgency). + where(ConfirmedURLAgency.url_id == URL.id). + correlate(URL) + ) + ) + # Must not have been marked as "Not Relevant" by this user + .join(UserRelevantSuggestion, isouter=True) + .where( + ~exists( + select(UserRelevantSuggestion). + where( + (UserRelevantSuggestion.user_id == user_id) & + (UserRelevantSuggestion.url_id == URL.id) & + (UserRelevantSuggestion.relevant == False) + ).correlate(URL) + ) + ) + ).limit(1) + raw_result = await session.execute(statement) + results = raw_result.all() + if len(results) == 0: + return GetNextURLForAgencyAnnotationResponse( + next_annotation=None + ) + + result = results[0] + url_id = result[0] + url = result[1] + + agency_suggestions = await self.get_agency_suggestions(session, url_id=url_id) + + # Get HTML content info + html_content_infos = await self.get_html_content_info(url_id) + response_html_info = convert_to_response_html_info(html_content_infos) + + return GetNextURLForAgencyAnnotationResponse( + next_annotation=GetNextURLForAgencyAnnotationInnerResponse( + url_id=url_id, + url=url, + html_info=response_html_info, + agency_suggestions=agency_suggestions + ) + ) + + @session_manager + async def upsert_new_agencies( + self, + session: AsyncSession, + suggestions: list[URLAgencySuggestionInfo] + ): + """ + Add or update agencies in the database + """ + for suggestion in suggestions: + agency = Agency( + agency_id=suggestion.pdap_agency_id, + name=suggestion.agency_name, + state=suggestion.state, + county=suggestion.county, + locality=suggestion.locality + ) + await session.merge(agency) + + @session_manager + async def add_confirmed_agency_url_links( + self, + session: AsyncSession, + suggestions: list[URLAgencySuggestionInfo] + ): + for suggestion in suggestions: + confirmed_agency = ConfirmedURLAgency( + url_id=suggestion.url_id, + agency_id=suggestion.pdap_agency_id + ) + session.add(confirmed_agency) + + @session_manager + async def add_agency_auto_suggestions( + self, + session: AsyncSession, + suggestions: list[URLAgencySuggestionInfo] + ): + for suggestion in suggestions: + url_agency_suggestion = AutomatedUrlAgencySuggestion( + url_id=suggestion.url_id, + 
agency_id=suggestion.pdap_agency_id, + is_unknown=suggestion.suggestion_type == SuggestionType.UNKNOWN + ) + session.add(url_agency_suggestion) + + await session.commit() + + @session_manager + async def add_agency_manual_suggestion( + self, + session: AsyncSession, + agency_id: Optional[int], + url_id: int, + user_id: int, + is_new: bool + ): + if is_new and agency_id is not None: + raise ValueError("agency_id must be None when is_new is True") + + # Check if agency exists in database -- if not, add with placeholder + if agency_id is not None: + statement = select(Agency).where(Agency.agency_id == agency_id) + result = await session.execute(statement) + if len(result.all()) == 0: + agency = Agency( + agency_id=agency_id, + name=PLACEHOLDER_AGENCY_NAME + ) + await session.merge(agency) + + url_agency_suggestion = UserUrlAgencySuggestion( + url_id=url_id, + agency_id=agency_id, + user_id=user_id, + is_new=is_new + ) + session.add(url_agency_suggestion) + + @session_manager + async def get_urls_with_confirmed_agencies(self, session: AsyncSession) -> list[URL]: + statement = select(URL).where(exists().where(ConfirmedURLAgency.url_id == URL.id)) + results = await session.execute(statement) + return list(results.scalars().all()) + + @session_manager + async def get_next_url_for_final_review( + self, + session: AsyncSession, + batch_id: Optional[int] + ) -> Optional[GetNextURLForFinalReviewResponse]: + + + def annotations_exist_subquery(model: Type[Base]): + return ( + select( + URL.id.label("url_id"), + case( + ( + exists().where(URL.id == model.url_id), 1 + ), + else_=0 + ).label("exists") + ).subquery() + ) + + def count_subquery(model: Type[Base]): + return ( + select( + model.url_id, + func.count(model.url_id).label("count") + ).group_by(model.url_id).subquery() + ) + + models = [ + AutoRelevantSuggestion, + UserRelevantSuggestion, + AutoRecordTypeSuggestion, + UserRecordTypeSuggestion, + AutomatedUrlAgencySuggestion, + UserUrlAgencySuggestion + ] + + exist_subqueries = [ + annotations_exist_subquery(model=model) + for model in models + ] + + sum_of_exist_subqueries = ( + sum( + [ + subquery.c.exists + for subquery in exist_subqueries] + ) + ) + + + # Basic URL query + url_query = ( + select( + URL, + ( + sum_of_exist_subqueries + ).label("total_distinct_annotation_count"), + ) + ) + + for subquery in exist_subqueries: + url_query = url_query.outerjoin( + subquery, URL.id == subquery.c.url_id + ) + + url_query = url_query.where( + URL.outcome == URLStatus.PENDING.value + ) + if batch_id is not None: + url_query = url_query.where( + URL.batch_id == batch_id + ) + + # The below relationships are joined directly to the URL + single_join_relationships = [ + URL.html_content, + URL.auto_record_type_suggestion, + URL.auto_relevant_suggestion, + URL.user_relevant_suggestion, + URL.user_record_type_suggestion, + URL.optional_data_source_metadata, + ] + + options = [ + joinedload(relationship) for relationship in single_join_relationships + ] + + # The below relationships are joined to entities that are joined to the URL + double_join_relationships = [ + (URL.automated_agency_suggestions, AutomatedUrlAgencySuggestion.agency), + (URL.user_agency_suggestion, UserUrlAgencySuggestion.agency), + (URL.confirmed_agencies, ConfirmedURLAgency.agency) + ] + for primary, secondary in double_join_relationships: + options.append(joinedload(primary).joinedload(secondary)) + + # Apply options + url_query = url_query.options(*options) + + # Apply order clause + url_query = url_query.order_by( + 
desc("total_distinct_annotation_count"), + asc(URL.id) + ) + + # Apply limit + url_query = url_query.limit(1) + + # Execute query + raw_result = await session.execute(url_query) + + full_result = raw_result.unique().all() + + if len(full_result) == 0: + return None + result: URL = full_result[0][0] + + # Convert html content to response format + html_content = result.html_content + html_content_infos = [URLHTMLContentInfo(**html_info.__dict__) for html_info in html_content] + + if result.optional_data_source_metadata is None: + optional_metadata = FinalReviewOptionalMetadata() + else: + optional_metadata = FinalReviewOptionalMetadata( + record_formats=result.optional_data_source_metadata.record_formats, + data_portal_type=result.optional_data_source_metadata.data_portal_type, + supplying_entity=result.optional_data_source_metadata.supplying_entity + ) + + + # Return + return GetNextURLForFinalReviewResponse( + id=result.id, + url=result.url, + html_info=convert_to_response_html_info(html_content_infos), + name=result.name, + description=result.description, + annotations=FinalReviewAnnotationInfo( + relevant=DTOConverter.final_review_annotation_relevant_info( + user_suggestion=result.user_relevant_suggestion, + auto_suggestion=result.auto_relevant_suggestion + ), + record_type=DTOConverter.final_review_annotation_record_type_info( + user_suggestion=result.user_record_type_suggestion, + auto_suggestion=result.auto_record_type_suggestion + ), + agency=DTOConverter.final_review_annotation_agency_info( + automated_agency_suggestions=result.automated_agency_suggestions, + user_agency_suggestion=result.user_agency_suggestion, + confirmed_agencies=result.confirmed_agencies + ) + ), + optional_metadata=optional_metadata + ) + + @session_manager + async def approve_url( + self, + session: AsyncSession, + approval_info: FinalReviewApprovalInfo, + user_id: int, + ) -> None: + + # Get URL + def update_if_not_none( + model, + field, + value: Optional[Any], + required: bool=False + ): + if value is not None: + setattr(model, field, value) + return + if not required: + return + model_value = getattr(model, field, None) + if model_value is None: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Must specify {field} if it does not already exist" + ) + + + query = ( + Select(URL) + .where(URL.id == approval_info.url_id) + .options( + joinedload(URL.optional_data_source_metadata), + joinedload(URL.confirmed_agencies), + ) + ) + + url = await session.execute(query) + url = url.scalars().first() + + update_if_not_none( + url, + "record_type", + approval_info.record_type.value if approval_info.record_type is not None else None, + required=True + ) + + # Get existing agency ids + existing_agencies = url.confirmed_agencies or [] + existing_agency_ids = [agency.agency_id for agency in existing_agencies] + new_agency_ids = approval_info.agency_ids or [] + if len(existing_agency_ids) == 0 and len(new_agency_ids) == 0: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Must specify agency_id if URL does not already have a confirmed agency" + ) + + # Get any existing agency ids that are not in the new agency ids + # If new agency ids are specified, overwrite existing + if len(new_agency_ids) != 0: + for existing_agency in existing_agencies: + if existing_agency.id not in new_agency_ids: + # If the existing agency id is not in the new agency ids, delete it + await session.delete(existing_agency) + # Add any new agency ids that are not in the existing agency ids + for 
new_agency_id in new_agency_ids: + if new_agency_id not in existing_agency_ids: + # Check if the new agency exists in the database + query = ( + select(Agency) + .where(Agency.agency_id == new_agency_id) + ) + existing_agency = await session.execute(query) + existing_agency = existing_agency.scalars().first() + if existing_agency is None: + # If not, create it + agency = Agency( + agency_id=new_agency_id, + name=PLACEHOLDER_AGENCY_NAME, + ) + session.add(agency) + + # If the new agency id is not in the existing agency ids, add it + confirmed_url_agency = ConfirmedURLAgency( + url_id=approval_info.url_id, + agency_id=new_agency_id + ) + session.add(confirmed_url_agency) + + # If it does, do nothing + + url.outcome = URLStatus.VALIDATED.value + + update_if_not_none(url, "name", approval_info.name, required=True) + update_if_not_none(url, "description", approval_info.description, required=True) + + optional_metadata = url.optional_data_source_metadata + if optional_metadata is None: + url.optional_data_source_metadata = URLOptionalDataSourceMetadata( + record_formats=approval_info.record_formats, + data_portal_type=approval_info.data_portal_type, + supplying_entity=approval_info.supplying_entity + ) + else: + update_if_not_none( + optional_metadata, + "record_formats", + approval_info.record_formats + ) + update_if_not_none( + optional_metadata, + "data_portal_type", + approval_info.data_portal_type + ) + update_if_not_none( + optional_metadata, + "supplying_entity", + approval_info.supplying_entity + ) + + # Add approving user + approving_user_url = ReviewingUserURL( + user_id=user_id, + url_id=approval_info.url_id + ) + + session.add(approving_user_url) + + @session_manager + async def reject_url( + self, + session: AsyncSession, + url_id: int, + user_id: int + ) -> None: + + query = ( + Select(URL) + .where(URL.id == url_id) + ) + + url = await session.execute(query) + url = url.scalars().first() + + url.outcome = URLStatus.REJECTED.value + + # Add rejecting user + rejecting_user_url = ReviewingUserURL( + user_id=user_id, + url_id=url_id + ) + + session.add(rejecting_user_url) + + @session_manager + async def get_batch_by_id(self, session, batch_id: int) -> Optional[BatchInfo]: + """Retrieve a batch by ID.""" + query = Select(Batch).where(Batch.id == batch_id) + result = await session.execute(query) + batch = result.scalars().first() + return BatchInfo(**batch.__dict__) + + @session_manager + async def get_urls_by_batch(self, session, batch_id: int, page: int = 1) -> List[URLInfo]: + """Retrieve all URLs associated with a batch.""" + query = Select(URL).where(URL.batch_id == batch_id).order_by(URL.id).limit(100).offset((page - 1) * 100) + result = await session.execute(query) + urls = result.scalars().all() + return ([URLInfo(**url.__dict__) for url in urls]) + + @session_manager + async def insert_url(self, session: AsyncSession, url_info: URLInfo) -> int: + """Insert a new URL into the database.""" + url_entry = URL( + batch_id=url_info.batch_id, + url=url_info.url, + collector_metadata=url_info.collector_metadata, + outcome=url_info.outcome.value + ) + if url_info.created_at is not None: + url_entry.created_at = url_info.created_at + session.add(url_entry) + await session.flush() + return url_entry.id + + @session_manager + async def get_url_info_by_url(self, session: AsyncSession, url: str) -> Optional[URLInfo]: + query = Select(URL).where(URL.url == url) + raw_result = await session.execute(query) + url = raw_result.scalars().first() + return URLInfo(**url.__dict__) + + 
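+ # The insert helpers below (insert_logs, insert_duplicates, insert_batch, insert_urls) add rows in bulk within a single session; insert_urls catches IntegrityError for URLs that already exist and records them as duplicates of the original entry via insert_duplicates.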
@session_manager + async def insert_logs(self, session, log_infos: List[LogInfo]): + for log_info in log_infos: + log = Log(log=log_info.log, batch_id=log_info.batch_id) + if log_info.created_at is not None: + log.created_at = log_info.created_at + session.add(log) + + @session_manager + async def insert_duplicates(self, session, duplicate_infos: list[DuplicateInsertInfo]): + for duplicate_info in duplicate_infos: + duplicate = Duplicate( + batch_id=duplicate_info.duplicate_batch_id, + original_url_id=duplicate_info.original_url_id, + ) + session.add(duplicate) + + @session_manager + async def insert_batch(self, session: AsyncSession, batch_info: BatchInfo) -> int: + """Insert a new batch into the database and return its ID.""" + batch = Batch( + strategy=batch_info.strategy, + user_id=batch_info.user_id, + status=batch_info.status.value, + parameters=batch_info.parameters, + total_url_count=batch_info.total_url_count, + original_url_count=batch_info.original_url_count, + duplicate_url_count=batch_info.duplicate_url_count, + compute_time=batch_info.compute_time, + strategy_success_rate=batch_info.strategy_success_rate, + metadata_success_rate=batch_info.metadata_success_rate, + agency_match_rate=batch_info.agency_match_rate, + record_type_match_rate=batch_info.record_type_match_rate, + record_category_match_rate=batch_info.record_category_match_rate, + ) + if batch_info.date_generated is not None: + batch.date_generated = batch_info.date_generated + session.add(batch) + await session.flush() + return batch.id + + + async def insert_urls(self, url_infos: List[URLInfo], batch_id: int) -> InsertURLsInfo: + url_mappings = [] + duplicates = [] + for url_info in url_infos: + url_info.batch_id = batch_id + try: + url_id = await self.insert_url(url_info) + url_mappings.append(URLMapping(url_id=url_id, url=url_info.url)) + except IntegrityError: + orig_url_info = await self.get_url_info_by_url(url_info.url) + duplicate_info = DuplicateInsertInfo( + duplicate_batch_id=batch_id, + original_url_id=orig_url_info.id + ) + duplicates.append(duplicate_info) + await self.insert_duplicates(duplicates) + + return InsertURLsInfo( + url_mappings=url_mappings, + total_count=len(url_infos), + original_count=len(url_mappings), + duplicate_count=len(duplicates), + url_ids=[url_mapping.url_id for url_mapping in url_mappings] + ) + + @session_manager + async def update_batch_post_collection( + self, + session, + batch_id: int, + total_url_count: int, + original_url_count: int, + duplicate_url_count: int, + batch_status: BatchStatus, + compute_time: float = None, + ): + + query = Select(Batch).where(Batch.id == batch_id) + result = await session.execute(query) + batch = result.scalars().first() + + batch.total_url_count = total_url_count + batch.original_url_count = original_url_count + batch.duplicate_url_count = duplicate_url_count + batch.status = batch_status.value + batch.compute_time = compute_time + + + @session_manager + async def has_validated_urls(self, session: AsyncSession) -> bool: + query = ( + select(URL) + .where(URL.outcome == URLStatus.VALIDATED.value) + ) + urls = await session.execute(query) + urls = urls.scalars().all() + return len(urls) > 0 + + @session_manager + async def get_validated_urls( + self, + session: AsyncSession + ) -> list[SubmitApprovedURLTDO]: + query = ( + select(URL) + .where(URL.outcome == URLStatus.VALIDATED.value) + .options( + selectinload(URL.optional_data_source_metadata), + selectinload(URL.confirmed_agencies), + selectinload(URL.reviewing_user) + ).limit(100) + ) + urls 
= await session.execute(query) + urls = urls.scalars().all() + results: list[SubmitApprovedURLTDO] = [] + for url in urls: + agency_ids = [] + for agency in url.confirmed_agencies: + agency_ids.append(agency.agency_id) + optional_metadata = url.optional_data_source_metadata + + if optional_metadata is None: + record_formats = None + data_portal_type = None + supplying_entity = None + else: + record_formats = optional_metadata.record_formats + data_portal_type = optional_metadata.data_portal_type + supplying_entity = optional_metadata.supplying_entity + + tdo = SubmitApprovedURLTDO( + url_id=url.id, + url=url.url, + name=url.name, + agency_ids=agency_ids, + description=url.description, + record_type=url.record_type, + record_formats=record_formats, + data_portal_type=data_portal_type, + supplying_entity=supplying_entity, + approving_user_id=url.reviewing_user.user_id + ) + results.append(tdo) + return results + + @session_manager + async def mark_urls_as_submitted(self, session: AsyncSession, infos: list[SubmittedURLInfo]): + for info in infos: + url_id = info.url_id + data_source_id = info.data_source_id + + query = ( + update(URL) + .where(URL.id == url_id) + .values( + outcome=URLStatus.SUBMITTED.value + ) + ) + + url_data_source_object = URLDataSource( + url_id=url_id, + data_source_id=data_source_id + ) + if info.submitted_at is not None: + url_data_source_object.created_at = info.submitted_at + session.add(url_data_source_object) + + + await session.execute(query) + + @session_manager + async def get_duplicates_by_batch_id(self, session, batch_id: int, page: int) -> List[DuplicateInfo]: + original_batch = aliased(Batch) + duplicate_batch = aliased(Batch) + + query = ( + Select( + URL.url.label("source_url"), + URL.id.label("original_url_id"), + duplicate_batch.id.label("duplicate_batch_id"), + duplicate_batch.parameters.label("duplicate_batch_parameters"), + original_batch.id.label("original_batch_id"), + original_batch.parameters.label("original_batch_parameters"), + ) + .select_from(Duplicate) + .join(URL, Duplicate.original_url_id == URL.id) + .join(duplicate_batch, Duplicate.batch_id == duplicate_batch.id) + .join(original_batch, URL.batch_id == original_batch.id) + .filter(duplicate_batch.id == batch_id) + .limit(100) + .offset((page - 1) * 100) + ) + raw_results = await session.execute(query) + results = raw_results.all() + final_results = [] + for result in results: + final_results.append( + DuplicateInfo( + source_url=result.source_url, + duplicate_batch_id=result.duplicate_batch_id, + duplicate_metadata=result.duplicate_batch_parameters, + original_batch_id=result.original_batch_id, + original_metadata=result.original_batch_parameters, + original_url_id=result.original_url_id + ) + ) + return final_results + + @session_manager + async def get_recent_batch_status_info( + self, + session, + page: int, + collector_type: Optional[CollectorType] = None, + status: Optional[BatchStatus] = None, + has_pending_urls: Optional[bool] = None + ) -> List[BatchInfo]: + # Get only the batch_id, collector_type, status, and created_at + limit = 100 + query = Select(Batch) + if has_pending_urls is not None: + pending_url_subquery = Select(URL).where( + and_( + URL.batch_id == Batch.id, + URL.outcome == URLStatus.PENDING.value + ) + ) + + if has_pending_urls: + # Query for all that have pending URLs + query = query.where(exists( + pending_url_subquery + )) + else: + # Query for all that DO NOT have pending URLs + # (or that have no URLs at all) + query = query.where( + not_( + exists( + 
pending_url_subquery + ) + ) + ) + if collector_type: + query = query.filter(Batch.strategy == collector_type.value) + if status: + query = query.filter(Batch.status == status.value) + + query = (query. + order_by(Batch.date_generated.desc()). + limit(limit). + offset((page - 1) * limit)) + raw_results = await session.execute(query) + batches = raw_results.scalars().all() + return [BatchInfo(**batch.__dict__) for batch in batches] + + @session_manager + async def get_logs_by_batch_id(self, session, batch_id: int) -> List[LogOutputInfo]: + query = Select(Log).filter_by(batch_id=batch_id).order_by(Log.created_at.asc()) + raw_results = await session.execute(query) + logs = raw_results.scalars().all() + return ([LogOutputInfo(**log.__dict__) for log in logs]) + + @session_manager + async def delete_old_logs(self, session): + """ + Delete logs older than a day + """ + statement = delete(Log).where( + Log.created_at < datetime.now() - timedelta(days=1) + ) + await session.execute(statement) + + async def get_agency_suggestions(self, session, url_id: int) -> List[GetNextURLForAgencyAgencyInfo]: + # Get relevant autosuggestions and agency info, if an associated agency exists + + statement = ( + select( + AutomatedUrlAgencySuggestion.agency_id, + AutomatedUrlAgencySuggestion.is_unknown, + Agency.name, + Agency.state, + Agency.county, + Agency.locality + ) + .join(Agency, isouter=True) + .where(AutomatedUrlAgencySuggestion.url_id == url_id) + ) + raw_autosuggestions = await session.execute(statement) + autosuggestions = raw_autosuggestions.all() + agency_suggestions = [] + for autosuggestion in autosuggestions: + agency_id = autosuggestion[0] + is_unknown = autosuggestion[1] + name = autosuggestion[2] + state = autosuggestion[3] + county = autosuggestion[4] + locality = autosuggestion[5] + agency_suggestions.append(GetNextURLForAgencyAgencyInfo( + suggestion_type=SuggestionType.AUTO_SUGGESTION if not is_unknown else SuggestionType.UNKNOWN, + pdap_agency_id=agency_id, + agency_name=name, + state=state, + county=county, + locality=locality + )) + return agency_suggestions + + @session_manager + async def get_next_url_for_all_annotations(self, session, batch_id: Optional[int] = None) -> GetNextURLForAllAnnotationResponse: + query = ( + Select(URL) + .where( + and_( + URL.outcome == URLStatus.PENDING.value, + StatementComposer.user_suggestion_not_exists(UserUrlAgencySuggestion), + StatementComposer.user_suggestion_not_exists(UserRecordTypeSuggestion), + StatementComposer.user_suggestion_not_exists(UserRelevantSuggestion), + ) + ) + ) + if batch_id is not None: + query = query.where(URL.batch_id == batch_id) + + load_options = [ + URL.html_content, + URL.automated_agency_suggestions, + URL.auto_relevant_suggestion, + URL.auto_record_type_suggestion + ] + select_in_loads = [selectinload(load_option) for load_option in load_options] + + # Add load options + query = query.options( + *select_in_loads + ) + + query = query.order_by(URL.id.asc()).limit(1) + raw_results = await session.execute(query) + url = raw_results.scalars().one_or_none() + if url is None: + return GetNextURLForAllAnnotationResponse( + next_annotation=None + ) + + html_response_info = DTOConverter.html_content_list_to_html_response_info( + url.html_content + ) + + if url.auto_relevant_suggestion is not None: + auto_relevant = url.auto_relevant_suggestion.relevant + else: + auto_relevant = None + + if url.auto_record_type_suggestion is not None: + auto_record_type = url.auto_record_type_suggestion.record_type + else: + auto_record_type = 
None + + agency_suggestions = await self.get_agency_suggestions(session, url_id=url.id) + + return GetNextURLForAllAnnotationResponse( + next_annotation=GetNextURLForAllAnnotationInnerResponse( + url_id=url.id, + url=url.url, + html_info=html_response_info, + suggested_relevant=auto_relevant, + suggested_record_type=auto_record_type, + agency_suggestions=agency_suggestions + ) + ) + + @session_manager + async def add_all_annotations_to_url( + self, + session, + user_id: int, + url_id: int, + post_info: AllAnnotationPostInfo + ): + + # Add relevant annotation + relevant_suggestion = UserRelevantSuggestion( + url_id=url_id, + user_id=user_id, + relevant=post_info.is_relevant + ) + session.add(relevant_suggestion) + + # If not relevant, do nothing else + if not post_info.is_relevant: + return + + record_type_suggestion = UserRecordTypeSuggestion( + url_id=url_id, + user_id=user_id, + record_type=post_info.record_type.value + ) + session.add(record_type_suggestion) + + agency_suggestion = UserUrlAgencySuggestion( + url_id=url_id, + user_id=user_id, + agency_id=post_info.agency.suggested_agency, + is_new=post_info.agency.is_new + ) + session.add(agency_suggestion) + + @session_manager + async def upload_manual_batch( + self, + session: AsyncSession, + user_id: int, + dto: ManualBatchInputDTO + ) -> ManualBatchResponseDTO: + batch = Batch( + strategy=CollectorType.MANUAL.value, + status=BatchStatus.READY_TO_LABEL.value, + parameters={ + "name": dto.name + }, + user_id=user_id + ) + session.add(batch) + await session.flush() + + batch_id = batch.id + url_ids = [] + duplicate_urls = [] + + for entry in dto.entries: + url = URL( + url=entry.url, + name=entry.name, + description=entry.description, + batch_id=batch_id, + collector_metadata=entry.collector_metadata, + outcome=URLStatus.PENDING.value, + record_type=entry.record_type.value if entry.record_type is not None else None, + ) + + async with session.begin_nested(): + try: + session.add(url) + await session.flush() + except IntegrityError: + duplicate_urls.append(entry.url) + continue + await session.flush() + optional_metadata = URLOptionalDataSourceMetadata( + url_id=url.id, + record_formats=entry.record_formats, + data_portal_type=entry.data_portal_type, + supplying_entity=entry.supplying_entity, + ) + session.add(optional_metadata) + url_ids.append(url.id) + + + return ManualBatchResponseDTO( + batch_id=batch_id, + urls=url_ids, + duplicate_urls=duplicate_urls + ) + + @session_manager + async def search_for_url(self, session: AsyncSession, url: str) -> SearchURLResponse: + query = select(URL).where(URL.url == url) + raw_results = await session.execute(query) + url = raw_results.scalars().one_or_none() + if url is None: + return SearchURLResponse( + found=False, + url_id=None + ) + return SearchURLResponse( + found=True, + url_id=url.id + ) + + @session_manager + async def get_batches_aggregated_metrics(self, session: AsyncSession) -> GetMetricsBatchesAggregatedResponseDTO: + sc = StatementComposer + + # First, get all batches broken down by collector type and status + def batch_column(status: BatchStatus, label): + return sc.count_distinct( + case( + (Batch.status == status.value, + Batch.id) + ), + label=label + ) + + batch_count_subquery = select( + batch_column(BatchStatus.READY_TO_LABEL, label="done_count"), + batch_column(BatchStatus.ERROR, label="error_count"), + Batch.strategy, + ).group_by(Batch.strategy).subquery("batch_count") + + def url_column(status: URLStatus, label): + return sc.count_distinct( + case( + (URL.outcome == 
status.value, + URL.id) + ), + label=label + ) + + # Next, count urls + url_count_subquery = select( + Batch.strategy, + url_column(URLStatus.PENDING, label="pending_count"), + url_column(URLStatus.ERROR, label="error_count"), + url_column(URLStatus.VALIDATED, label="validated_count"), + url_column(URLStatus.SUBMITTED, label="submitted_count"), + url_column(URLStatus.REJECTED, label="rejected_count"), + + ).outerjoin( + Batch, Batch.id == URL.batch_id + ).group_by( + Batch.strategy + ).subquery("url_count") + + # Combine + query = select( + Batch.strategy, + batch_count_subquery.c.done_count.label("batch_done_count"), + batch_count_subquery.c.error_count.label("batch_error_count"), + coalesce(url_count_subquery.c.pending_count, 0).label("pending_count"), + coalesce(url_count_subquery.c.error_count, 0).label("error_count"), + coalesce(url_count_subquery.c.submitted_count, 0).label("submitted_count"), + coalesce(url_count_subquery.c.rejected_count, 0).label("rejected_count"), + coalesce(url_count_subquery.c.validated_count, 0).label("validated_count") + ).join( + batch_count_subquery, + Batch.strategy == batch_count_subquery.c.strategy + ).outerjoin( + url_count_subquery, + Batch.strategy == url_count_subquery.c.strategy + ) + raw_results = await session.execute(query) + results = raw_results.all() + d: dict[CollectorType, GetMetricsBatchesAggregatedInnerResponseDTO] = {} + for result in results: + d[CollectorType(result.strategy)] = GetMetricsBatchesAggregatedInnerResponseDTO( + count_successful_batches=result.batch_done_count, + count_failed_batches=result.batch_error_count, + count_urls=result.pending_count + result.submitted_count + + result.rejected_count + result.error_count + + result.validated_count, + count_urls_pending=result.pending_count, + count_urls_validated=result.validated_count, + count_urls_submitted=result.submitted_count, + count_urls_rejected=result.rejected_count, + count_urls_errors=result.error_count + ) + + total_batch_query = await session.execute( + select( + sc.count_distinct(Batch.id, label="count") + ) + ) + total_batch_count = total_batch_query.scalars().one_or_none() + if total_batch_count is None: + total_batch_count = 0 + + return GetMetricsBatchesAggregatedResponseDTO( + total_batches=total_batch_count, + by_strategy=d + ) + + @session_manager + async def get_batches_breakdown_metrics( + self, + session: AsyncSession, + page: int + ) -> GetMetricsBatchesBreakdownResponseDTO: + sc = StatementComposer + + main_query = select( + Batch.strategy, + Batch.id, + Batch.status, + Batch.date_generated.label("created_at"), + ) + + def url_column(status: URLStatus, label): + return sc.count_distinct( + case( + (URL.outcome == status.value, + URL.id) + ), + label=label + ) + + count_query = select( + URL.batch_id, + sc.count_distinct(URL.id, label="count_total"), + url_column(URLStatus.PENDING, label="count_pending"), + url_column(URLStatus.SUBMITTED, label="count_submitted"), + url_column(URLStatus.REJECTED, label="count_rejected"), + url_column(URLStatus.ERROR, label="count_error"), + url_column(URLStatus.VALIDATED, label="count_validated"), + ).group_by( + URL.batch_id + ).subquery("url_count") + + query = (select( + main_query.c.strategy, + main_query.c.id, + main_query.c.created_at, + main_query.c.status, + coalesce(count_query.c.count_total, 0).label("count_total"), + coalesce(count_query.c.count_pending, 0).label("count_pending"), + coalesce(count_query.c.count_submitted, 0).label("count_submitted"), + coalesce(count_query.c.count_rejected, 
0).label("count_rejected"), + coalesce(count_query.c.count_error, 0).label("count_error"), + coalesce(count_query.c.count_validated, 0).label("count_validated"), + ).outerjoin( + count_query, + main_query.c.id == count_query.c.batch_id + ).offset( + (page - 1) * 100 + ).order_by( + main_query.c.created_at.asc() + )) + + raw_results = await session.execute(query) + results = raw_results.all() + batches: list[GetMetricsBatchesBreakdownInnerResponseDTO] = [] + for result in results: + dto = GetMetricsBatchesBreakdownInnerResponseDTO( + batch_id=result.id, + strategy=CollectorType(result.strategy), + status=BatchStatus(result.status), + created_at=result.created_at, + count_url_total=result.count_total, + count_url_pending=result.count_pending, + count_url_submitted=result.count_submitted, + count_url_rejected=result.count_rejected, + count_url_error=result.count_error, + count_url_validated=result.count_validated + ) + batches.append(dto) + return GetMetricsBatchesBreakdownResponseDTO( + batches=batches, + ) + + @session_manager + async def get_urls_breakdown_submitted_metrics( + self, + session: AsyncSession + ) -> GetMetricsURLsBreakdownSubmittedResponseDTO: + + # Build the query + month = func.date_trunc('month', URLDataSource.created_at) + query = ( + select( + month.label('month'), + func.count(URLDataSource.id).label('count_submitted'), + ) + .group_by(month) + .order_by(month.asc()) + ) + + # Execute the query + raw_results = await session.execute(query) + results = raw_results.all() + final_results: list[GetMetricsURLsBreakdownSubmittedInnerDTO] = [] + for result in results: + dto = GetMetricsURLsBreakdownSubmittedInnerDTO( + month=result.month.strftime("%B %Y"), + count_submitted=result.count_submitted + ) + final_results.append(dto) + return GetMetricsURLsBreakdownSubmittedResponseDTO( + entries=final_results + ) + + @session_manager + async def get_urls_aggregated_metrics( + self, + session: AsyncSession + ) -> GetMetricsURLsAggregatedResponseDTO: + sc = StatementComposer + + oldest_pending_url_query = select( + URL.id, + URL.created_at + ).where( + URL.outcome == URLStatus.PENDING.value + ).order_by( + URL.created_at.asc() + ).limit(1) + + oldest_pending_url = await session.execute(oldest_pending_url_query) + oldest_pending_url = oldest_pending_url.one_or_none() + if oldest_pending_url is None: + oldest_pending_url_id = None + oldest_pending_created_at = None + else: + oldest_pending_url_id = oldest_pending_url.id + oldest_pending_created_at = oldest_pending_url.created_at + + def case_column(status: URLStatus, label): + return sc.count_distinct( + case( + (URL.outcome == status.value, + URL.id) + ), + label=label + ) + + count_query = select( + sc.count_distinct(URL.id, label="count"), + case_column(URLStatus.PENDING, label="count_pending"), + case_column(URLStatus.SUBMITTED, label="count_submitted"), + case_column(URLStatus.VALIDATED, label="count_validated"), + case_column(URLStatus.REJECTED, label="count_rejected"), + case_column(URLStatus.ERROR, label="count_error"), + ) + raw_results = await session.execute(count_query) + results = raw_results.all() + + return GetMetricsURLsAggregatedResponseDTO( + count_urls_total=results[0].count, + count_urls_pending=results[0].count_pending, + count_urls_submitted=results[0].count_submitted, + count_urls_validated=results[0].count_validated, + count_urls_rejected=results[0].count_rejected, + count_urls_errors=results[0].count_error, + oldest_pending_url_id=oldest_pending_url_id, + oldest_pending_url_created_at=oldest_pending_created_at, 
+ ) + + def compile(self, statement): + compiled_sql = statement.compile(dialect=postgresql.dialect(), compile_kwargs={"literal_binds": True}) + return compiled_sql + + @session_manager + async def get_urls_breakdown_pending_metrics( + self, + session: AsyncSession + ) -> GetMetricsURLsBreakdownPendingResponseDTO: + sc = StatementComposer + + flags = ( + select( + URL.id.label("url_id"), + case((UserRecordTypeSuggestion.url_id != None, literal(True)), else_=literal(False)).label( + "has_user_record_type_annotation" + ), + case((UserRelevantSuggestion.url_id != None, literal(True)), else_=literal(False)).label( + "has_user_relevant_annotation" + ), + case((UserUrlAgencySuggestion.url_id != None, literal(True)), else_=literal(False)).label( + "has_user_agency_annotation" + ), + ) + .outerjoin(UserRecordTypeSuggestion, URL.id == UserRecordTypeSuggestion.url_id) + .outerjoin(UserRelevantSuggestion, URL.id == UserRelevantSuggestion.url_id) + .outerjoin(UserUrlAgencySuggestion, URL.id == UserUrlAgencySuggestion.url_id) + ).cte("flags") + + + month = func.date_trunc('month', URL.created_at) + + # Build the query + query = ( + select( + month.label('month'), + func.count(URL.id).label('count_total'), + func.count(case( + (flags.c.has_user_record_type_annotation == True, 1)) + ).label('user_record_type_count'), + func.count(case( + (flags.c.has_user_relevant_annotation == True, 1)) + ).label('user_relevant_count'), + func.count(case( + (flags.c.has_user_agency_annotation == True, 1)) + ).label('user_agency_count'), + ) + .outerjoin(flags, flags.c.url_id == URL.id) + .where(URL.outcome == URLStatus.PENDING.value) + .group_by(month) + .order_by(month.asc()) + ) + + # Execute the query and return the results + results = await session.execute(query) + all_results = results.all() + final_results: list[GetMetricsURLsBreakdownPendingResponseInnerDTO] = [] + + for result in all_results: + dto = GetMetricsURLsBreakdownPendingResponseInnerDTO( + month=result.month.strftime("%B %Y"), + count_pending_total=result.count_total, + count_pending_relevant_user=result.user_relevant_count, + count_pending_record_type_user=result.user_record_type_count, + count_pending_agency_user=result.user_agency_count, + ) + final_results.append(dto) + return GetMetricsURLsBreakdownPendingResponseDTO( + entries=final_results, + ) + + @session_manager + async def get_backlog_metrics( + self, + session: AsyncSession + ) -> GetMetricsBacklogResponseDTO: + month = func.date_trunc('month', BacklogSnapshot.created_at) + + # 1. Create a subquery that assigns row_number() partitioned by month + monthly_snapshot_subq = ( + select( + BacklogSnapshot.id, + BacklogSnapshot.created_at, + BacklogSnapshot.count_pending_total, + month.label("month_start"), + func.row_number() + .over( + partition_by=month, + order_by=BacklogSnapshot.created_at.desc() + ) + .label("row_number") + ) + .subquery() + ) + + # 2. 
Filter for the top (most recent) row in each month + stmt = ( + select( + monthly_snapshot_subq.c.month_start, + monthly_snapshot_subq.c.created_at, + monthly_snapshot_subq.c.count_pending_total + ) + .where(monthly_snapshot_subq.c.row_number == 1) + .order_by(monthly_snapshot_subq.c.month_start) + ) + + raw_result = await session.execute(stmt) + results = raw_result.all() + final_results = [] + for result in results: + final_results.append( + GetMetricsBacklogResponseInnerDTO( + month=result.month_start.strftime("%B %Y"), + count_pending_total=result.count_pending_total, + ) + ) + + return GetMetricsBacklogResponseDTO(entries=final_results) + + + @session_manager + async def populate_backlog_snapshot( + self, + session: AsyncSession, + dt: Optional[datetime] = None + ): + sc = StatementComposer + # Get count of pending URLs + query = select( + sc.count_distinct(URL.id, label="count") + ).where( + URL.outcome == URLStatus.PENDING.value + ) + + raw_result = await session.execute(query) + count = raw_result.one()[0] + + # insert count into snapshot + snapshot = BacklogSnapshot( + count_pending_total=count + ) + if dt is not None: + snapshot.created_at = dt + + session.add(snapshot) + + @session_manager + async def has_pending_urls_not_checked_for_duplicates(self, session: AsyncSession) -> bool: + query = (select( + URL.id + ).outerjoin( + URLCheckedForDuplicate, + URL.id == URLCheckedForDuplicate.url_id + ).where( + URL.outcome == URLStatus.PENDING.value, + URLCheckedForDuplicate.id == None + ).limit(1) + ) + + raw_result = await session.execute(query) + result = raw_result.one_or_none() + return result is not None + + @session_manager + async def get_pending_urls_not_checked_for_duplicates(self, session: AsyncSession) -> List[URLDuplicateTDO]: + query = (select( + URL + ).outerjoin( + URLCheckedForDuplicate, + URL.id == URLCheckedForDuplicate.url_id + ).where( + URL.outcome == URLStatus.PENDING.value, + URLCheckedForDuplicate.id == None + ).limit(100) + ) + + raw_result = await session.execute(query) + urls = raw_result.scalars().all() + return [URLDuplicateTDO(url=url.url, url_id=url.id) for url in urls] + + + @session_manager + async def mark_all_as_duplicates(self, session: AsyncSession, url_ids: List[int]): + query = update(URL).where(URL.id.in_(url_ids)).values(outcome=URLStatus.DUPLICATE.value) + await session.execute(query) + + @session_manager + async def mark_as_checked_for_duplicates(self, session: AsyncSession, url_ids: list[int]): + for url_id in url_ids: + url_checked_for_duplicate = URLCheckedForDuplicate(url_id=url_id) + session.add(url_checked_for_duplicate) diff --git a/collector_db/DTOConverter.py b/collector_db/DTOConverter.py new file mode 100644 index 00000000..b43fbbe9 --- /dev/null +++ b/collector_db/DTOConverter.py @@ -0,0 +1,196 @@ +from typing import Optional + +from collector_db.DTOs.URLHTMLContentInfo import HTMLContentType, URLHTMLContentInfo +from collector_db.DTOs.URLWithHTML import URLWithHTML +from collector_db.models import AutomatedUrlAgencySuggestion, UserUrlAgencySuggestion, URLHTMLContent, URL, Agency, \ + AutoRecordTypeSuggestion, UserRecordTypeSuggestion, UserRelevantSuggestion, AutoRelevantSuggestion, \ + ConfirmedURLAgency +from core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAgencyInfo +from core.DTOs.GetNextURLForFinalReviewResponse import FinalReviewAnnotationRelevantInfo, \ + FinalReviewAnnotationRecordTypeInfo, FinalReviewAnnotationAgencyAutoInfo, \ + FinalReviewAnnotationAgencyInfo +from core.enums import RecordType, 
SuggestionType +from html_tag_collector.DataClassTags import ResponseHTMLInfo, ENUM_TO_ATTRIBUTE_MAPPING + +class DTOConverter: + + """ + Converts SQLAlchemy objects to DTOs + """ + + @staticmethod + def final_review_annotation_relevant_info( + user_suggestion: UserRelevantSuggestion, + auto_suggestion: AutoRelevantSuggestion + ) -> FinalReviewAnnotationRelevantInfo: + + auto_value = auto_suggestion.relevant if auto_suggestion else None + user_value = user_suggestion.relevant if user_suggestion else None + return FinalReviewAnnotationRelevantInfo( + auto=auto_value, + user=user_value + ) + + @staticmethod + def final_review_annotation_record_type_info( + user_suggestion: UserRecordTypeSuggestion, + auto_suggestion: AutoRecordTypeSuggestion + ): + + if auto_suggestion is None: + auto_value = None + else: + auto_value = RecordType(auto_suggestion.record_type) + if user_suggestion is None: + user_value = None + else: + user_value = RecordType(user_suggestion.record_type) + + return FinalReviewAnnotationRecordTypeInfo( + auto=auto_value, + user=user_value + ) + + @staticmethod + def final_review_annotation_agency_auto_info( + automated_agency_suggestions: list[AutomatedUrlAgencySuggestion] + ) -> FinalReviewAnnotationAgencyAutoInfo: + + if len(automated_agency_suggestions) == 0: + return FinalReviewAnnotationAgencyAutoInfo( + unknown=True, + suggestions=[] + ) + + if len(automated_agency_suggestions) == 1: + suggestion = automated_agency_suggestions[0] + unknown = suggestion.is_unknown + else: + unknown = False + + if unknown: + return FinalReviewAnnotationAgencyAutoInfo( + unknown=True, + suggestions=[ + GetNextURLForAgencyAgencyInfo( + suggestion_type=SuggestionType.UNKNOWN, + ) + ] + ) + + return FinalReviewAnnotationAgencyAutoInfo( + unknown=unknown, + suggestions=[ + GetNextURLForAgencyAgencyInfo( + suggestion_type=SuggestionType.AUTO_SUGGESTION, + pdap_agency_id=suggestion.agency_id, + agency_name=suggestion.agency.name, + state=suggestion.agency.state, + county=suggestion.agency.county, + locality=suggestion.agency.locality + ) for suggestion in automated_agency_suggestions + ] + ) + + @staticmethod + def user_url_agency_suggestion_to_final_review_annotation_agency_user_info( + user_url_agency_suggestion: UserUrlAgencySuggestion + ) -> Optional[GetNextURLForAgencyAgencyInfo]: + suggestion = user_url_agency_suggestion + if suggestion is None: + return None + return GetNextURLForAgencyAgencyInfo( + suggestion_type=SuggestionType.USER_SUGGESTION, + pdap_agency_id=suggestion.agency_id, + agency_name=suggestion.agency.name, + state=suggestion.agency.state, + county=suggestion.agency.county, + locality=suggestion.agency.locality + ) + + + @staticmethod + def confirmed_agencies_to_final_review_annotation_agency_info( + confirmed_agencies: list[ConfirmedURLAgency] + ) -> list[GetNextURLForAgencyAgencyInfo]: + results = [] + for confirmed_agency in confirmed_agencies: + agency = confirmed_agency.agency + agency_info = GetNextURLForAgencyAgencyInfo( + suggestion_type=SuggestionType.CONFIRMED, + pdap_agency_id=agency.agency_id, + agency_name=agency.name, + state=agency.state, + county=agency.county, + locality=agency.locality + ) + results.append(agency_info) + return results + + + @staticmethod + def final_review_annotation_agency_info( + automated_agency_suggestions: list[AutomatedUrlAgencySuggestion], + confirmed_agencies: list[ConfirmedURLAgency], + user_agency_suggestion: UserUrlAgencySuggestion + ): + + confirmed_agency_info = 
DTOConverter.confirmed_agencies_to_final_review_annotation_agency_info( + confirmed_agencies + ) + + agency_auto_info = DTOConverter.final_review_annotation_agency_auto_info( + automated_agency_suggestions + ) + + agency_user_info = DTOConverter.user_url_agency_suggestion_to_final_review_annotation_agency_user_info( + user_agency_suggestion + ) + + return FinalReviewAnnotationAgencyInfo( + confirmed=confirmed_agency_info, + user=agency_user_info, + auto=agency_auto_info + ) + + + @staticmethod + def url_list_to_url_with_html_list(url_list: list[URL]) -> list[URLWithHTML]: + return [DTOConverter.url_to_url_with_html(url) for url in url_list] + + @staticmethod + def url_to_url_with_html(url: URL) -> URLWithHTML: + url_val = url.url + url_id = url.id + html_infos = [] + for html_info in url.html_content: + html_infos.append( + URLHTMLContentInfo( + **html_info.__dict__ + ) + ) + + return URLWithHTML( + url=url_val, + url_id=url_id, + html_infos=html_infos + ) + + @staticmethod + def html_content_list_to_html_response_info(html_content_list: list[URLHTMLContent]): + response_html_info = ResponseHTMLInfo() + + for html_content in html_content_list: + content_type = HTMLContentType(html_content.content_type) + content = html_content.content + + setattr( + response_html_info, + ENUM_TO_ATTRIBUTE_MAPPING[content_type], + content + ) + + + return response_html_info + + diff --git a/collector_db/DTOs/GetTaskStatusResponseInfo.py b/collector_db/DTOs/GetTaskStatusResponseInfo.py new file mode 100644 index 00000000..f6a8d5fc --- /dev/null +++ b/collector_db/DTOs/GetTaskStatusResponseInfo.py @@ -0,0 +1,7 @@ +from pydantic import BaseModel + +from collector_db.enums import TaskType + + +class GetTaskStatusResponseInfo(BaseModel): + status: TaskType \ No newline at end of file diff --git a/collector_db/DTOs/InsertURLsInfo.py b/collector_db/DTOs/InsertURLsInfo.py index 079510d2..da2ee39a 100644 --- a/collector_db/DTOs/InsertURLsInfo.py +++ b/collector_db/DTOs/InsertURLsInfo.py @@ -5,6 +5,7 @@ class InsertURLsInfo(BaseModel): url_mappings: list[URLMapping] + url_ids: list[int] total_count: int = 0 original_count: int = 0 duplicate_count: int = 0 diff --git a/collector_db/DTOs/RelevanceLabelStudioInputCycleInfo.py b/collector_db/DTOs/RelevanceLabelStudioInputCycleInfo.py deleted file mode 100644 index 644e0e27..00000000 --- a/collector_db/DTOs/RelevanceLabelStudioInputCycleInfo.py +++ /dev/null @@ -1,9 +0,0 @@ -from pydantic import BaseModel - -from collector_db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo - - -class RelevanceLabelStudioInputCycleInfo(BaseModel): - url: str - metadata_id: int - html_content_info: list[URLHTMLContentInfo] \ No newline at end of file diff --git a/collector_db/DTOs/TaskInfo.py b/collector_db/DTOs/TaskInfo.py new file mode 100644 index 00000000..e8d8090d --- /dev/null +++ b/collector_db/DTOs/TaskInfo.py @@ -0,0 +1,18 @@ +import datetime +from typing import Optional + +from pydantic import BaseModel + +from collector_db.DTOs.URLErrorInfos import URLErrorPydanticInfo +from collector_db.DTOs.URLInfo import URLInfo +from collector_db.enums import TaskType +from core.enums import BatchStatus + + +class TaskInfo(BaseModel): + task_type: TaskType + task_status: BatchStatus + updated_at: datetime.datetime + error_info: Optional[str] = None + urls: list[URLInfo] + url_errors: list[URLErrorPydanticInfo] \ No newline at end of file diff --git a/collector_db/DTOs/URLAnnotationInfo.py b/collector_db/DTOs/URLAnnotationInfo.py index 54792dfc..844b226d 100644 --- 
a/collector_db/DTOs/URLAnnotationInfo.py +++ b/collector_db/DTOs/URLAnnotationInfo.py @@ -6,4 +6,5 @@ class URLAnnotationInfo(BaseModel): metadata_id: int url: str - html_infos: list[URLHTMLContentInfo] \ No newline at end of file + html_infos: list[URLHTMLContentInfo] + suggested_value: str \ No newline at end of file diff --git a/collector_db/DTOs/URLErrorInfos.py b/collector_db/DTOs/URLErrorInfos.py index cf73a6dc..46f5b9fa 100644 --- a/collector_db/DTOs/URLErrorInfos.py +++ b/collector_db/DTOs/URLErrorInfos.py @@ -5,6 +5,7 @@ class URLErrorPydanticInfo(BaseModel): + task_id: int url_id: int error: str updated_at: Optional[datetime.datetime] = None \ No newline at end of file diff --git a/collector_db/DTOs/URLHTMLContentInfo.py b/collector_db/DTOs/URLHTMLContentInfo.py index ffd82724..f8b24eb0 100644 --- a/collector_db/DTOs/URLHTMLContentInfo.py +++ b/collector_db/DTOs/URLHTMLContentInfo.py @@ -18,4 +18,4 @@ class HTMLContentType(Enum): class URLHTMLContentInfo(BaseModel): url_id: Optional[int] = None content_type: HTMLContentType - content: str \ No newline at end of file + content: str | list[str] \ No newline at end of file diff --git a/collector_db/DTOs/URLInfo.py b/collector_db/DTOs/URLInfo.py index afe6c2f2..5a1d2221 100644 --- a/collector_db/DTOs/URLInfo.py +++ b/collector_db/DTOs/URLInfo.py @@ -13,3 +13,5 @@ class URLInfo(BaseModel): collector_metadata: Optional[dict] = None outcome: URLStatus = URLStatus.PENDING updated_at: Optional[datetime.datetime] = None + created_at: Optional[datetime.datetime] = None + name: Optional[str] = None diff --git a/collector_db/DTOs/URLMetadataInfo.py b/collector_db/DTOs/URLMetadataInfo.py index 9cbc7dca..461d16e9 100644 --- a/collector_db/DTOs/URLMetadataInfo.py +++ b/collector_db/DTOs/URLMetadataInfo.py @@ -12,6 +12,7 @@ class URLMetadataInfo(BaseModel): attribute: Optional[URLMetadataAttributeType] = None # TODO: May need to add validation here depending on the type of attribute value: Optional[str] = None + notes: Optional[str] = None validation_status: Optional[ValidationStatus] = None validation_source: Optional[ValidationSource] = None created_at: Optional[datetime] = None diff --git a/collector_db/DatabaseClient.py b/collector_db/DatabaseClient.py index 2a659f3f..8bd8105f 100644 --- a/collector_db/DatabaseClient.py +++ b/collector_db/DatabaseClient.py @@ -1,31 +1,34 @@ -from datetime import datetime, timedelta from functools import wraps from typing import Optional, List -from sqlalchemy import create_engine, Row +from sqlalchemy import create_engine, update from sqlalchemy.exc import IntegrityError -from sqlalchemy.orm import sessionmaker, scoped_session, aliased +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.orm import sessionmaker, scoped_session, Session from collector_db.ConfigManager import ConfigManager from collector_db.DTOs.BatchInfo import BatchInfo -from collector_db.DTOs.DuplicateInfo import DuplicateInfo, DuplicateInsertInfo +from collector_db.DTOs.DuplicateInfo import DuplicateInsertInfo from collector_db.DTOs.InsertURLsInfo import InsertURLsInfo -from collector_db.DTOs.LogInfo import LogInfo, LogOutputInfo +from collector_db.DTOs.LogInfo import LogInfo from collector_db.DTOs.URLInfo import URLInfo from collector_db.DTOs.URLMapping import URLMapping -from collector_db.helper_functions import get_postgres_connection_string -from collector_db.models import Base, Batch, URL, Log, Duplicate -from collector_manager.enums import CollectorType +from collector_db.models import Base, Batch, URL, Log, Duplicate, 
URLDataSource +from collector_manager.enums import CollectorType, URLStatus +from core.DTOs.ManualBatchInputDTO import ManualBatchInputDTO +from core.DTOs.ManualBatchResponseDTO import ManualBatchResponseDTO +from core.DTOs.task_data_objects.SubmitApprovedURLTDO import SubmittedURLInfo +from core.EnvVarManager import EnvVarManager from core.enums import BatchStatus -# SQLAlchemy ORM models - - # Database Client class DatabaseClient: - def __init__(self, db_url: str = get_postgres_connection_string()): + def __init__(self, db_url: Optional[str] = None): """Initialize the DatabaseClient.""" + if db_url is None: + db_url = EnvVarManager.get().get_postgres_connection_string(is_async=True) + self.engine = create_engine( url=db_url, echo=ConfigManager.get_sqlalchemy_echo(), @@ -52,10 +55,6 @@ def wrapper(self, *args, **kwargs): return wrapper - def row_to_dict(self, row: Row) -> dict: - return dict(row._mapping) - - @session_manager def insert_batch(self, session, batch_info: BatchInfo) -> int: """Insert a new batch into the database and return its ID.""" @@ -74,58 +73,19 @@ def insert_batch(self, session, batch_info: BatchInfo) -> int: record_type_match_rate=batch_info.record_type_match_rate, record_category_match_rate=batch_info.record_category_match_rate, ) + if batch_info.date_generated is not None: + batch.date_generated = batch_info.date_generated session.add(batch) session.commit() session.refresh(batch) return batch.id - @session_manager - def update_batch_post_collection( - self, - session, - batch_id: int, - total_url_count: int, - original_url_count: int, - duplicate_url_count: int, - batch_status: BatchStatus, - compute_time: float = None, - ): - batch = session.query(Batch).filter_by(id=batch_id).first() - batch.total_url_count = total_url_count - batch.original_url_count = original_url_count - batch.duplicate_url_count = duplicate_url_count - batch.status = batch_status.value - batch.compute_time = compute_time - @session_manager def get_batch_by_id(self, session, batch_id: int) -> Optional[BatchInfo]: """Retrieve a batch by ID.""" batch = session.query(Batch).filter_by(id=batch_id).first() return BatchInfo(**batch.__dict__) - def insert_urls(self, url_infos: List[URLInfo], batch_id: int) -> InsertURLsInfo: - url_mappings = [] - duplicates = [] - for url_info in url_infos: - url_info.batch_id = batch_id - try: - url_id = self.insert_url(url_info) - url_mappings.append(URLMapping(url_id=url_id, url=url_info.url)) - except IntegrityError: - orig_url_info = self.get_url_info_by_url(url_info.url) - duplicate_info = DuplicateInsertInfo( - duplicate_batch_id=batch_id, - original_url_id=orig_url_info.id - ) - duplicates.append(duplicate_info) - self.insert_duplicates(duplicates) - - return InsertURLsInfo( - url_mappings=url_mappings, - total_count=len(url_infos), - original_count=len(url_mappings), - duplicate_count=len(duplicates), - ) @session_manager def insert_duplicates(self, session, duplicate_infos: list[DuplicateInsertInfo]): @@ -137,7 +97,6 @@ def insert_duplicates(self, session, duplicate_infos: list[DuplicateInsertInfo]) session.add(duplicate) - @session_manager def get_url_info_by_url(self, session, url: str) -> Optional[URLInfo]: url = session.query(URL).filter_by(url=url).first() @@ -150,13 +109,40 @@ def insert_url(self, session, url_info: URLInfo) -> int: batch_id=url_info.batch_id, url=url_info.url, collector_metadata=url_info.collector_metadata, - outcome=url_info.outcome.value + outcome=url_info.outcome.value, + name=url_info.name ) + if url_info.created_at is not None: 
+ url_entry.created_at = url_info.created_at session.add(url_entry) session.commit() session.refresh(url_entry) return url_entry.id + def insert_urls(self, url_infos: List[URLInfo], batch_id: int) -> InsertURLsInfo: + url_mappings = [] + duplicates = [] + for url_info in url_infos: + url_info.batch_id = batch_id + try: + url_id = self.insert_url(url_info) + url_mappings.append(URLMapping(url_id=url_id, url=url_info.url)) + except IntegrityError: + orig_url_info = self.get_url_info_by_url(url_info.url) + duplicate_info = DuplicateInsertInfo( + duplicate_batch_id=batch_id, + original_url_id=orig_url_info.id + ) + duplicates.append(duplicate_info) + self.insert_duplicates(duplicates) + + return InsertURLsInfo( + url_mappings=url_mappings, + total_count=len(url_infos), + original_count=len(url_mappings), + duplicate_count=len(duplicates), + url_ids=[url_mapping.url_id for url_mapping in url_mappings] + ) @session_manager def get_urls_by_batch(self, session, batch_id: int, page: int = 1) -> List[URLInfo]: @@ -165,11 +151,6 @@ def get_urls_by_batch(self, session, batch_id: int, page: int = 1) -> List[URLIn .order_by(URL.id).limit(100).offset((page - 1) * 100).all()) return ([URLInfo(**url.__dict__) for url in urls]) - @session_manager - def is_duplicate_url(self, session, url: str) -> bool: - result = session.query(URL).filter_by(url=url).first() - return result is not None - @session_manager def insert_logs(self, session, log_infos: List[LogInfo]): for log_info in log_infos: @@ -178,106 +159,43 @@ def insert_logs(self, session, log_infos: List[LogInfo]): log.created_at = log_info.created_at session.add(log) - @session_manager - def get_logs_by_batch_id(self, session, batch_id: int) -> List[LogOutputInfo]: - logs = session.query(Log).filter_by(batch_id=batch_id).order_by(Log.created_at.asc()).all() - return ([LogOutputInfo(**log.__dict__) for log in logs]) - - @session_manager - def get_all_logs(self, session) -> List[LogInfo]: - logs = session.query(Log).all() - return ([LogInfo(**log.__dict__) for log in logs]) - - @session_manager - def add_duplicate_info(self, session, duplicate_infos: list[DuplicateInfo]): - # TODO: Add test for this method when testing CollectorDatabaseProcessor - for duplicate_info in duplicate_infos: - duplicate = Duplicate( - batch_id=duplicate_info.original_batch_id, - original_url_id=duplicate_info.original_url_id, - ) - session.add(duplicate) - @session_manager def get_batch_status(self, session, batch_id: int) -> BatchStatus: batch = session.query(Batch).filter_by(id=batch_id).first() return BatchStatus(batch.status) @session_manager - def get_recent_batch_status_info( - self, - session, - page: int, - collector_type: Optional[CollectorType] = None, - status: Optional[BatchStatus] = None, - ) -> List[BatchInfo]: - # Get only the batch_id, collector_type, status, and created_at - limit = 100 - query = (session.query(Batch) - .order_by(Batch.date_generated.desc())) - if collector_type: - query = query.filter(Batch.strategy == collector_type.value) - if status: - query = query.filter(Batch.status == status.value) - query = (query.limit(limit) - .offset((page - 1) * limit)) - batches = query.all() - return [BatchInfo(**batch.__dict__) for batch in batches] + def update_url(self, session, url_info: URLInfo): + url = session.query(URL).filter_by(id=url_info.id).first() + url.collector_metadata = url_info.collector_metadata @session_manager - def get_duplicates_by_batch_id(self, session, batch_id: int, page: int) -> List[DuplicateInfo]: - original_batch = aliased(Batch) - 
duplicate_batch = aliased(Batch) - - query = ( - session.query( - URL.url.label("source_url"), - URL.id.label("original_url_id"), - duplicate_batch.id.label("duplicate_batch_id"), - duplicate_batch.parameters.label("duplicate_batch_parameters"), - original_batch.id.label("original_batch_id"), - original_batch.parameters.label("original_batch_parameters"), - ) - .select_from(Duplicate) - .join(URL, Duplicate.original_url_id == URL.id) - .join(duplicate_batch, Duplicate.batch_id == duplicate_batch.id) - .join(original_batch, URL.batch_id == original_batch.id) - .filter(duplicate_batch.id == batch_id) - .limit(100) - .offset((page - 1) * 100) - ) - results = query.all() - final_results = [] - for result in results: - final_results.append( - DuplicateInfo( - source_url=result.source_url, - duplicate_batch_id=result.duplicate_batch_id, - duplicate_metadata=result.duplicate_batch_parameters, - original_batch_id=result.original_batch_id, - original_metadata=result.original_batch_parameters, - original_url_id=result.original_url_id + def mark_urls_as_submitted( + self, + session: Session, + infos: list[SubmittedURLInfo] + ): + for info in infos: + url_id = info.url_id + data_source_id = info.data_source_id + + query = ( + update(URL) + .where(URL.id == url_id) + .values( + outcome=URLStatus.SUBMITTED.value ) ) - return final_results - @session_manager - def delete_all_logs(self, session): - session.query(Log).delete() - - @session_manager - def delete_old_logs(self, session): - """ - Delete logs older than a day - """ - session.query(Log).filter( - Log.created_at < datetime.now() - timedelta(days=1) - ).delete() + url_data_source_object = URLDataSource( + url_id=url_id, + data_source_id=data_source_id + ) + if info.submitted_at is not None: + url_data_source_object.created_at = info.submitted_at + session.add(url_data_source_object) - @session_manager - def update_url(self, session, url_info: URLInfo): - url = session.query(URL).filter_by(id=url_info.id).first() - url.collector_metadata = url_info.collector_metadata + session.execute(query) if __name__ == "__main__": client = DatabaseClient() diff --git a/collector_db/StatementComposer.py b/collector_db/StatementComposer.py new file mode 100644 index 00000000..2ea33c5f --- /dev/null +++ b/collector_db/StatementComposer.py @@ -0,0 +1,126 @@ +from typing import Any, Optional + +from sqlalchemy import Select, select, exists, Table, func, Subquery, and_, not_, ColumnElement, case, literal, CTE +from sqlalchemy.orm import aliased + +from collector_db.enums import URLMetadataAttributeType, ValidationStatus, TaskType +from collector_db.models import URL, URLHTMLContent, AutomatedUrlAgencySuggestion, URLOptionalDataSourceMetadata, Batch, \ + ConfirmedURLAgency, LinkTaskURL, Task, UserUrlAgencySuggestion, UserRecordTypeSuggestion, UserRelevantSuggestion, \ + AutoRecordTypeSuggestion, AutoRelevantSuggestion, ReviewingUserURL +from collector_manager.enums import URLStatus, CollectorType +from core.enums import BatchStatus + + +class StatementComposer: + """ + Assists in the composition of SQLAlchemy statements + """ + + @staticmethod + def pending_urls_without_html_data() -> Select: + exclude_subquery = (select(1). + select_from(LinkTaskURL). + join(Task, LinkTaskURL.task_id == Task.id). + where(LinkTaskURL.url_id == URL.id). + where(Task.task_type == TaskType.HTML.value). + where(Task.task_status == BatchStatus.READY_TO_LABEL.value) + ) + query = ( + select(URL). + outerjoin(URLHTMLContent). + where(URLHTMLContent.id == None). 
+ where(~exists(exclude_subquery)). + where(URL.outcome == URLStatus.PENDING.value) + ) + + + return query + + + @staticmethod + def exclude_urls_with_extant_model( + statement: Select, + model: Any + ): + return (statement.where( + ~exists( + select(model.id). + where( + model.url_id == URL.id + ) + ) + )) + + + + + @staticmethod + def simple_count_subquery(model, attribute: str, label: str) -> Subquery: + attr_value = getattr(model, attribute) + return select( + attr_value, + func.count(attr_value).label(label) + ).group_by(attr_value).subquery() + + @staticmethod + def exclude_urls_with_agency_suggestions( + statement: Select + ): + # Aliases for clarity + AutomatedSuggestion = aliased(AutomatedUrlAgencySuggestion) + + # Exclude if automated suggestions exist + statement = statement.where( + ~exists().where(AutomatedSuggestion.url_id == URL.id) + ) + # Exclude if confirmed agencies exist + statement = statement.where( + ~exists().where(ConfirmedURLAgency.url_id == URL.id) + ) + return statement + + + @staticmethod + def pending_urls_missing_miscellaneous_metadata_query() -> Select: + query = select(URL).where( + and_( + URL.outcome == URLStatus.PENDING.value, + URL.name == None, + URL.description == None, + URLOptionalDataSourceMetadata.url_id == None + ) + ).outerjoin( + URLOptionalDataSourceMetadata + ).join( + Batch + ) + + return query + + + @staticmethod + def user_suggestion_not_exists( + model_to_exclude: UserUrlAgencySuggestion or + UserRecordTypeSuggestion or + UserRelevantSuggestion + ) -> ColumnElement[bool]: + # + + subquery = not_( + exists( + select(model_to_exclude) + .where( + model_to_exclude.url_id == URL.id, + ) + ) + ) + + return subquery + + @staticmethod + def count_distinct(field, label): + return func.count(func.distinct(field)).label(label) + + @staticmethod + def sum_distinct(field, label): + return func.sum(func.distinct(field)).label(label) diff --git a/collector_db/constants.py b/collector_db/constants.py new file mode 100644 index 00000000..294c8fd9 --- /dev/null +++ b/collector_db/constants.py @@ -0,0 +1,3 @@ + + +PLACEHOLDER_AGENCY_NAME = "PLACEHOLDER_AGENCY_NAME" \ No newline at end of file diff --git a/collector_db/enums.py b/collector_db/enums.py index fa66aac4..d6b3ec0f 100644 --- a/collector_db/enums.py +++ b/collector_db/enums.py @@ -17,7 +17,6 @@ class ValidationStatus(PyEnum): class ValidationSource(PyEnum): MACHINE_LEARNING = "Machine Learning" - LABEL_STUDIO = "Label Studio" MANUAL = "Manual" @@ -32,6 +31,15 @@ class URLHTMLContentType(PyEnum): H6 = "H6" DIV = "Div" +class TaskType(PyEnum): + HTML = "HTML" + RELEVANCY = "Relevancy" + RECORD_TYPE = "Record Type" + AGENCY_IDENTIFICATION = "Agency Identification" + MISC_METADATA = "Misc Metadata" + SUBMIT_APPROVED = "Submit Approved URLs" + DUPLICATE_DETECTION = "Duplicate Detection" + IDLE = "Idle" class PGEnum(TypeDecorator): impl = postgresql.ENUM diff --git a/collector_db/helper_functions.py b/collector_db/helper_functions.py index dcb161b9..4f99556a 100644 --- a/collector_db/helper_functions.py +++ b/collector_db/helper_functions.py @@ -2,15 +2,8 @@ import dotenv +from core.EnvVarManager import EnvVarManager + def get_postgres_connection_string(is_async = False): - dotenv.load_dotenv() - username = os.getenv("POSTGRES_USER") - password = os.getenv("POSTGRES_PASSWORD") - host = os.getenv("POSTGRES_HOST") - port = os.getenv("POSTGRES_PORT") - database = os.getenv("POSTGRES_DB") - driver = "postgresql" - if is_async: - driver += "+asyncpg" - return 
f"{driver}://{username}:{password}@{host}:{port}/{database}" \ No newline at end of file + return EnvVarManager.get().get_postgres_connection_string(is_async) diff --git a/collector_db/models.py b/collector_db/models.py index 273d956f..b2a86e9c 100644 --- a/collector_db/models.py +++ b/collector_db/models.py @@ -1,12 +1,13 @@ """ SQLAlchemy ORM models """ -from sqlalchemy import func, Column, Integer, String, TIMESTAMP, Float, JSON, ForeignKey, Text, UniqueConstraint +from sqlalchemy import func, Column, Integer, String, TIMESTAMP, Float, JSON, ForeignKey, Text, UniqueConstraint, \ + Boolean, DateTime, ARRAY from sqlalchemy.dialects import postgresql from sqlalchemy.orm import declarative_base, relationship -from collector_db.enums import PGEnum -from core.enums import BatchStatus +from collector_db.enums import PGEnum, TaskType +from core.enums import BatchStatus, RecordType from util.helper_functions import get_enum_values # Base class for SQLAlchemy ORM models @@ -16,6 +17,16 @@ CURRENT_TIME_SERVER_DEFAULT = func.now() +batch_status_enum = PGEnum('ready to label', 'error', 'in-process', 'aborted', name='batch_status') + +record_type_values = get_enum_values(RecordType) + +def get_created_at_column(): + return Column(TIMESTAMP, nullable=False, server_default=CURRENT_TIME_SERVER_DEFAULT) + +def get_updated_at_column(): + return Column(TIMESTAMP, nullable=False, server_default=CURRENT_TIME_SERVER_DEFAULT, onupdate=CURRENT_TIME_SERVER_DEFAULT) + class Batch(Base): __tablename__ = 'batches' @@ -23,15 +34,20 @@ class Batch(Base): id = Column(Integer, primary_key=True) strategy = Column( postgresql.ENUM( - 'example', 'ckan', 'muckrock_county_search', 'auto_googler', 'muckrock_all_search', 'muckrock_simple_search', 'common_crawler', + 'example', + 'ckan', + 'muckrock_county_search', + 'auto_googler', + 'muckrock_all_search', + 'muckrock_simple_search', + 'common_crawler', + 'manual', name='batch_strategy'), nullable=False) user_id = Column(Integer, nullable=False) # Gives the status of the batch status = Column( - postgresql.ENUM( - 'complete', 'error', 'in-process', 'aborted', - name='batch_status'), + batch_status_enum, nullable=False ) # The number of URLs in the batch @@ -70,74 +86,107 @@ class URL(Base): # The batch this URL is associated with batch_id = Column(Integer, ForeignKey('batches.id', name='fk_url_batch_id'), nullable=False) url = Column(Text, unique=True) + name = Column(String) + description = Column(Text) # The metadata from the collector collector_metadata = Column(JSON) # The outcome of the URL: submitted, human_labeling, rejected, duplicate, etc. 
outcome = Column( - postgresql.ENUM('pending', 'submitted', 'human_labeling', 'rejected', 'duplicate', 'error', name='url_status'), + postgresql.ENUM( + 'pending', + 'submitted', + 'validated', + 'rejected', + 'duplicate', + 'error', + name='url_status' + ), nullable=False ) - created_at = Column(TIMESTAMP, nullable=False, server_default=CURRENT_TIME_SERVER_DEFAULT) - updated_at = Column(TIMESTAMP, nullable=False, server_default=CURRENT_TIME_SERVER_DEFAULT) + record_type = Column(postgresql.ENUM(*record_type_values, name='record_type'), nullable=True) + created_at = get_created_at_column() + updated_at = get_updated_at_column() # Relationships batch = relationship("Batch", back_populates="urls") duplicates = relationship("Duplicate", back_populates="original_url") - url_metadata = relationship("URLMetadata", back_populates="url", cascade="all, delete-orphan") html_content = relationship("URLHTMLContent", back_populates="url", cascade="all, delete-orphan") error_info = relationship("URLErrorInfo", back_populates="url", cascade="all, delete-orphan") - - -# URL Metadata table definition -class URLMetadata(Base): - __tablename__ = 'url_metadata' - __table_args__ = (UniqueConstraint( - "url_id", - "attribute", - name="model_num2_key"), + tasks = relationship( + "Task", + secondary="link_task_urls", + back_populates="urls", + ) + automated_agency_suggestions = relationship( + "AutomatedUrlAgencySuggestion", back_populates="url") + user_agency_suggestion = relationship( + "UserUrlAgencySuggestion", uselist=False, back_populates="url") + auto_record_type_suggestion = relationship( + "AutoRecordTypeSuggestion", uselist=False, back_populates="url") + user_record_type_suggestion = relationship( + "UserRecordTypeSuggestion", uselist=False, back_populates="url") + auto_relevant_suggestion = relationship( + "AutoRelevantSuggestion", uselist=False, back_populates="url") + user_relevant_suggestion = relationship( + "UserRelevantSuggestion", uselist=False, back_populates="url") + reviewing_user = relationship( + "ReviewingUserURL", uselist=False, back_populates="url") + optional_data_source_metadata = relationship( + "URLOptionalDataSourceMetadata", uselist=False, back_populates="url") + confirmed_agencies = relationship( + "ConfirmedURLAgency", + ) + data_source = relationship( + "URLDataSource", + back_populates="url", + uselist=False ) + checked_for_duplicate = relationship( + "URLCheckedForDuplicate", + uselist=False, + back_populates="url" + ) + +class URLCheckedForDuplicate(Base): + __tablename__ = 'url_checked_for_duplicate' id = Column(Integer, primary_key=True) url_id = Column(Integer, ForeignKey('urls.id'), nullable=False) - attribute = Column( - PGEnum('Record Type', 'Agency', 'Relevant', name='url_attribute'), - nullable=False) - value = Column(Text, nullable=False) - validation_status = Column( - PGEnum('Pending Validation', 'Validated', name='metadata_validation_status'), - nullable=False) - validation_source = Column( - PGEnum('Machine Learning', 'Label Studio', 'Manual', name='validation_source'), - nullable=False - ) + created_at = get_created_at_column() + + # Relationships + url = relationship("URL", uselist=False, back_populates="checked_for_duplicate") + +class URLOptionalDataSourceMetadata(Base): + __tablename__ = 'url_optional_data_source_metadata' - # Timestamps - created_at = Column(TIMESTAMP, nullable=False, server_default=func.now()) - updated_at = Column(TIMESTAMP, nullable=False, server_default=func.now(), onupdate=func.now()) + id = Column(Integer, primary_key=True) + url_id = 
Column(Integer, ForeignKey('urls.id'), nullable=False) + record_formats = Column(ARRAY(String), nullable=True) + data_portal_type = Column(String, nullable=True) + supplying_entity = Column(String, nullable=True) # Relationships - url = relationship("URL", back_populates="url_metadata") - annotations = relationship("MetadataAnnotation", back_populates="url_metadata") + url = relationship("URL", uselist=False, back_populates="optional_data_source_metadata") -class MetadataAnnotation(Base): - __tablename__ = 'metadata_annotations' - __table_args__ = (UniqueConstraint( - "user_id", - "metadata_id", - name="metadata_annotations_uq_user_id_metadata_id"), +class ReviewingUserURL(Base): + __tablename__ = 'reviewing_user_url' + __table_args__ = ( + UniqueConstraint( + "url_id", + name="approving_user_url_uq_user_id_url_id"), ) id = Column(Integer, primary_key=True) user_id = Column(Integer, nullable=False) - metadata_id = Column(Integer, ForeignKey('url_metadata.id'), nullable=False) - value = Column(Text, nullable=False) - created_at = Column(TIMESTAMP, nullable=False, server_default=func.now()) + url_id = Column(Integer, ForeignKey('urls.id'), nullable=False) + created_at = get_created_at_column() # Relationships - url_metadata = relationship("URLMetadata", back_populates="annotations") + url = relationship("URL", uselist=False, back_populates="reviewing_user") class RootURL(Base): - __tablename__ = 'root_urls' + __tablename__ = 'root_url_cache' __table_args__ = ( UniqueConstraint( "url", @@ -148,19 +197,26 @@ class RootURL(Base): url = Column(String, nullable=False) page_title = Column(String, nullable=False) page_description = Column(String, nullable=True) - updated_at = Column(TIMESTAMP, nullable=False, server_default=func.now(), onupdate=func.now()) + updated_at = get_updated_at_column() class URLErrorInfo(Base): __tablename__ = 'url_error_info' + __table_args__ = (UniqueConstraint( + "url_id", + "task_id", + name="uq_url_id_error"), + ) id = Column(Integer, primary_key=True) url_id = Column(Integer, ForeignKey('urls.id'), nullable=False) error = Column(Text, nullable=False) - updated_at = Column(TIMESTAMP, nullable=False, server_default=CURRENT_TIME_SERVER_DEFAULT) + updated_at = get_updated_at_column() + task_id = Column(Integer, ForeignKey('tasks.id'), nullable=False) # Relationships url = relationship("URL", back_populates="error_info") + task = relationship("Task", back_populates="errored_urls") class URLHTMLContent(Base): __tablename__ = 'url_html_content' @@ -170,14 +226,14 @@ class URLHTMLContent(Base): name="uq_url_id_content_type"), ) - id = Column(Integer, primary_key=True) + id = Column(Integer, primary_key=True, autoincrement=True) url_id = Column(Integer, ForeignKey('urls.id'), nullable=False) content_type = Column( PGEnum('Title', 'Description', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'Div', name='url_html_content_type'), nullable=False) content = Column(Text, nullable=False) - updated_at = Column(TIMESTAMP, nullable=False, server_default=func.now(), onupdate=func.now()) + updated_at = get_updated_at_column() # Relationships url = relationship("URL", back_populates="html_content") @@ -214,7 +270,7 @@ class Log(Base): id = Column(Integer, primary_key=True) batch_id = Column(Integer, ForeignKey('batches.id'), nullable=False) log = Column(Text, nullable=False) - created_at = Column(TIMESTAMP, nullable=False, server_default=CURRENT_TIME_SERVER_DEFAULT) + created_at = get_created_at_column() # Relationships batch = relationship("Batch", back_populates="logs") @@ -227,7 +283,213 @@ 
class Missing(Base): record_type = Column(String, nullable=False) batch_id = Column(Integer, ForeignKey('batches.id')) strategy_used = Column(Text, nullable=False) - date_searched = Column(TIMESTAMP, nullable=False, server_default=CURRENT_TIME_SERVER_DEFAULT) + date_searched = get_created_at_column() # Relationships batch = relationship("Batch", back_populates="missings") + +class Task(Base): + __tablename__ = 'tasks' + + id = Column(Integer, primary_key=True) + task_type = Column( + PGEnum( + *[task_type.value for task_type in TaskType], + name='task_type' + ), nullable=False) + task_status = Column(batch_status_enum, nullable=False) + updated_at = get_updated_at_column() + + # Relationships + urls = relationship( + "URL", + secondary="link_task_urls", + back_populates="tasks" + ) + error = relationship("TaskError", back_populates="task") + errored_urls = relationship("URLErrorInfo", back_populates="task") + +class LinkTaskURL(Base): + __tablename__ = 'link_task_urls' + __table_args__ = (UniqueConstraint( + "task_id", + "url_id", + name="uq_task_id_url_id"), + ) + + task_id = Column(Integer, ForeignKey('tasks.id', ondelete="CASCADE"), primary_key=True) + url_id = Column(Integer, ForeignKey('urls.id', ondelete="CASCADE"), primary_key=True) + + + +class TaskError(Base): + __tablename__ = 'task_errors' + + id = Column(Integer, primary_key=True) + task_id = Column(Integer, ForeignKey('tasks.id', ondelete="CASCADE"), nullable=False) + error = Column(Text, nullable=False) + updated_at = get_updated_at_column() + + # Relationships + task = relationship("Task", back_populates="error") + + __table_args__ = (UniqueConstraint( + "task_id", + "error", + name="uq_task_id_error"), + ) + +class Agency(Base): + __tablename__ = "agencies" + + agency_id = Column(Integer, primary_key=True) + name = Column(String, nullable=False) + state = Column(String, nullable=True) + county = Column(String, nullable=True) + locality = Column(String, nullable=True) + updated_at = get_updated_at_column() + + # Relationships + automated_suggestions = relationship("AutomatedUrlAgencySuggestion", back_populates="agency") + user_suggestions = relationship("UserUrlAgencySuggestion", back_populates="agency") + confirmed_urls = relationship("ConfirmedURLAgency", back_populates="agency") + +class ConfirmedURLAgency(Base): + __tablename__ = "confirmed_url_agency" + + id = Column(Integer, primary_key=True, autoincrement=True) + url_id = Column(Integer, ForeignKey("urls.id"), nullable=False) + agency_id = Column(Integer, ForeignKey("agencies.agency_id"), nullable=False) + + url = relationship("URL", back_populates="confirmed_agencies") + agency = relationship("Agency", back_populates="confirmed_urls") + + __table_args__ = ( + UniqueConstraint("url_id", "agency_id", name="uq_confirmed_url_agency"), + ) + +class AutomatedUrlAgencySuggestion(Base): + __tablename__ = "automated_url_agency_suggestions" + + id = Column(Integer, primary_key=True, autoincrement=True) + agency_id = Column(Integer, ForeignKey("agencies.agency_id"), nullable=True) + url_id = Column(Integer, ForeignKey("urls.id"), nullable=False) + is_unknown = Column(Boolean, nullable=True) + + agency = relationship("Agency", back_populates="automated_suggestions") + url = relationship("URL", back_populates="automated_agency_suggestions") + + __table_args__ = ( + UniqueConstraint("agency_id", "url_id", name="uq_automated_url_agency_suggestions"), + ) + + +class UserUrlAgencySuggestion(Base): + __tablename__ = "user_url_agency_suggestions" + + id = Column(Integer, 
primary_key=True, autoincrement=True) + agency_id = Column(Integer, ForeignKey("agencies.agency_id"), nullable=True) + url_id = Column(Integer, ForeignKey("urls.id"), nullable=False) + user_id = Column(Integer, nullable=False) + is_new = Column(Boolean, nullable=True) + + agency = relationship("Agency", back_populates="user_suggestions") + url = relationship("URL", back_populates="user_agency_suggestion") + + __table_args__ = ( + UniqueConstraint("agency_id", "url_id", "user_id", name="uq_user_url_agency_suggestions"), + ) + +class AutoRelevantSuggestion(Base): + __tablename__ = "auto_relevant_suggestions" + + id = Column(Integer, primary_key=True, autoincrement=True) + url_id = Column(Integer, ForeignKey("urls.id"), nullable=False) + relevant = Column(Boolean, nullable=True) + created_at = get_created_at_column() + updated_at = get_updated_at_column() + + __table_args__ = ( + UniqueConstraint("url_id", name="auto_relevant_suggestions_uq_url_id"), + ) + + # Relationships + + url = relationship("URL", back_populates="auto_relevant_suggestion") + + +class AutoRecordTypeSuggestion(Base): + __tablename__ = "auto_record_type_suggestions" + + id = Column(Integer, primary_key=True, autoincrement=True) + url_id = Column(Integer, ForeignKey("urls.id"), nullable=False) + record_type = Column(postgresql.ENUM(*record_type_values, name='record_type'), nullable=False) + created_at = get_created_at_column() + updated_at = get_updated_at_column() + + __table_args__ = ( + UniqueConstraint("url_id", name="auto_record_type_suggestions_uq_url_id"), + ) + + # Relationships + + url = relationship("URL", back_populates="auto_record_type_suggestion") + +class UserRelevantSuggestion(Base): + __tablename__ = "user_relevant_suggestions" + + id = Column(Integer, primary_key=True, autoincrement=True) + url_id = Column(Integer, ForeignKey("urls.id"), nullable=False) + user_id = Column(Integer, nullable=False) + relevant = Column(Boolean, nullable=False) + created_at = get_created_at_column() + updated_at = get_updated_at_column() + + __table_args__ = ( + UniqueConstraint("url_id", "user_id", name="uq_user_relevant_suggestions"), + ) + + # Relationships + + url = relationship("URL", back_populates="user_relevant_suggestion") + + +class UserRecordTypeSuggestion(Base): + __tablename__ = "user_record_type_suggestions" + + id = Column(Integer, primary_key=True, autoincrement=True) + url_id = Column(Integer, ForeignKey("urls.id"), nullable=False) + user_id = Column(Integer, nullable=False) + record_type = Column(postgresql.ENUM(*record_type_values, name='record_type'), nullable=False) + created_at = get_created_at_column() + updated_at = get_updated_at_column() + + __table_args__ = ( + UniqueConstraint("url_id", "user_id", name="uq_user_record_type_suggestions"), + ) + + # Relationships + + url = relationship("URL", back_populates="user_record_type_suggestion") + +class BacklogSnapshot(Base): + __tablename__ = "backlog_snapshot" + + id = Column(Integer, primary_key=True, autoincrement=True) + count_pending_total = Column(Integer, nullable=False) + created_at = get_created_at_column() + +class URLDataSource(Base): + __tablename__ = "url_data_sources" + + id = Column(Integer, primary_key=True, autoincrement=True) + url_id = Column(Integer, ForeignKey("urls.id"), nullable=False) + data_source_id = Column(Integer, nullable=False) + created_at = get_created_at_column() + + # Relationships + url = relationship( + "URL", + back_populates="data_source", + uselist=False + ) \ No newline at end of file diff --git 
a/collector_manager/CollectorBase.py b/collector_manager/AsyncCollectorBase.py similarity index 54% rename from collector_manager/CollectorBase.py rename to collector_manager/AsyncCollectorBase.py index 4fcb8f58..a842a9c0 100644 --- a/collector_manager/CollectorBase.py +++ b/collector_manager/AsyncCollectorBase.py @@ -1,40 +1,38 @@ -""" -Base class for all collectors -""" import abc -import threading +import asyncio import time from abc import ABC -from typing import Optional, Type +from typing import Type, Optional from pydantic import BaseModel +from collector_db.AsyncDatabaseClient import AsyncDatabaseClient from collector_db.DTOs.InsertURLsInfo import InsertURLsInfo from collector_db.DTOs.LogInfo import LogInfo -from collector_db.DatabaseClient import DatabaseClient from collector_manager.enums import CollectorType -from core.CoreLogger import CoreLogger +from core.AsyncCoreLogger import AsyncCoreLogger +from core.FunctionTrigger import FunctionTrigger from core.enums import BatchStatus from core.preprocessors.PreprocessorBase import PreprocessorBase -class CollectorAbortException(Exception): - pass - -class CollectorBase(ABC): +class AsyncCollectorBase(ABC): collector_type: CollectorType = None preprocessor: Type[PreprocessorBase] = None + def __init__( self, batch_id: int, dto: BaseModel, - logger: CoreLogger, - db_client: DatabaseClient, + logger: AsyncCoreLogger, + adb_client: AsyncDatabaseClient, raise_error: bool = False, + post_collection_function_trigger: Optional[FunctionTrigger] = None, ) -> None: + self.post_collection_function_trigger = post_collection_function_trigger self.batch_id = batch_id - self.db_client = db_client + self.adb_client = adb_client self.dto = dto self.data: Optional[BaseModel] = None self.logger = logger @@ -42,11 +40,9 @@ def __init__( self.start_time = None self.compute_time = None self.raise_error = raise_error - # # TODO: Determine how to update this in some of the other collectors - self._stop_event = threading.Event() @abc.abstractmethod - def run_implementation(self) -> None: + async def run_implementation(self) -> None: """ This is the method that will be overridden by each collector No other methods should be modified except for this one. 
@@ -56,17 +52,17 @@ def run_implementation(self) -> None: """ raise NotImplementedError - def start_timer(self) -> None: + async def start_timer(self) -> None: self.start_time = time.time() - def stop_timer(self) -> None: + async def stop_timer(self) -> None: self.compute_time = time.time() - self.start_time - def handle_error(self, e: Exception) -> None: + async def handle_error(self, e: Exception) -> None: if self.raise_error: raise e - self.log(f"Error: {e}") - self.db_client.update_batch_post_collection( + await self.log(f"Error: {e}") + await self.adb_client.update_batch_post_collection( batch_id=self.batch_id, batch_status=self.status, compute_time=self.compute_time, @@ -75,19 +71,19 @@ def handle_error(self, e: Exception) -> None: duplicate_url_count=0 ) - def process(self) -> None: - self.log("Processing collector...", allow_abort=False) + async def process(self) -> None: + await self.log("Processing collector...") preprocessor = self.preprocessor() url_infos = preprocessor.preprocess(self.data) - self.log(f"URLs processed: {len(url_infos)}", allow_abort=False) + await self.log(f"URLs processed: {len(url_infos)}") - self.log("Inserting URLs...", allow_abort=False) - insert_urls_info: InsertURLsInfo = self.db_client.insert_urls( + await self.log("Inserting URLs...") + insert_urls_info: InsertURLsInfo = await self.adb_client.insert_urls( url_infos=url_infos, batch_id=self.batch_id ) - self.log("Updating batch...", allow_abort=False) - self.db_client.update_batch_post_collection( + await self.log("Updating batch...") + await self.adb_client.update_batch_post_collection( batch_id=self.batch_id, total_url_count=insert_urls_info.total_count, duplicate_url_count=insert_urls_info.duplicate_count, @@ -95,21 +91,23 @@ def process(self) -> None: batch_status=self.status, compute_time=self.compute_time ) - self.log("Done processing collector.", allow_abort=False) + await self.log("Done processing collector.") + if self.post_collection_function_trigger is not None: + await self.post_collection_function_trigger.trigger_or_rerun() - def run(self) -> None: + async def run(self) -> None: try: - self.start_timer() - self.run_implementation() - self.stop_timer() - self.log("Collector completed successfully.") - self.close() - self.process() - except CollectorAbortException: - self.stop_timer() + await self.start_timer() + await self.run_implementation() + await self.stop_timer() + await self.log("Collector completed successfully.") + await self.close() + await self.process() + except asyncio.CancelledError: + await self.stop_timer() self.status = BatchStatus.ABORTED - self.db_client.update_batch_post_collection( + await self.adb_client.update_batch_post_collection( batch_id=self.batch_id, batch_status=BatchStatus.ABORTED, compute_time=self.compute_time, @@ -118,22 +116,18 @@ def run(self) -> None: duplicate_url_count=0 ) except Exception as e: - self.stop_timer() + await self.stop_timer() self.status = BatchStatus.ERROR - self.handle_error(e) + await self.handle_error(e) - def log(self, message: str, allow_abort = True) -> None: - if self._stop_event.is_set() and allow_abort: - raise CollectorAbortException - self.logger.log(LogInfo( + async def log( + self, + message: str, + ) -> None: + await self.logger.log(LogInfo( batch_id=self.batch_id, log=message )) - def abort(self) -> None: - self._stop_event.set() # Signal the thread to stop - self.log("Collector was aborted.", allow_abort=False) - - def close(self) -> None: - self._stop_event.set() - self.status = BatchStatus.COMPLETE + async def 
close(self) -> None: + self.status = BatchStatus.READY_TO_LABEL diff --git a/collector_manager/AsyncCollectorManager.py b/collector_manager/AsyncCollectorManager.py new file mode 100644 index 00000000..1851bfc9 --- /dev/null +++ b/collector_manager/AsyncCollectorManager.py @@ -0,0 +1,94 @@ +import asyncio +from http import HTTPStatus +from typing import Dict + +from fastapi import HTTPException +from pydantic import BaseModel + +from collector_db.AsyncDatabaseClient import AsyncDatabaseClient +from collector_manager.AsyncCollectorBase import AsyncCollectorBase +from collector_manager.CollectorManager import InvalidCollectorError +from collector_manager.collector_mapping import COLLECTOR_MAPPING +from collector_manager.enums import CollectorType +from core.AsyncCoreLogger import AsyncCoreLogger +from core.FunctionTrigger import FunctionTrigger + + +class AsyncCollectorManager: + + def __init__( + self, + logger: AsyncCoreLogger, + adb_client: AsyncDatabaseClient, + dev_mode: bool = False, + post_collection_function_trigger: FunctionTrigger = None + ): + self.collectors: Dict[int, AsyncCollectorBase] = {} + self.adb_client = adb_client + self.logger = logger + self.async_tasks: dict[int, asyncio.Task] = {} + self.dev_mode = dev_mode + self.post_collection_function_trigger = post_collection_function_trigger + + async def has_collector(self, cid: int) -> bool: + return cid in self.collectors + + async def start_async_collector( + self, + collector_type: CollectorType, + batch_id: int, + dto: BaseModel, + ) -> None: + if batch_id in self.collectors: + raise ValueError(f"Collector with batch_id {batch_id} is already running.") + try: + collector_class = COLLECTOR_MAPPING[collector_type] + collector = collector_class( + batch_id=batch_id, + dto=dto, + logger=self.logger, + adb_client=self.adb_client, + raise_error=True if self.dev_mode else False, + post_collection_function_trigger=self.post_collection_function_trigger + ) + except KeyError: + raise InvalidCollectorError(f"Collector {collector_type.value} not found.") + + self.collectors[batch_id] = collector + + task = asyncio.create_task(collector.run()) + self.async_tasks[batch_id] = task + + def try_getting_collector(self, cid): + collector = self.collectors.get(cid) + if collector is None: + raise InvalidCollectorError(f"Collector with CID {cid} not found.") + return collector + + async def abort_collector_async(self, cid: int) -> None: + task = self.async_tasks.get(cid) + if not task: + raise HTTPException(status_code=HTTPStatus.OK, detail="Task not found") + if task is not None: + task.cancel() + try: + await task # Await so cancellation propagates + except asyncio.CancelledError: + pass + + self.async_tasks.pop(cid) + + async def shutdown_all_collectors(self) -> None: + while self.async_tasks: + cid, task = self.async_tasks.popitem() + if task.done(): + try: + task.result() + except Exception as e: + raise e + else: + task.cancel() + try: + await task # Await so cancellation propagates + except asyncio.CancelledError: + pass \ No newline at end of file diff --git a/collector_manager/CollectorManager.py b/collector_manager/CollectorManager.py index 658b20a8..9fd5a428 100644 --- a/collector_manager/CollectorManager.py +++ b/collector_manager/CollectorManager.py @@ -3,115 +3,6 @@ Can start, stop, and get info on running collectors And manages the retrieval of collector info """ -import threading -from concurrent.futures import Future, ThreadPoolExecutor -from typing import Dict, List - -from pydantic import BaseModel - -from 
collector_db.DatabaseClient import DatabaseClient -from collector_manager.CollectorBase import CollectorBase -from collector_manager.collector_mapping import COLLECTOR_MAPPING -from collector_manager.enums import CollectorType -from core.CoreLogger import CoreLogger - class InvalidCollectorError(Exception): pass - -# Collector Manager Class -class CollectorManager: - def __init__( - self, - logger: CoreLogger, - db_client: DatabaseClient, - dev_mode: bool = False, - max_workers: int = 10 # Limit the number of concurrent threads - ): - self.collectors: Dict[int, CollectorBase] = {} - self.futures: Dict[int, Future] = {} - self.threads: Dict[int, threading.Thread] = {} - self.db_client = db_client - self.logger = logger - self.lock = threading.Lock() - self.max_workers = max_workers - self.dev_mode = dev_mode - self.executor = ThreadPoolExecutor(max_workers=self.max_workers) - - def restart_executor(self): - self.executor = ThreadPoolExecutor(max_workers=self.max_workers) - - def list_collectors(self) -> List[str]: - return [cm.value for cm in list(COLLECTOR_MAPPING.keys())] - - def start_collector( - self, - collector_type: CollectorType, - batch_id: int, - dto: BaseModel - ) -> None: - with self.lock: - # If executor is shutdown, restart it - if self.executor._shutdown: - self.restart_executor() - - if batch_id in self.collectors: - raise ValueError(f"Collector with batch_id {batch_id} is already running.") - try: - collector_class = COLLECTOR_MAPPING[collector_type] - collector = collector_class( - batch_id=batch_id, - dto=dto, - logger=self.logger, - db_client=self.db_client, - raise_error=True if self.dev_mode else False - ) - except KeyError: - raise InvalidCollectorError(f"Collector {collector_type.value} not found.") - self.collectors[batch_id] = collector - - future = self.executor.submit(collector.run) - self.futures[batch_id] = future - - # thread = threading.Thread(target=collector.run) - # self.threads[batch_id] = thread - # thread.start() - - def get_info(self, cid: str) -> str: - collector = self.collectors.get(cid) - if not collector: - return f"Collector with CID {cid} not found." 
- logs = "\n".join(collector.logs[-3:]) # Show the last 3 logs - return f"{cid} ({collector.name}) - {collector.status}\nLogs:\n{logs}" - - - def try_getting_collector(self, cid): - collector = self.collectors.get(cid) - if collector is None: - raise InvalidCollectorError(f"Collector with CID {cid} not found.") - return collector - - def abort_collector(self, cid: int) -> None: - collector = self.try_getting_collector(cid) - # Get collector thread - thread = self.threads.get(cid) - future = self.futures.get(cid) - collector.abort() - # thread.join(timeout=1) - self.collectors.pop(cid) - self.futures.pop(cid) - # self.threads.pop(cid) - - def shutdown_all_collectors(self) -> None: - with self.lock: - for cid, future in self.futures.items(): - if future.done(): - try: - future.result() - except Exception as e: - raise e - self.collectors[cid].abort() - - self.executor.shutdown(wait=True) - self.collectors.clear() - self.futures.clear() \ No newline at end of file diff --git a/collector_manager/ExampleCollector.py b/collector_manager/ExampleCollector.py index c5c2a69c..7bc8a583 100644 --- a/collector_manager/ExampleCollector.py +++ b/collector_manager/ExampleCollector.py @@ -3,27 +3,32 @@ Exists as a proof of concept for collector functionality """ -import time +import asyncio -from collector_manager.CollectorBase import CollectorBase +from collector_manager.AsyncCollectorBase import AsyncCollectorBase from collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO from collector_manager.DTOs.ExampleOutputDTO import ExampleOutputDTO from collector_manager.enums import CollectorType from core.preprocessors.ExamplePreprocessor import ExamplePreprocessor -class ExampleCollector(CollectorBase): +class ExampleCollector(AsyncCollectorBase): collector_type = CollectorType.EXAMPLE preprocessor = ExamplePreprocessor - def run_implementation(self) -> None: + async def run_implementation(self) -> None: dto: ExampleInputDTO = self.dto sleep_time = dto.sleep_time for i in range(sleep_time): # Simulate a task - self.log(f"Step {i + 1}/{sleep_time}") - time.sleep(1) # Simulate work + await self.log(f"Step {i + 1}/{sleep_time}") + await self.sleep() self.data = ExampleOutputDTO( message=f"Data collected by {self.batch_id}", urls=["https://example.com", "https://example.com/2"], parameters=self.dto.model_dump(), ) + + @staticmethod + async def sleep(): + # Simulate work + await asyncio.sleep(1) \ No newline at end of file diff --git a/collector_manager/enums.py b/collector_manager/enums.py index 3820f274..5b89ffe2 100644 --- a/collector_manager/enums.py +++ b/collector_manager/enums.py @@ -8,11 +8,12 @@ class CollectorType(Enum): MUCKROCK_COUNTY_SEARCH = "muckrock_county_search" MUCKROCK_ALL_SEARCH = "muckrock_all_search" CKAN = "ckan" + MANUAL = "manual" class URLStatus(Enum): PENDING = "pending" SUBMITTED = "submitted" - HUMAN_LABELING = "human_labeling" - REJECTED = "rejected" - DUPLICATE = "duplicate" + VALIDATED = "validated" ERROR = "error" + DUPLICATE = "duplicate" + REJECTED = "rejected" diff --git a/core/AsyncCore.py b/core/AsyncCore.py index 67f134b1..e7b7f534 100644 --- a/core/AsyncCore.py +++ b/core/AsyncCore.py @@ -1,17 +1,43 @@ -import logging +from typing import Optional + +from pydantic import BaseModel +from sqlalchemy.exc import IntegrityError from collector_db.AsyncDatabaseClient import AsyncDatabaseClient -from collector_db.DTOs.URLAnnotationInfo import URLAnnotationInfo -from core.DTOs.GetNextURLForRelevanceAnnotationResponse import GetNextURLForRelevanceAnnotationResponse +from 
collector_db.DTOs.BatchInfo import BatchInfo +from collector_db.DTOs.GetTaskStatusResponseInfo import GetTaskStatusResponseInfo +from collector_db.enums import TaskType +from collector_manager.AsyncCollectorManager import AsyncCollectorManager +from collector_manager.enums import CollectorType +from core.DTOs.AllAnnotationPostInfo import AllAnnotationPostInfo +from core.DTOs.CollectorStartInfo import CollectorStartInfo +from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo +from core.DTOs.GetBatchLogsResponse import GetBatchLogsResponse +from core.DTOs.GetBatchStatusResponse import GetBatchStatusResponse +from core.DTOs.GetDuplicatesByBatchResponse import GetDuplicatesByBatchResponse +from core.DTOs.GetMetricsBacklogResponse import GetMetricsBacklogResponseDTO +from core.DTOs.GetMetricsBatchesAggregatedResponseDTO import GetMetricsBatchesAggregatedResponseDTO +from core.DTOs.GetMetricsBatchesBreakdownResponseDTO import GetMetricsBatchesBreakdownResponseDTO +from core.DTOs.GetMetricsURLsAggregatedResponseDTO import GetMetricsURLsAggregatedResponseDTO +from core.DTOs.GetMetricsURLsBreakdownPendingResponseDTO import GetMetricsURLsBreakdownPendingResponseDTO +from core.DTOs.GetMetricsURLsBreakdownSubmittedResponseDTO import GetMetricsURLsBreakdownSubmittedResponseDTO +from core.DTOs.GetNextRecordTypeAnnotationResponseInfo import GetNextRecordTypeAnnotationResponseOuterInfo +from core.DTOs.GetNextRelevanceAnnotationResponseInfo import GetNextRelevanceAnnotationResponseOuterInfo +from core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAnnotationResponse, \ + URLAgencyAnnotationPostInfo +from core.DTOs.GetNextURLForAllAnnotationResponse import GetNextURLForAllAnnotationResponse +from core.DTOs.GetTasksResponse import GetTasksResponse +from core.DTOs.GetURLsByBatchResponse import GetURLsByBatchResponse from core.DTOs.GetURLsResponseInfo import GetURLsResponseInfo -from core.DTOs.RelevanceAnnotationInfo import RelevanceAnnotationPostInfo -from core.DTOs.RelevanceAnnotationRequestInfo import RelevanceAnnotationRequestInfo -from core.classes.URLHTMLCycler import URLHTMLCycler -from core.classes.URLRelevanceHuggingfaceCycler import URLRelevanceHuggingfaceCycler -from html_tag_collector.DataClassTags import convert_to_response_html_info -from html_tag_collector.ResponseParser import HTMLResponseParser -from html_tag_collector.URLRequestInterface import URLRequestInterface -from hugging_face.HuggingFaceInterface import HuggingFaceInterface +from core.DTOs.ManualBatchInputDTO import ManualBatchInputDTO +from core.DTOs.ManualBatchResponseDTO import ManualBatchResponseDTO +from core.DTOs.MessageResponse import MessageResponse +from core.DTOs.SearchURLResponse import SearchURLResponse +from core.TaskManager import TaskManager +from core.classes.ErrorManager import ErrorManager +from core.enums import BatchStatus, RecordType, AnnotationType + +from security_manager.SecurityManager import AccessInfo class AsyncCore: @@ -19,71 +45,281 @@ class AsyncCore: def __init__( self, adb_client: AsyncDatabaseClient, - huggingface_interface: HuggingFaceInterface, - url_request_interface: URLRequestInterface, - html_parser: HTMLResponseParser + collector_manager: AsyncCollectorManager, + task_manager: TaskManager ): + self.task_manager = task_manager self.adb_client = adb_client - self.huggingface_interface = huggingface_interface - self.url_request_interface = url_request_interface - self.html_parser = html_parser - self.logger = logging.getLogger(__name__) - 
self.logger.setLevel(logging.INFO) - async def run_url_html_cycle(self): - self.logger.info("Running URL HTML Cycle") - cycler = URLHTMLCycler( - adb_client=self.adb_client, - url_request_interface=self.url_request_interface, - html_parser=self.html_parser - ) - await cycler.cycle() + self.collector_manager = collector_manager + + + async def get_urls(self, page: int, errors: bool) -> GetURLsResponseInfo: + return await self.adb_client.get_urls(page=page, errors=errors) + + async def shutdown(self): + await self.collector_manager.shutdown_all_collectors() + + #region Batch + async def get_batch_info(self, batch_id: int) -> BatchInfo: + return await self.adb_client.get_batch_by_id(batch_id) + + async def get_urls_by_batch(self, batch_id: int, page: int = 1) -> GetURLsByBatchResponse: + url_infos = await self.adb_client.get_urls_by_batch(batch_id, page) + return GetURLsByBatchResponse(urls=url_infos) + + async def abort_batch(self, batch_id: int) -> MessageResponse: + await self.collector_manager.abort_collector_async(cid=batch_id) + return MessageResponse(message=f"Batch aborted.") - async def run_url_relevance_huggingface_cycle(self): - self.logger.info("Running URL Relevance Huggingface Cycle") - cycler = URLRelevanceHuggingfaceCycler( - adb_client=self.adb_client, - huggingface_interface=self.huggingface_interface + async def get_duplicate_urls_by_batch(self, batch_id: int, page: int = 1) -> GetDuplicatesByBatchResponse: + dup_infos = await self.adb_client.get_duplicates_by_batch_id(batch_id, page=page) + return GetDuplicatesByBatchResponse(duplicates=dup_infos) + + async def get_batch_statuses( + self, + collector_type: Optional[CollectorType], + status: Optional[BatchStatus], + has_pending_urls: Optional[bool], + page: int + ) -> GetBatchStatusResponse: + results = await self.adb_client.get_recent_batch_status_info( + collector_type=collector_type, + status=status, + page=page, + has_pending_urls=has_pending_urls ) - await cycler.cycle() + return GetBatchStatusResponse(results=results) + + async def get_batch_logs(self, batch_id: int) -> GetBatchLogsResponse: + logs = await self.adb_client.get_logs_by_batch_id(batch_id) + return GetBatchLogsResponse(logs=logs) - async def run_cycles(self): - await self.run_url_html_cycle() - await self.run_url_relevance_huggingface_cycle() + #endregion - async def convert_to_relevance_annotation_request_info(self, url_info: URLAnnotationInfo) -> RelevanceAnnotationRequestInfo: - response_html_info = convert_to_response_html_info( - html_content_infos=url_info.html_infos + # region Collector + async def initiate_collector( + self, + collector_type: CollectorType, + user_id: int, + dto: Optional[BaseModel] = None, + ): + """ + Reserves a batch ID from the database + and starts the requisite collector + """ + + batch_info = BatchInfo( + strategy=collector_type.value, + status=BatchStatus.IN_PROCESS, + parameters=dto.model_dump(), + user_id=user_id ) - return RelevanceAnnotationRequestInfo( - url=url_info.url, - metadata_id=url_info.metadata_id, - html_info=response_html_info + batch_id = await self.adb_client.insert_batch(batch_info) + await self.collector_manager.start_async_collector( + collector_type=collector_type, + batch_id=batch_id, + dto=dto + ) + return CollectorStartInfo( + batch_id=batch_id, + message=f"Started {collector_type.value} collector." 
) - async def get_next_url_for_relevance_annotation(self, user_id: int) -> GetNextURLForRelevanceAnnotationResponse: - response = GetNextURLForRelevanceAnnotationResponse() - ua_info: URLAnnotationInfo = await self.adb_client.get_next_url_for_relevance_annotation(user_id=user_id) - if ua_info is None: - return response - # Format result - result = await self.convert_to_relevance_annotation_request_info(url_info=ua_info) - response.next_annotation = result - return response + # endregion + async def get_current_task_status(self) -> GetTaskStatusResponseInfo: + return GetTaskStatusResponseInfo(status=self.task_manager.task_status) + + async def run_tasks(self): + await self.task_manager.trigger_task_run() + async def get_tasks( + self, + page: int, + task_type: TaskType, + task_status: BatchStatus + ) -> GetTasksResponse: + return await self.task_manager.get_tasks( + page=page, + task_type=task_type, + task_status=task_status + ) + + async def get_task_info(self, task_id): + return await self.task_manager.get_task_info(task_id) + + #region Annotations and Review async def submit_url_relevance_annotation( self, user_id: int, - metadata_id: int, - annotation: RelevanceAnnotationPostInfo - ) -> GetNextURLForRelevanceAnnotationResponse: - await self.adb_client.add_relevance_annotation( + url_id: int, + relevant: bool + ): + try: + return await self.adb_client.add_user_relevant_suggestion( + user_id=user_id, + url_id=url_id, + relevant=relevant + ) + except IntegrityError as e: + return await ErrorManager.raise_annotation_exists_error( + annotation_type=AnnotationType.RELEVANCE, + url_id=url_id + ) + + async def get_next_url_for_relevance_annotation( + self, + user_id: int, + batch_id: Optional[int] + ) -> GetNextRelevanceAnnotationResponseOuterInfo: + next_annotation = await self.adb_client.get_next_url_for_relevance_annotation( user_id=user_id, - metadata_id=metadata_id, - annotation_info=annotation) - return await self.get_next_url_for_relevance_annotation(user_id=user_id) + batch_id=batch_id + ) + return GetNextRelevanceAnnotationResponseOuterInfo( + next_annotation=next_annotation + ) - async def get_urls(self, page: int, errors: bool) -> GetURLsResponseInfo: - return await self.adb_client.get_urls(page=page, errors=errors) + async def get_next_url_for_record_type_annotation( + self, + user_id: int, + batch_id: Optional[int] + ) -> GetNextRecordTypeAnnotationResponseOuterInfo: + next_annotation = await self.adb_client.get_next_url_for_record_type_annotation( + user_id=user_id, + batch_id=batch_id + ) + return GetNextRecordTypeAnnotationResponseOuterInfo( + next_annotation=next_annotation + ) + + async def submit_url_record_type_annotation( + self, + user_id: int, + url_id: int, + record_type: RecordType, + ): + try: + return await self.adb_client.add_user_record_type_suggestion( + user_id=user_id, + url_id=url_id, + record_type=record_type + ) + except IntegrityError as e: + return await ErrorManager.raise_annotation_exists_error( + annotation_type=AnnotationType.RECORD_TYPE, + url_id=url_id + ) + + + async def get_next_url_agency_for_annotation( + self, + user_id: int, + batch_id: Optional[int] + ) -> GetNextURLForAgencyAnnotationResponse: + return await self.adb_client.get_next_url_agency_for_annotation( + user_id=user_id, + batch_id=batch_id + ) + + async def submit_url_agency_annotation( + self, + user_id: int, + url_id: int, + agency_post_info: URLAgencyAnnotationPostInfo + ) -> GetNextURLForAgencyAnnotationResponse: + if not agency_post_info.is_new and not 
agency_post_info.suggested_agency: + raise ValueError("suggested_agency must be provided if is_new is False") + + if agency_post_info.is_new: + agency_suggestion_id = None + else: + agency_suggestion_id = agency_post_info.suggested_agency + return await self.adb_client.add_agency_manual_suggestion( + user_id=user_id, + url_id=url_id, + agency_id=agency_suggestion_id, + is_new=agency_post_info.is_new, + ) + + async def get_next_source_for_review( + self, + batch_id: Optional[int] + ): + return await self.adb_client.get_next_url_for_final_review( + batch_id=batch_id + ) + + async def get_next_url_for_all_annotations( + self, + batch_id: Optional[int] + ) -> GetNextURLForAllAnnotationResponse: + return await self.adb_client.get_next_url_for_all_annotations( + batch_id=batch_id + ) + + async def submit_url_for_all_annotations( + self, + user_id: int, + url_id: int, + post_info: AllAnnotationPostInfo + ): + await self.adb_client.add_all_annotations_to_url( + user_id=user_id, + url_id=url_id, + post_info=post_info + ) + + async def approve_url( + self, + approval_info: FinalReviewApprovalInfo, + access_info: AccessInfo + ): + await self.adb_client.approve_url( + approval_info=approval_info, + user_id=access_info.user_id + ) + + + async def reject_url( + self, + url_id: int, + access_info: AccessInfo, + ): + await self.adb_client.reject_url( + url_id=url_id, + user_id=access_info.user_id + ) + + async def upload_manual_batch( + self, + dto: ManualBatchInputDTO, + user_id: int + ) -> ManualBatchResponseDTO: + return await self.adb_client.upload_manual_batch( + user_id=user_id, + dto=dto + ) + + async def search_for_url(self, url: str) -> SearchURLResponse: + return await self.adb_client.search_for_url(url) + + async def get_batches_aggregated_metrics(self) -> GetMetricsBatchesAggregatedResponseDTO: + return await self.adb_client.get_batches_aggregated_metrics() + + async def get_batches_breakdown_metrics(self, page: int) -> GetMetricsBatchesBreakdownResponseDTO: + return await self.adb_client.get_batches_breakdown_metrics(page=page) + + async def get_urls_breakdown_submitted_metrics(self) -> GetMetricsURLsBreakdownSubmittedResponseDTO: + return await self.adb_client.get_urls_breakdown_submitted_metrics() + + async def get_urls_aggregated_metrics(self) -> GetMetricsURLsAggregatedResponseDTO: + return await self.adb_client.get_urls_aggregated_metrics() + + async def get_urls_breakdown_pending_metrics(self) -> GetMetricsURLsBreakdownPendingResponseDTO: + return await self.adb_client.get_urls_breakdown_pending_metrics() + + async def get_backlog_metrics(self) -> GetMetricsBacklogResponseDTO: + return await self.adb_client.get_backlog_metrics() \ No newline at end of file diff --git a/core/AsyncCoreLogger.py b/core/AsyncCoreLogger.py new file mode 100644 index 00000000..70ca06aa --- /dev/null +++ b/core/AsyncCoreLogger.py @@ -0,0 +1,71 @@ +import asyncio + +from collector_db.AsyncDatabaseClient import AsyncDatabaseClient +from collector_db.DTOs.LogInfo import LogInfo + + +class AsyncCoreLogger: + def __init__( + self, + adb_client: AsyncDatabaseClient, + flush_interval: float = 10, + batch_size: int = 100 + ): + self.adb_client = adb_client + self.flush_interval = flush_interval + self.batch_size = batch_size + + self.log_queue = asyncio.Queue() + self.lock = asyncio.Lock() + self._flush_task: asyncio.Task | None = None + self._stop_event = asyncio.Event() + + async def __aenter__(self): + self._stop_event.clear() + self._flush_task = asyncio.create_task(self._flush_logs()) + return self + + async def 
__aexit__(self, exc_type, exc_value, traceback): + await self.shutdown() + + async def log(self, log_info: LogInfo): + await self.log_queue.put(log_info) + + async def _flush_logs(self): + while not self._stop_event.is_set(): + await asyncio.sleep(self.flush_interval) + await self.flush() + + async def flush(self): + async with self.lock: + logs: list[LogInfo] = [] + + while not self.log_queue.empty() and len(logs) < self.batch_size: + try: + log = self.log_queue.get_nowait() + logs.append(log) + except asyncio.QueueEmpty: + break + + if logs: + await self.adb_client.insert_logs(log_infos=logs) + + async def clear_log_queue(self): + while not self.log_queue.empty(): + self.log_queue.get_nowait() + + async def flush_all(self): + while not self.log_queue.empty(): + await self.flush() + + async def restart(self): + await self.flush_all() + await self.shutdown() + self._stop_event.clear() + self._flush_task = asyncio.create_task(self._flush_logs()) + + async def shutdown(self): + self._stop_event.set() + if self._flush_task: + await self._flush_task + await self.flush_all() diff --git a/core/CoreLogger.py b/core/CoreLogger.py deleted file mode 100644 index 79263c78..00000000 --- a/core/CoreLogger.py +++ /dev/null @@ -1,97 +0,0 @@ - - -import queue -import threading -import time -from concurrent.futures import Future -from concurrent.futures.thread import ThreadPoolExecutor - -from collector_db.DTOs.LogInfo import LogInfo -from collector_db.DatabaseClient import DatabaseClient - - -class CoreLogger: - def __init__( - self, - db_client: DatabaseClient, - flush_interval=10, - batch_size=100 - ): - self.db_client = db_client - self.flush_interval = flush_interval - self.batch_size = batch_size - - self.log_queue = queue.Queue() - self.lock = threading.Lock() - self.stop_event = threading.Event() - # Start the periodic flush task - self.executor = ThreadPoolExecutor(max_workers=1) - self.flush_future: Future = self.executor.submit(self._flush_logs) - - def __enter__(self): - """ - Start the logger for use in a context. - """ - return self - - def __exit__(self, exc_type, exc_value, traceback): - """ - Gracefully shut down the logger when exiting the context. - """ - self.shutdown() - - def log(self, log_info: LogInfo): - """ - Adds a log entry to the queue. - """ - self.log_queue.put(log_info) - - def _flush_logs(self): - """ - Periodically flushes logs from the queue to the database. - """ - while not self.stop_event.is_set(): - time.sleep(self.flush_interval) - self.flush() - - def flush(self): - """ - Flushes all logs from the queue to the database in batches. - """ - with self.lock: - logs: list[LogInfo] = [] - while not self.log_queue.empty() and len(logs) < self.batch_size: - try: - log = self.log_queue.get_nowait() - logs.append(log) - except queue.Empty: - break - - if logs: - try: - self.db_client.insert_logs(log_infos=logs) - except Exception as e: - # Handle logging database errors (e.g., save to fallback storage) - print(f"Error while flushing logs: {e}") - - def flush_all(self): - """ - Flushes all logs from the queue to the database. - """ - while not self.log_queue.empty(): - self.flush() - - def restart(self): - self.flush_all() - self.executor.shutdown(wait=False) - self.executor = ThreadPoolExecutor(max_workers=1) - self.flush_future = self.executor.submit(self._flush_logs) - - def shutdown(self): - """ - Stops the logger gracefully and flushes any remaining logs. 
- """ - self.stop_event.set() - # if self.flush_future and not self.flush_future.done(): - self.flush_future.result(timeout=10) - self.flush_all() # Flush remaining logs diff --git a/core/DTOs/AllAnnotationPostInfo.py b/core/DTOs/AllAnnotationPostInfo.py new file mode 100644 index 00000000..a462b40b --- /dev/null +++ b/core/DTOs/AllAnnotationPostInfo.py @@ -0,0 +1,35 @@ +from http import HTTPStatus +from typing import Optional + +from fastapi import HTTPException +from pydantic import BaseModel, model_validator + +from core.DTOs.GetNextURLForAgencyAnnotationResponse import URLAgencyAnnotationPostInfo +from core.enums import RecordType +from core.exceptions import FailedValidationException + + +class AllAnnotationPostInfo(BaseModel): + is_relevant: bool + record_type: Optional[RecordType] = None + agency: Optional[URLAgencyAnnotationPostInfo] = None + + @model_validator(mode="before") + def allow_record_type_and_agency_only_if_relevant(cls, values): + is_relevant = values.get("is_relevant") + record_type = values.get("record_type") + agency = values.get("agency") + + if not is_relevant: + if record_type is not None: + raise FailedValidationException("record_type must be None if is_relevant is False") + + if agency is not None: + raise FailedValidationException("agency must be None if is_relevant is False") + return values + # Similarly, if relevant, record_type and agency must be provided + if record_type is None: + raise FailedValidationException("record_type must be provided if is_relevant is True") + if agency is None: + raise FailedValidationException("agency must be provided if is_relevant is True") + return values \ No newline at end of file diff --git a/core/DTOs/RelevanceAnnotationRequestInfo.py b/core/DTOs/AnnotationRequestInfo.py similarity index 57% rename from core/DTOs/RelevanceAnnotationRequestInfo.py rename to core/DTOs/AnnotationRequestInfo.py index de4036db..1e886ae8 100644 --- a/core/DTOs/RelevanceAnnotationRequestInfo.py +++ b/core/DTOs/AnnotationRequestInfo.py @@ -3,7 +3,8 @@ from html_tag_collector.DataClassTags import ResponseHTMLInfo -class RelevanceAnnotationRequestInfo(BaseModel): +class AnnotationRequestInfo(BaseModel): url: str metadata_id: int - html_info: ResponseHTMLInfo \ No newline at end of file + html_info: ResponseHTMLInfo + suggested_value: str \ No newline at end of file diff --git a/core/DTOs/FinalReviewApprovalInfo.py b/core/DTOs/FinalReviewApprovalInfo.py new file mode 100644 index 00000000..d87fb628 --- /dev/null +++ b/core/DTOs/FinalReviewApprovalInfo.py @@ -0,0 +1,48 @@ +from typing import Optional + +from pydantic import BaseModel, Field + +from core.enums import RecordType + +class FinalReviewBaseInfo(BaseModel): + url_id: int = Field( + title="The id of the URL." + ) + +class FinalReviewApprovalInfo(FinalReviewBaseInfo): + record_type: Optional[RecordType] = Field( + title="The final record type of the URL." + "If none, defers to the existing value from the auto-labeler only if it exists.", + default=None + ) + agency_ids: Optional[list[int]] = Field( + title="The final confirmed agencies for the URL. " + "If none, defers to an existing confirmed agency only if that exists.", + default=None + ) + name: Optional[str] = Field( + title="The name of the source. " + "If none, defers to an existing name only if that exists.", + default=None + ) + description: Optional[str] = Field( + title="The description of the source. 
" + "If none, defers to an existing description only if that exists.", + default=None + ) + record_formats: Optional[list[str]] = Field( + title="The record formats of the source. " + "If none, defers to an existing record formats only if that exists.", + default=None + ) + data_portal_type: Optional[str] = Field( + title="The data portal type of the source. " + "If none, defers to an existing data portal type only if that exists.", + default=None + ) + supplying_entity: Optional[str] = Field( + title="The supplying entity of the source. " + "If none, defers to an existing supplying entity only if that exists.", + default=None + ) + diff --git a/core/DTOs/GetMetricsBacklogResponse.py b/core/DTOs/GetMetricsBacklogResponse.py new file mode 100644 index 00000000..8193e385 --- /dev/null +++ b/core/DTOs/GetMetricsBacklogResponse.py @@ -0,0 +1,21 @@ +from datetime import datetime + +from pydantic import BaseModel, field_validator + + +class GetMetricsBacklogResponseInnerDTO(BaseModel): + month: str + count_pending_total: int + + @field_validator("month") + @classmethod + def validate_month_format(cls, v: str) -> str: + try: + # This will raise ValueError if format doesn't match + datetime.strptime(v, "%B %Y") + except ValueError: + raise ValueError("month must be in the format 'MonthName YYYY' (e.g., 'May 2025')") + return v + +class GetMetricsBacklogResponseDTO(BaseModel): + entries: list[GetMetricsBacklogResponseInnerDTO] \ No newline at end of file diff --git a/core/DTOs/GetMetricsBatchesAggregatedResponseDTO.py b/core/DTOs/GetMetricsBatchesAggregatedResponseDTO.py new file mode 100644 index 00000000..37535f2d --- /dev/null +++ b/core/DTOs/GetMetricsBatchesAggregatedResponseDTO.py @@ -0,0 +1,25 @@ +from typing import Dict + +from pydantic import BaseModel + +from collector_manager.enums import CollectorType + + +class GetMetricsBatchesAggregatedInnerResponseDTO(BaseModel): + count_successful_batches: int + count_failed_batches: int + count_urls: int + count_urls_pending: int + count_urls_validated: int + count_urls_submitted: int + count_urls_rejected: int + count_urls_errors: int + + + +class GetMetricsBatchesAggregatedResponseDTO(BaseModel): + total_batches: int + by_strategy: Dict[ + CollectorType, + GetMetricsBatchesAggregatedInnerResponseDTO + ] \ No newline at end of file diff --git a/core/DTOs/GetMetricsBatchesBreakdownResponseDTO.py b/core/DTOs/GetMetricsBatchesBreakdownResponseDTO.py new file mode 100644 index 00000000..6572f49f --- /dev/null +++ b/core/DTOs/GetMetricsBatchesBreakdownResponseDTO.py @@ -0,0 +1,22 @@ +from datetime import datetime + +from pydantic import BaseModel + +from collector_manager.enums import CollectorType +from core.enums import BatchStatus + + +class GetMetricsBatchesBreakdownInnerResponseDTO(BaseModel): + batch_id: int + strategy: CollectorType + status: BatchStatus + created_at: datetime + count_url_total: int + count_url_pending: int + count_url_submitted: int + count_url_rejected: int + count_url_error: int + count_url_validated: int + +class GetMetricsBatchesBreakdownResponseDTO(BaseModel): + batches: list[GetMetricsBatchesBreakdownInnerResponseDTO] \ No newline at end of file diff --git a/core/DTOs/GetMetricsURLsAggregatedResponseDTO.py b/core/DTOs/GetMetricsURLsAggregatedResponseDTO.py new file mode 100644 index 00000000..66009223 --- /dev/null +++ b/core/DTOs/GetMetricsURLsAggregatedResponseDTO.py @@ -0,0 +1,14 @@ +import datetime + +from pydantic import BaseModel + + +class GetMetricsURLsAggregatedResponseDTO(BaseModel): + count_urls_total: int + 
count_urls_pending: int + count_urls_submitted: int + count_urls_rejected: int + count_urls_validated: int + count_urls_errors: int + oldest_pending_url_created_at: datetime.datetime + oldest_pending_url_id: int \ No newline at end of file diff --git a/core/DTOs/GetMetricsURLsBreakdownPendingResponseDTO.py b/core/DTOs/GetMetricsURLsBreakdownPendingResponseDTO.py new file mode 100644 index 00000000..16e596d5 --- /dev/null +++ b/core/DTOs/GetMetricsURLsBreakdownPendingResponseDTO.py @@ -0,0 +1,22 @@ +from pydantic import BaseModel, field_validator +from datetime import datetime + +class GetMetricsURLsBreakdownPendingResponseInnerDTO(BaseModel): + month: str + count_pending_total: int + count_pending_relevant_user: int + count_pending_record_type_user: int + count_pending_agency_user: int + + @field_validator("month") + @classmethod + def validate_month_format(cls, v: str) -> str: + try: + # This will raise ValueError if format doesn't match + datetime.strptime(v, "%B %Y") + except ValueError: + raise ValueError("month must be in the format 'MonthName YYYY' (e.g., 'May 2025')") + return v + +class GetMetricsURLsBreakdownPendingResponseDTO(BaseModel): + entries: list[GetMetricsURLsBreakdownPendingResponseInnerDTO] \ No newline at end of file diff --git a/core/DTOs/GetMetricsURLsBreakdownSubmittedResponseDTO.py b/core/DTOs/GetMetricsURLsBreakdownSubmittedResponseDTO.py new file mode 100644 index 00000000..2ac4e768 --- /dev/null +++ b/core/DTOs/GetMetricsURLsBreakdownSubmittedResponseDTO.py @@ -0,0 +1,21 @@ +from datetime import datetime + +from pydantic import BaseModel, field_validator + + +class GetMetricsURLsBreakdownSubmittedInnerDTO(BaseModel): + month: str + count_submitted: int + + @field_validator("month") + @classmethod + def validate_month_format(cls, v: str) -> str: + try: + # This will raise ValueError if format doesn't match + datetime.strptime(v, "%B %Y") + except ValueError: + raise ValueError("month must be in the format 'MonthName YYYY' (e.g., 'May 2025')") + return v + +class GetMetricsURLsBreakdownSubmittedResponseDTO(BaseModel): + entries: list[GetMetricsURLsBreakdownSubmittedInnerDTO] \ No newline at end of file diff --git a/core/DTOs/GetNextRecordTypeAnnotationResponseInfo.py b/core/DTOs/GetNextRecordTypeAnnotationResponseInfo.py new file mode 100644 index 00000000..4280e00d --- /dev/null +++ b/core/DTOs/GetNextRecordTypeAnnotationResponseInfo.py @@ -0,0 +1,22 @@ +from typing import Optional + +from pydantic import Field, BaseModel + +from collector_db.DTOs.URLMapping import URLMapping +from core.enums import RecordType +from html_tag_collector.DataClassTags import ResponseHTMLInfo + + +class GetNextRecordTypeAnnotationResponseInfo(BaseModel): + url_info: URLMapping = Field( + title="Information about the URL" + ) + suggested_record_type: Optional[RecordType] = Field( + title="What record type, if any, the auto-labeler identified the URL as" + ) + html_info: ResponseHTMLInfo = Field( + title="HTML information about the URL" + ) + +class GetNextRecordTypeAnnotationResponseOuterInfo(BaseModel): + next_annotation: Optional[GetNextRecordTypeAnnotationResponseInfo] diff --git a/core/DTOs/GetNextRelevanceAnnotationResponseInfo.py b/core/DTOs/GetNextRelevanceAnnotationResponseInfo.py new file mode 100644 index 00000000..61cb35a5 --- /dev/null +++ b/core/DTOs/GetNextRelevanceAnnotationResponseInfo.py @@ -0,0 +1,22 @@ +from typing import Optional + +from pydantic import BaseModel, Field + +from collector_db.DTOs.URLMapping import URLMapping +from core.DTOs.ResponseURLInfo import 
ResponseURLInfo +from html_tag_collector.DataClassTags import ResponseHTMLInfo + + +class GetNextRelevanceAnnotationResponseInfo(BaseModel): + url_info: URLMapping = Field( + title="Information about the URL" + ) + suggested_relevant: Optional[bool] = Field( + title="Whether the auto-labeler identified the URL as relevant or not" + ) + html_info: ResponseHTMLInfo = Field( + title="HTML information about the URL" + ) + +class GetNextRelevanceAnnotationResponseOuterInfo(BaseModel): + next_annotation: Optional[GetNextRelevanceAnnotationResponseInfo] diff --git a/core/DTOs/GetNextURLForAgencyAnnotationResponse.py b/core/DTOs/GetNextURLForAgencyAnnotationResponse.py new file mode 100644 index 00000000..8b3d06f4 --- /dev/null +++ b/core/DTOs/GetNextURLForAgencyAnnotationResponse.py @@ -0,0 +1,29 @@ +from typing import Optional, Literal + +from pydantic import BaseModel + +from core.enums import SuggestionType +from html_tag_collector.DataClassTags import ResponseHTMLInfo + +class GetNextURLForAgencyAgencyInfo(BaseModel): + suggestion_type: SuggestionType + pdap_agency_id: Optional[int] = None + agency_name: Optional[str] = None + state: Optional[str] = None + county: Optional[str] = None + locality: Optional[str] = None + +class GetNextURLForAgencyAnnotationInnerResponse(BaseModel): + url_id: int + url: str + agency_suggestions: list[ + GetNextURLForAgencyAgencyInfo + ] + html_info: ResponseHTMLInfo + +class GetNextURLForAgencyAnnotationResponse(BaseModel): + next_annotation: Optional[GetNextURLForAgencyAnnotationInnerResponse] + +class URLAgencyAnnotationPostInfo(BaseModel): + is_new: bool = False + suggested_agency: Optional[int] = None \ No newline at end of file diff --git a/core/DTOs/GetNextURLForAllAnnotationResponse.py b/core/DTOs/GetNextURLForAllAnnotationResponse.py new file mode 100644 index 00000000..f4fa4bb8 --- /dev/null +++ b/core/DTOs/GetNextURLForAllAnnotationResponse.py @@ -0,0 +1,24 @@ +from typing import Optional + +from pydantic import Field, BaseModel + +from core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAgencyInfo +from core.enums import RecordType +from html_tag_collector.DataClassTags import ResponseHTMLInfo + + +class GetNextURLForAllAnnotationInnerResponse(BaseModel): + url_id: int + url: str + html_info: ResponseHTMLInfo + agency_suggestions: Optional[list[GetNextURLForAgencyAgencyInfo]] + suggested_relevant: Optional[bool] = Field( + title="Whether the auto-labeler identified the URL as relevant or not" + ) + suggested_record_type: Optional[RecordType] = Field( + title="What record type, if any, the auto-labeler identified the URL as" + ) + + +class GetNextURLForAllAnnotationResponse(BaseModel): + next_annotation: Optional[GetNextURLForAllAnnotationInnerResponse] \ No newline at end of file diff --git a/core/DTOs/GetNextURLForFinalReviewResponse.py b/core/DTOs/GetNextURLForFinalReviewResponse.py new file mode 100644 index 00000000..c9e838b6 --- /dev/null +++ b/core/DTOs/GetNextURLForFinalReviewResponse.py @@ -0,0 +1,83 @@ +from typing import Optional + +from pydantic import BaseModel, Field + +from core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAgencyInfo +from core.enums import RecordType +from html_tag_collector.DataClassTags import ResponseHTMLInfo + +class FinalReviewAnnotationRelevantInfo(BaseModel): + auto: Optional[bool] = Field(title="Whether the auto-labeler has marked the URL as relevant") + user: Optional[bool] = Field( + title="Whether a user has marked the URL as relevant", + ) + +class 
FinalReviewAnnotationRecordTypeInfo(BaseModel): + auto: Optional[RecordType] = Field( + title="The record type suggested by the auto-labeler" + ) + user: Optional[RecordType] = Field( + title="The record type suggested by a user", + ) + +# region Agency + +class FinalReviewAnnotationAgencyAutoInfo(BaseModel): + unknown: bool = Field(title="Whether the auto-labeler suggested the URL as unknown") + suggestions: Optional[list[GetNextURLForAgencyAgencyInfo]] = Field( + title="A list of agencies, if any, suggested by the auto-labeler", + ) + +class FinalReviewAnnotationAgencyInfo(BaseModel): + confirmed: Optional[list[GetNextURLForAgencyAgencyInfo]] = Field( + title="The confirmed agency for the URL", + ) + auto: Optional[FinalReviewAnnotationAgencyAutoInfo] = Field( + title="A single agency or a list of agencies suggested by the auto-labeler",) + user: Optional[GetNextURLForAgencyAgencyInfo] = Field( + title="A single agency suggested by a user", + ) +# endregion + +class FinalReviewAnnotationInfo(BaseModel): + relevant: FinalReviewAnnotationRelevantInfo = Field( + title="User and auto annotations for relevancy", + ) + record_type: FinalReviewAnnotationRecordTypeInfo = Field( + title="User and auto annotations for record type", + ) + agency: FinalReviewAnnotationAgencyInfo = Field( + title="User and auto annotations for agency", + ) + +class FinalReviewOptionalMetadata(BaseModel): + record_formats: Optional[list[str]] = Field( + title="The record formats of the source", + default=None + ) + data_portal_type: Optional[str] = Field( + title="The data portal type of the source", + default=None + ) + supplying_entity: Optional[str] = Field( + title="The supplying entity of the source", + default=None + ) + +class GetNextURLForFinalReviewResponse(BaseModel): + id: int = Field(title="The id of the URL") + url: str = Field(title="The URL") + name: Optional[str] = Field(title="The name of the source") + description: Optional[str] = Field(title="The description of the source") + html_info: ResponseHTMLInfo = Field(title="The HTML content of the URL") + annotations: FinalReviewAnnotationInfo = Field( + title="The annotations for the URL, from both users and the auto-labeler", + ) + optional_metadata: FinalReviewOptionalMetadata = Field( + title="Optional metadata for the source", + ) + +class GetNextURLForFinalReviewOuterResponse(BaseModel): + next_source: Optional[GetNextURLForFinalReviewResponse] = Field( + title="The next source to be reviewed", + ) \ No newline at end of file diff --git a/core/DTOs/GetNextURLForRelevanceAnnotationResponse.py b/core/DTOs/GetNextURLForRelevanceAnnotationResponse.py deleted file mode 100644 index a58a4565..00000000 --- a/core/DTOs/GetNextURLForRelevanceAnnotationResponse.py +++ /dev/null @@ -1,9 +0,0 @@ -from typing import Optional - -from pydantic import BaseModel - -from core.DTOs.RelevanceAnnotationRequestInfo import RelevanceAnnotationRequestInfo - - -class GetNextURLForRelevanceAnnotationResponse(BaseModel): - next_annotation: Optional[RelevanceAnnotationRequestInfo] = None diff --git a/core/DTOs/GetTasksResponse.py b/core/DTOs/GetTasksResponse.py new file mode 100644 index 00000000..42b3d954 --- /dev/null +++ b/core/DTOs/GetTasksResponse.py @@ -0,0 +1,19 @@ +import datetime + +from pydantic import BaseModel + +from collector_db.enums import TaskType +from core.enums import BatchStatus + + +class GetTasksResponseTaskInfo(BaseModel): + task_id: int + type: TaskType + status: BatchStatus + url_count: int + url_error_count: int + updated_at: datetime.datetime + + 
+class GetTasksResponse(BaseModel): + tasks: list[GetTasksResponseTaskInfo] diff --git a/core/DTOs/GetURLsResponseInfo.py b/core/DTOs/GetURLsResponseInfo.py index 796b6494..162e92b5 100644 --- a/core/DTOs/GetURLsResponseInfo.py +++ b/core/DTOs/GetURLsResponseInfo.py @@ -29,7 +29,6 @@ class GetURLsResponseInnerInfo(BaseModel): updated_at: datetime.datetime created_at: datetime.datetime errors: list[GetURLsResponseErrorInfo] - metadata: list[GetURLsResponseMetadataInfo] class GetURLsResponseInfo(BaseModel): urls: list[GetURLsResponseInnerInfo] diff --git a/core/DTOs/LabelStudioExportResponseInfo.py b/core/DTOs/LabelStudioExportResponseInfo.py deleted file mode 100644 index fae94096..00000000 --- a/core/DTOs/LabelStudioExportResponseInfo.py +++ /dev/null @@ -1,9 +0,0 @@ -from typing import Annotated - -from fastapi.param_functions import Doc -from pydantic import BaseModel - - -class LabelStudioExportResponseInfo(BaseModel): - label_studio_import_id: Annotated[int, Doc("The ID of the Label Studio import")] - num_urls_imported: Annotated[int, Doc("The number of URLs imported")] \ No newline at end of file diff --git a/core/DTOs/LabelStudioTaskInfo.py b/core/DTOs/LabelStudioTaskInfo.py deleted file mode 100644 index 5c277c8a..00000000 --- a/core/DTOs/LabelStudioTaskInfo.py +++ /dev/null @@ -1,11 +0,0 @@ -from pydantic import BaseModel - -from collector_db.enums import URLMetadataAttributeType -from core.enums import LabelStudioTaskStatus - - -class LabelStudioTaskInfo(BaseModel): - metadata_id: int - attribute: URLMetadataAttributeType - task_id: int - task_status: LabelStudioTaskStatus \ No newline at end of file diff --git a/core/DTOs/ManualBatchInputDTO.py b/core/DTOs/ManualBatchInputDTO.py new file mode 100644 index 00000000..9bb98755 --- /dev/null +++ b/core/DTOs/ManualBatchInputDTO.py @@ -0,0 +1,24 @@ +from typing import Optional + +from pydantic import BaseModel, Field + +from core.enums import RecordType + + +class ManualBatchInnerInputDTO(BaseModel): + url: str + name: Optional[str] = None + description: Optional[str] = None + collector_metadata: Optional[dict] = None + record_type: Optional[RecordType] = None + record_formats: Optional[list[str]] = None + data_portal_type: Optional[str] = None + supplying_entity: Optional[str] = None + + +class ManualBatchInputDTO(BaseModel): + name: str + entries: list[ManualBatchInnerInputDTO] = Field( + min_length=1, + max_length=1000 + ) \ No newline at end of file diff --git a/core/DTOs/ManualBatchResponseDTO.py b/core/DTOs/ManualBatchResponseDTO.py new file mode 100644 index 00000000..b572fbb2 --- /dev/null +++ b/core/DTOs/ManualBatchResponseDTO.py @@ -0,0 +1,7 @@ +from pydantic import BaseModel + + +class ManualBatchResponseDTO(BaseModel): + batch_id: int + urls: list[int] + duplicate_urls: list[str] \ No newline at end of file diff --git a/core/DTOs/RecordTypeAnnotationPostInfo.py b/core/DTOs/RecordTypeAnnotationPostInfo.py new file mode 100644 index 00000000..87e8b674 --- /dev/null +++ b/core/DTOs/RecordTypeAnnotationPostInfo.py @@ -0,0 +1,7 @@ +from pydantic import BaseModel + +from core.enums import RecordType + + +class RecordTypeAnnotationPostInfo(BaseModel): + record_type: RecordType \ No newline at end of file diff --git a/core/DTOs/RelevanceAnnotationInfo.py b/core/DTOs/RelevanceAnnotationPostInfo.py similarity index 100% rename from core/DTOs/RelevanceAnnotationInfo.py rename to core/DTOs/RelevanceAnnotationPostInfo.py diff --git a/core/DTOs/ResponseURLInfo.py b/core/DTOs/ResponseURLInfo.py new file mode 100644 index 
00000000..c7f7e364 --- /dev/null +++ b/core/DTOs/ResponseURLInfo.py @@ -0,0 +1,6 @@ +from pydantic import BaseModel + + +class ResponseURLInfo(BaseModel): + url: str + url_id: int \ No newline at end of file diff --git a/core/DTOs/SearchURLResponse.py b/core/DTOs/SearchURLResponse.py new file mode 100644 index 00000000..1a46c0be --- /dev/null +++ b/core/DTOs/SearchURLResponse.py @@ -0,0 +1,8 @@ +from typing import Optional + +from pydantic import BaseModel + + +class SearchURLResponse(BaseModel): + found: bool + url_id: Optional[int] = None \ No newline at end of file diff --git a/core/DTOs/TaskOperatorRunInfo.py b/core/DTOs/TaskOperatorRunInfo.py new file mode 100644 index 00000000..6b5c29e0 --- /dev/null +++ b/core/DTOs/TaskOperatorRunInfo.py @@ -0,0 +1,14 @@ +from enum import Enum +from typing import Optional + +from pydantic import BaseModel + +class TaskOperatorOutcome(Enum): + SUCCESS = "success" + ERROR = "error" + +class TaskOperatorRunInfo(BaseModel): + task_id: Optional[int] + linked_url_ids: list[int] + outcome: TaskOperatorOutcome + message: str = "" \ No newline at end of file diff --git a/core/DTOs/URLAgencySuggestionInfo.py b/core/DTOs/URLAgencySuggestionInfo.py new file mode 100644 index 00000000..2eae0496 --- /dev/null +++ b/core/DTOs/URLAgencySuggestionInfo.py @@ -0,0 +1,16 @@ +from typing import Optional + +from pydantic import BaseModel + +from core.enums import SuggestionType + + +class URLAgencySuggestionInfo(BaseModel): + url_id: int + suggestion_type: SuggestionType + pdap_agency_id: Optional[int] = None + agency_name: Optional[str] = None + state: Optional[str] = None + county: Optional[str] = None + locality: Optional[str] = None + user_id: Optional[int] = None diff --git a/core/DTOs/task_data_objects/AgencyIdentificationTDO.py b/core/DTOs/task_data_objects/AgencyIdentificationTDO.py new file mode 100644 index 00000000..10c3ce99 --- /dev/null +++ b/core/DTOs/task_data_objects/AgencyIdentificationTDO.py @@ -0,0 +1,11 @@ +from typing import Optional + +from pydantic import BaseModel + +from collector_manager.enums import CollectorType + + +class AgencyIdentificationTDO(BaseModel): + url_id: int + collector_metadata: Optional[dict] = None + collector_type: CollectorType diff --git a/core/DTOs/task_data_objects/README.md b/core/DTOs/task_data_objects/README.md new file mode 100644 index 00000000..3d2fc5ae --- /dev/null +++ b/core/DTOs/task_data_objects/README.md @@ -0,0 +1 @@ +Task Data Objects (or TDOs) are data transfer objects (DTOs) used within a given task operation. Each Task type has one type of TDO. 
\ No newline at end of file diff --git a/core/DTOs/task_data_objects/SubmitApprovedURLTDO.py b/core/DTOs/task_data_objects/SubmitApprovedURLTDO.py new file mode 100644 index 00000000..be26d3a8 --- /dev/null +++ b/core/DTOs/task_data_objects/SubmitApprovedURLTDO.py @@ -0,0 +1,26 @@ +from typing import Optional + +from pydantic import BaseModel + +from core.enums import RecordType +from datetime import datetime + +class SubmitApprovedURLTDO(BaseModel): + url_id: int + url: str + record_type: RecordType + agency_ids: list[int] + name: str + description: str + approving_user_id: int + record_formats: Optional[list[str]] = None + data_portal_type: Optional[str] = None + supplying_entity: Optional[str] = None + data_source_id: Optional[int] = None + request_error: Optional[str] = None + +class SubmittedURLInfo(BaseModel): + url_id: int + data_source_id: Optional[int] + request_error: Optional[str] + submitted_at: Optional[datetime] = None \ No newline at end of file diff --git a/core/DTOs/task_data_objects/URLDuplicateTDO.py b/core/DTOs/task_data_objects/URLDuplicateTDO.py new file mode 100644 index 00000000..af00ce38 --- /dev/null +++ b/core/DTOs/task_data_objects/URLDuplicateTDO.py @@ -0,0 +1,9 @@ +from typing import Optional + +from pydantic import BaseModel + + +class URLDuplicateTDO(BaseModel): + url_id: int + url: str + is_duplicate: Optional[bool] = None diff --git a/core/DTOs/task_data_objects/URLMiscellaneousMetadataTDO.py b/core/DTOs/task_data_objects/URLMiscellaneousMetadataTDO.py new file mode 100644 index 00000000..ff173a8e --- /dev/null +++ b/core/DTOs/task_data_objects/URLMiscellaneousMetadataTDO.py @@ -0,0 +1,20 @@ +from typing import Optional + +from pydantic import BaseModel + +from collector_manager.enums import CollectorType + +class URLHTMLMetadataInfo(BaseModel): + title: Optional[str] = None + description: Optional[str] = None + +class URLMiscellaneousMetadataTDO(BaseModel): + url_id: int + collector_metadata: dict + collector_type: CollectorType + name: Optional[str] = None + description: Optional[str] = None + record_formats: Optional[list[str]] = None + data_portal_type: Optional[str] = None + supplying_entity: Optional[str] = None + html_metadata_info: Optional[URLHTMLMetadataInfo] = None diff --git a/core/DTOs/task_data_objects/URLRecordTypeTDO.py b/core/DTOs/task_data_objects/URLRecordTypeTDO.py new file mode 100644 index 00000000..34bbc233 --- /dev/null +++ b/core/DTOs/task_data_objects/URLRecordTypeTDO.py @@ -0,0 +1,15 @@ +from typing import Optional + +from pydantic import BaseModel + +from collector_db.DTOs.URLWithHTML import URLWithHTML +from core.enums import RecordType + + +class URLRecordTypeTDO(BaseModel): + url_with_html: URLWithHTML + record_type: Optional[RecordType] = None + error: Optional[str] = None + + def is_errored(self): + return self.error is not None \ No newline at end of file diff --git a/core/DTOs/URLRelevanceHuggingfaceCycleInfo.py b/core/DTOs/task_data_objects/URLRelevanceHuggingfaceTDO.py similarity index 78% rename from core/DTOs/URLRelevanceHuggingfaceCycleInfo.py rename to core/DTOs/task_data_objects/URLRelevanceHuggingfaceTDO.py index 19318e6a..33311a9b 100644 --- a/core/DTOs/URLRelevanceHuggingfaceCycleInfo.py +++ b/core/DTOs/task_data_objects/URLRelevanceHuggingfaceTDO.py @@ -5,6 +5,6 @@ from collector_db.DTOs.URLWithHTML import URLWithHTML -class URLRelevanceHuggingfaceCycleInfo(BaseModel): +class URLRelevanceHuggingfaceTDO(BaseModel): url_with_html: URLWithHTML relevant: Optional[bool] = None diff --git 
a/core/DTOs/URLHTMLCycleInfo.py b/core/DTOs/task_data_objects/UrlHtmlTDO.py similarity index 94% rename from core/DTOs/URLHTMLCycleInfo.py rename to core/DTOs/task_data_objects/UrlHtmlTDO.py index 1d739375..05e9caf2 100644 --- a/core/DTOs/URLHTMLCycleInfo.py +++ b/core/DTOs/task_data_objects/UrlHtmlTDO.py @@ -7,7 +7,7 @@ @dataclass -class URLHTMLCycleInfo: +class UrlHtmlTDO: url_info: URLInfo url_response_info: Optional[URLResponseInfo] = None html_tag_info: Optional[ResponseHTMLInfo] = None diff --git a/label_studio_interface/DTOs/__init__.py b/core/DTOs/task_data_objects/__init__.py similarity index 100% rename from label_studio_interface/DTOs/__init__.py rename to core/DTOs/task_data_objects/__init__.py diff --git a/core/EnvVarManager.py b/core/EnvVarManager.py new file mode 100644 index 00000000..39e4ce83 --- /dev/null +++ b/core/EnvVarManager.py @@ -0,0 +1,76 @@ +import os + +class EnvVarManager: + _instance = None + _allow_direct_init = False # internal flag + + """ + A class for unified management of environment variables + """ + def __new__(cls, *args, **kwargs): + if not cls._allow_direct_init: + raise RuntimeError("Use `EnvVarManager.get()` or `EnvVarManager.override()` instead.") + return super().__new__(cls) + + def __init__(self, env: dict = os.environ): + self.env = env + self._load() + + def _load(self): + + self.google_api_key = self.require_env("GOOGLE_API_KEY") + self.google_cse_id = self.require_env("GOOGLE_CSE_ID") + + self.pdap_email = self.require_env("PDAP_EMAIL") + self.pdap_password = self.require_env("PDAP_PASSWORD") + self.pdap_api_key = self.require_env("PDAP_API_KEY") + self.pdap_api_url = self.require_env("PDAP_API_URL") + + self.discord_webhook_url = self.require_env("DISCORD_WEBHOOK_URL") + + self.openai_api_key = self.require_env("OPENAI_API_KEY") + + self.postgres_user = self.require_env("POSTGRES_USER") + self.postgres_password = self.require_env("POSTGRES_PASSWORD") + self.postgres_host = self.require_env("POSTGRES_HOST") + self.postgres_port = self.require_env("POSTGRES_PORT") + self.postgres_db = self.require_env("POSTGRES_DB") + + @classmethod + def get(cls): + """ + Get the singleton instance, loading from environment if not yet + instantiated + """ + if cls._instance is None: + cls._allow_direct_init = True + cls._instance = cls(os.environ) + cls._allow_direct_init = False + return cls._instance + + @classmethod + def override(cls, env: dict): + """ + Create singleton instance that + overrides the environment variables with injected values + """ + cls._allow_direct_init = True + cls._instance = cls(env) + cls._allow_direct_init = False + + @classmethod + def reset(cls): + cls._instance = None + + def get_postgres_connection_string(self, is_async = False): + driver = "postgresql" + if is_async: + driver += "+asyncpg" + return (f"{driver}://{self.postgres_user}:{self.postgres_password}" + f"@{self.postgres_host}:{self.postgres_port}/{self.postgres_db}") + + def require_env(self, key: str, allow_none: bool = False): + val = self.env.get(key) + if val is None and not allow_none: + raise ValueError(f"Environment variable {key} is not set") + return val \ No newline at end of file diff --git a/core/FunctionTrigger.py b/core/FunctionTrigger.py new file mode 100644 index 00000000..df85482a --- /dev/null +++ b/core/FunctionTrigger.py @@ -0,0 +1,30 @@ +import asyncio +from typing import Callable, Awaitable + +class FunctionTrigger: + """ + A small class used to trigger a function to run in a loop + If the trigger is used again while the task is running, the 
task will be rerun. + """ + + def __init__(self, func: Callable[[], Awaitable[None]]): + self._func = func + self._lock = asyncio.Lock() + self._rerun_requested = False + self._loop_running = False + + async def trigger_or_rerun(self): + if self._loop_running: + self._rerun_requested = True + return + + async with self._lock: + self._loop_running = True + try: + while True: + self._rerun_requested = False + await self._func() + if not self._rerun_requested: + break + finally: + self._loop_running = False diff --git a/core/README.md b/core/README.md index c9095c41..9546f613 100644 --- a/core/README.md +++ b/core/README.md @@ -2,4 +2,14 @@ The Source Collector Core is a directory which integrates: 1. The Collector Manager 2. The Source Collector Database 3. The API (to be developed) -4. The PDAP API Client (to be developed) \ No newline at end of file +4. The PDAP API Client (to be developed) + +# Nomenclature + +- **Collector**: A submodule for collecting URLs. Different collectors use different sources and different methods for gathering URLs. +- **Batch**: URLs are collected in Collector Batches, with different collectors producing different Batches. +- **Cycle**: The overall lifecycle of each URL -- from initial retrieval in a Batch to either disposal or incorporation into the Data Sources App Database. +- **Task**: A semi-independent operation performed on a set of URLs. Tasks include collecting URLs, retrieving HTML data, getting metadata via machine learning, and so on. +- **Task Set**: A group of URLs that are operated on together as part of a single task. The URLs in a set are not necessarily all from the same batch, and each URL should be operated on by a given task only once. +- **Task Operator**: A class which performs a single task on a set of URLs. +- **Subtask**: A subcomponent of a Task Operator which performs a single operation on a single URL, often distinguished by the Collector Strategy used for that URL (see the sketch below).
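To make the Task Operator / Subtask split concrete, here is a minimal illustrative sketch (not part of the diff) of an operator that dispatches one subtask per URL. `ExampleTaskOperator` and `ExampleSubtask` are invented names; the `adb_client` methods and the `TaskType.HTML` placeholder are borrowed from `URLHTMLTaskOperator`, introduced later in this changeset.

```python
# Illustrative sketch only -- ExampleTaskOperator and ExampleSubtask are hypothetical.
from collector_db.enums import TaskType
from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase


class ExampleSubtask:
    """Handles a single URL; real subtasks vary by collector strategy."""

    async def run(self, url_id: int) -> None:
        ...  # operate on one URL


class ExampleTaskOperator(TaskOperatorBase):
    """Performs one task across a set of URLs (a Task Set)."""

    @property
    def task_type(self) -> TaskType:
        return TaskType.HTML  # placeholder; a real operator returns its own TaskType

    async def meets_task_prerequisites(self) -> bool:
        # Only run when there is work to do.
        return await self.adb_client.has_pending_urls_without_html_data()

    async def inner_task_logic(self) -> None:
        url_infos = await self.adb_client.get_pending_urls_without_html_data()
        await self.link_urls_to_task(url_ids=[info.id for info in url_infos])
        subtask = ExampleSubtask()
        for info in url_infos:
            await subtask.run(url_id=info.id)
```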
\ No newline at end of file diff --git a/core/ScheduledTaskManager.py b/core/ScheduledTaskManager.py index 590690d1..e0b87247 100644 --- a/core/ScheduledTaskManager.py +++ b/core/ScheduledTaskManager.py @@ -1,41 +1,9 @@ from datetime import datetime, timedelta from apscheduler.schedulers.asyncio import AsyncIOScheduler -from apscheduler.schedulers.background import BackgroundScheduler from apscheduler.triggers.interval import IntervalTrigger - -from collector_db.DatabaseClient import DatabaseClient from core.AsyncCore import AsyncCore - -class ScheduledTaskManager: - - def __init__(self, db_client: DatabaseClient): - # Dependencies - self.db_client = db_client - - # Main objects - self.scheduler = BackgroundScheduler() - self.scheduler.start() - self.add_scheduled_tasks() - - # Jobs - self.delete_old_logs_job = None - - - def add_scheduled_tasks(self): - self.delete_old_logs_job = self.scheduler.add_job( - self.db_client.delete_old_logs, - trigger=IntervalTrigger( - days=1, - start_date=datetime.now() + timedelta(minutes=10) - ) - ) - - def shutdown(self): - if self.scheduler.running: - self.scheduler.shutdown() - class AsyncScheduledTaskManager: def __init__(self, async_core: AsyncCore): @@ -49,13 +17,30 @@ def __init__(self, async_core: AsyncCore): # Jobs self.run_cycles_job = None + self.delete_logs_job = None + self.populate_backlog_snapshot_job = None def add_scheduled_tasks(self): self.run_cycles_job = self.scheduler.add_job( - self.async_core.run_cycles, + self.async_core.run_tasks, trigger=IntervalTrigger( hours=1, start_date=datetime.now() + timedelta(minutes=1) + ), + misfire_grace_time=60 + ) + self.delete_logs_job = self.scheduler.add_job( + self.async_core.adb_client.delete_old_logs, + trigger=IntervalTrigger( + days=1, + start_date=datetime.now() + timedelta(minutes=10) + ) + ) + self.populate_backlog_snapshot_job = self.scheduler.add_job( + self.async_core.adb_client.populate_backlog_snapshot, + trigger=IntervalTrigger( + days=1, + start_date=datetime.now() + timedelta(minutes=20) ) ) diff --git a/core/SourceCollectorCore.py b/core/SourceCollectorCore.py index b341bda3..a4699bf6 100644 --- a/core/SourceCollectorCore.py +++ b/core/SourceCollectorCore.py @@ -1,139 +1,18 @@ -from typing import Optional +from typing import Optional, Any -from pydantic import BaseModel -from collector_db.DTOs.BatchInfo import BatchInfo from collector_db.DatabaseClient import DatabaseClient -from collector_manager.CollectorManager import CollectorManager -from collector_manager.enums import CollectorType -from core.CoreLogger import CoreLogger -from core.DTOs.CollectorStartInfo import CollectorStartInfo -from core.DTOs.GetBatchLogsResponse import GetBatchLogsResponse -from core.DTOs.GetBatchStatusResponse import GetBatchStatusResponse -from core.DTOs.GetDuplicatesByBatchResponse import GetDuplicatesByBatchResponse -from core.DTOs.GetURLsByBatchResponse import GetURLsByBatchResponse -from core.DTOs.LabelStudioExportResponseInfo import LabelStudioExportResponseInfo -from core.DTOs.MessageResponse import MessageResponse -from core.ScheduledTaskManager import ScheduledTaskManager from core.enums import BatchStatus -from label_studio_interface.DTOs.LabelStudioTaskExportInfo import LabelStudioTaskExportInfo -from label_studio_interface.LabelStudioAPIManager import LabelStudioAPIManager class SourceCollectorCore: def __init__( self, - core_logger: CoreLogger, - db_client: DatabaseClient = DatabaseClient(), - label_studio_api_manager: LabelStudioAPIManager = LabelStudioAPIManager(), - dev_mode: bool = 
False + db_client: Optional[DatabaseClient] = None, ): + if db_client is None: + db_client = DatabaseClient() self.db_client = db_client - self.core_logger = core_logger - self.collector_manager = CollectorManager( - logger=core_logger, - db_client=db_client - ) - if not dev_mode: - self.scheduled_task_manager = ScheduledTaskManager(db_client=db_client) - else: - self.scheduled_task_manager = None - self.label_studio_api_manager = label_studio_api_manager - - def get_batch_info(self, batch_id: int) -> BatchInfo: - return self.db_client.get_batch_by_id(batch_id) - - def get_urls_by_batch(self, batch_id: int, page: int = 1) -> GetURLsByBatchResponse: - url_infos = self.db_client.get_urls_by_batch(batch_id, page) - return GetURLsByBatchResponse(urls=url_infos) - - def get_duplicate_urls_by_batch(self, batch_id: int, page: int = 1) -> GetDuplicatesByBatchResponse: - dup_infos = self.db_client.get_duplicates_by_batch_id(batch_id, page=page) - return GetDuplicatesByBatchResponse(duplicates=dup_infos) - - def get_batch_statuses( - self, - collector_type: Optional[CollectorType], - status: Optional[BatchStatus], - page: int - ) -> GetBatchStatusResponse: - results = self.db_client.get_recent_batch_status_info( - collector_type=collector_type, - status=status, - page=page - ) - return GetBatchStatusResponse(results=results) def get_status(self, batch_id: int) -> BatchStatus: return self.db_client.get_batch_status(batch_id) - - def initiate_collector( - self, - collector_type: CollectorType, - user_id: int, - dto: Optional[BaseModel] = None, - ): - """ - Reserves a batch ID from the database - and starts the requisite collector - """ - batch_info = BatchInfo( - strategy=collector_type.value, - status=BatchStatus.IN_PROCESS, - parameters=dto.model_dump(), - user_id=user_id - ) - batch_id = self.db_client.insert_batch(batch_info) - self.collector_manager.start_collector( - collector_type=collector_type, - batch_id=batch_id, - dto=dto - ) - return CollectorStartInfo( - batch_id=batch_id, - message=f"Started {collector_type.value} collector." - ) - - def get_batch_logs(self, batch_id: int) -> GetBatchLogsResponse: - logs = self.db_client.get_logs_by_batch_id(batch_id) - return GetBatchLogsResponse(logs=logs) - - def export_batch_to_label_studio(self, batch_id: int) -> LabelStudioExportResponseInfo: - # TODO: Might this need to be a separate thread? 
- db_url_infos = self.db_client.get_urls_by_batch(batch_id) - url_count = len(db_url_infos) - export_infos = [] - for url_info in db_url_infos: - export_infos.append(LabelStudioTaskExportInfo(url=url_info.url)) - import_id = self.label_studio_api_manager.export_tasks_into_project( - data=export_infos - ) - return LabelStudioExportResponseInfo( - label_studio_import_id=import_id, - num_urls_imported=url_count - ) - - def abort_batch(self, batch_id: int) -> MessageResponse: - self.collector_manager.abort_collector(cid=batch_id) - return MessageResponse(message=f"Batch aborted.") - - def restart(self): - self.collector_manager.shutdown_all_collectors() - self.collector_manager.restart_executor() - self.collector_manager.logger.restart() - - - def shutdown(self): - self.collector_manager.shutdown_all_collectors() - self.collector_manager.logger.shutdown() - if self.scheduled_task_manager is not None: - self.scheduled_task_manager.shutdown() - - - - - -""" -TODO: Add logic for batch processing - -""" \ No newline at end of file diff --git a/core/TaskManager.py b/core/TaskManager.py new file mode 100644 index 00000000..1dcc9bb5 --- /dev/null +++ b/core/TaskManager.py @@ -0,0 +1,203 @@ +import logging + +from core.classes.task_operators.URLDuplicateTaskOperator import URLDuplicateTaskOperator +from source_collectors.muckrock.MuckrockAPIInterface import MuckrockAPIInterface +from collector_db.AsyncDatabaseClient import AsyncDatabaseClient +from collector_db.DTOs.TaskInfo import TaskInfo +from collector_db.enums import TaskType +from core.DTOs.GetTasksResponse import GetTasksResponse +from core.DTOs.TaskOperatorRunInfo import TaskOperatorRunInfo, TaskOperatorOutcome +from core.FunctionTrigger import FunctionTrigger +from core.classes.task_operators.AgencyIdentificationTaskOperator import AgencyIdentificationTaskOperator +from core.classes.task_operators.SubmitApprovedURLTaskOperator import SubmitApprovedURLTaskOperator +from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase +from core.classes.task_operators.URLHTMLTaskOperator import URLHTMLTaskOperator +from core.classes.task_operators.URLMiscellaneousMetadataTaskOperator import URLMiscellaneousMetadataTaskOperator +from core.classes.task_operators.URLRecordTypeTaskOperator import URLRecordTypeTaskOperator +from core.classes.task_operators.URLRelevanceHuggingfaceTaskOperator import URLRelevanceHuggingfaceTaskOperator +from core.enums import BatchStatus +from html_tag_collector.ResponseParser import HTMLResponseParser +from html_tag_collector.URLRequestInterface import URLRequestInterface +from hugging_face.HuggingFaceInterface import HuggingFaceInterface +from llm_api_logic.OpenAIRecordClassifier import OpenAIRecordClassifier +from pdap_api_client.PDAPClient import PDAPClient +from util.DiscordNotifier import DiscordPoster + +TASK_REPEAT_THRESHOLD = 20 + +class TaskManager: + + def __init__( + self, + adb_client: AsyncDatabaseClient, + huggingface_interface: HuggingFaceInterface, + url_request_interface: URLRequestInterface, + html_parser: HTMLResponseParser, + discord_poster: DiscordPoster, + pdap_client: PDAPClient + ): + # Dependencies + self.adb_client = adb_client + self.pdap_client = pdap_client + self.huggingface_interface = huggingface_interface + self.url_request_interface = url_request_interface + self.html_parser = html_parser + self.discord_poster = discord_poster + + self.logger = logging.getLogger(__name__) + self.logger.addHandler(logging.StreamHandler()) + self.logger.setLevel(logging.INFO) + self.task_trigger = 
FunctionTrigger(self.run_tasks) + self.task_status: TaskType = TaskType.IDLE + + + + #region Task Operators + async def get_url_html_task_operator(self): + operator = URLHTMLTaskOperator( + adb_client=self.adb_client, + url_request_interface=self.url_request_interface, + html_parser=self.html_parser + ) + return operator + + async def get_url_relevance_huggingface_task_operator(self): + operator = URLRelevanceHuggingfaceTaskOperator( + adb_client=self.adb_client, + huggingface_interface=self.huggingface_interface + ) + return operator + + async def get_url_record_type_task_operator(self): + operator = URLRecordTypeTaskOperator( + adb_client=self.adb_client, + classifier=OpenAIRecordClassifier() + ) + return operator + + async def get_agency_identification_task_operator(self): + muckrock_api_interface = MuckrockAPIInterface() + operator = AgencyIdentificationTaskOperator( + adb_client=self.adb_client, + pdap_client=self.pdap_client, + muckrock_api_interface=muckrock_api_interface + ) + return operator + + async def get_submit_approved_url_task_operator(self): + operator = SubmitApprovedURLTaskOperator( + adb_client=self.adb_client, + pdap_client=self.pdap_client + ) + return operator + + async def get_url_miscellaneous_metadata_task_operator(self): + operator = URLMiscellaneousMetadataTaskOperator( + adb_client=self.adb_client + ) + return operator + + async def get_url_duplicate_task_operator(self): + operator = URLDuplicateTaskOperator( + adb_client=self.adb_client, + pdap_client=self.pdap_client + ) + return operator + + async def get_task_operators(self) -> list[TaskOperatorBase]: + return [ + await self.get_url_html_task_operator(), + await self.get_url_duplicate_task_operator(), + # await self.get_url_relevance_huggingface_task_operator(), + await self.get_url_record_type_task_operator(), + await self.get_agency_identification_task_operator(), + await self.get_url_miscellaneous_metadata_task_operator(), + await self.get_submit_approved_url_task_operator() + ] + + #endregion + + #region Tasks + async def set_task_status(self, task_type: TaskType): + self.task_status = task_type + + async def run_tasks(self): + operators = await self.get_task_operators() + for operator in operators: + count = 0 + await self.set_task_status(task_type=operator.task_type) + + meets_prereq = await operator.meets_task_prerequisites() + while meets_prereq: + print(f"Running {operator.task_type.value} Task") + if count > TASK_REPEAT_THRESHOLD: + message = f"Task {operator.task_type.value} has been run more than {TASK_REPEAT_THRESHOLD} times in a row. Task loop terminated." 
+ print(message) + self.discord_poster.post_to_discord(message=message) + break + task_id = await self.initiate_task_in_db(task_type=operator.task_type) + run_info: TaskOperatorRunInfo = await operator.run_task(task_id) + await self.conclude_task(run_info) + if run_info.outcome == TaskOperatorOutcome.ERROR: + break + count += 1 + meets_prereq = await operator.meets_task_prerequisites() + await self.set_task_status(task_type=TaskType.IDLE) + + async def trigger_task_run(self): + await self.task_trigger.trigger_or_rerun() + + + async def conclude_task(self, run_info): + await self.adb_client.link_urls_to_task( + task_id=run_info.task_id, + url_ids=run_info.linked_url_ids + ) + await self.handle_outcome(run_info) + + async def initiate_task_in_db(self, task_type: TaskType) -> int: + self.logger.info(f"Initiating {task_type.value} Task") + task_id = await self.adb_client.initiate_task(task_type=task_type) + return task_id + + async def handle_outcome(self, run_info: TaskOperatorRunInfo): + match run_info.outcome: + case TaskOperatorOutcome.ERROR: + await self.handle_task_error(run_info) + case TaskOperatorOutcome.SUCCESS: + await self.adb_client.update_task_status( + task_id=run_info.task_id, + status=BatchStatus.READY_TO_LABEL + ) + + async def handle_task_error(self, run_info: TaskOperatorRunInfo): + await self.adb_client.update_task_status( + task_id=run_info.task_id, + status=BatchStatus.ERROR) + await self.adb_client.add_task_error( + task_id=run_info.task_id, + error=run_info.message + ) + await self.discord_poster.post_to_discord( + message=f"Task {run_info.task_id} ({self.task_status.value}) failed with error.") + + async def get_task_info(self, task_id: int) -> TaskInfo: + return await self.adb_client.get_task_info(task_id=task_id) + + async def get_tasks( + self, + page: int, + task_type: TaskType, + task_status: BatchStatus + ) -> GetTasksResponse: + return await self.adb_client.get_tasks( + page=page, + task_type=task_type, + task_status=task_status + ) + + + #endregion + + + diff --git a/core/classes/ErrorManager.py b/core/classes/ErrorManager.py new file mode 100644 index 00000000..ba763054 --- /dev/null +++ b/core/classes/ErrorManager.py @@ -0,0 +1,44 @@ +from enum import Enum +from http import HTTPStatus + +from fastapi import HTTPException +from pydantic import BaseModel + +from core.enums import AnnotationType + + +class ErrorTypes(Enum): + ANNOTATION_EXISTS = "ANNOTATION_EXISTS" + +class ErrorFormat(BaseModel): + code: ErrorTypes + message: str + + +class ErrorManager: + + @staticmethod + async def raise_error( + error_type: ErrorTypes, + message: str, + status_code: HTTPStatus = HTTPStatus.BAD_REQUEST + ): + raise HTTPException( + status_code=status_code, + detail=ErrorFormat( + code=error_type, + message=message + ).model_dump(mode='json') + ) + + @staticmethod + async def raise_annotation_exists_error( + annotation_type: AnnotationType, + url_id: int + ): + await ErrorManager.raise_error( + error_type=ErrorTypes.ANNOTATION_EXISTS, + message=f"Annotation of type {annotation_type.value} already exists" + f" for url {url_id}", + status_code=HTTPStatus.CONFLICT + ) diff --git a/core/classes/URLHTMLCycler.py b/core/classes/URLHTMLCycler.py deleted file mode 100644 index 73344a9c..00000000 --- a/core/classes/URLHTMLCycler.py +++ /dev/null @@ -1,95 +0,0 @@ -from collector_db.AsyncDatabaseClient import AsyncDatabaseClient -from collector_db.DTOs.URLErrorInfos import URLErrorPydanticInfo -from collector_db.DTOs.URLInfo import URLInfo -from core.DTOs.URLHTMLCycleInfo import 
URLHTMLCycleInfo -from core.classes.HTMLContentInfoGetter import HTMLContentInfoGetter -from html_tag_collector.ResponseParser import HTMLResponseParser -from html_tag_collector.URLRequestInterface import URLRequestInterface - - -class URLHTMLCycler: - - def __init__( - self, - url_request_interface: URLRequestInterface, - adb_client: AsyncDatabaseClient, - html_parser: HTMLResponseParser - ): - self.url_request_interface = url_request_interface - self.adb_client = adb_client - self.html_parser = html_parser - - async def cycle(self): - print("Running URL HTML Cycle...") - cycle_infos = await self.get_pending_urls_without_html_data() - await self.get_raw_html_data_for_urls(cycle_infos) - success_cycles, error_cycles = await self.separate_success_and_error_cycles(cycle_infos) - await self.update_errors_in_database(error_cycles) - await self.process_html_data(success_cycles) - await self.update_html_data_in_database(success_cycles) - - - async def get_just_urls(self, cycle_infos: list[URLHTMLCycleInfo]): - return [cycle_info.url_info.url for cycle_info in cycle_infos] - - async def get_pending_urls_without_html_data(self): - pending_urls: list[URLInfo] = await self.adb_client.get_pending_urls_without_html_data() - cycle_infos = [ - URLHTMLCycleInfo( - url_info=url_info, - ) for url_info in pending_urls - ] - return cycle_infos - - async def get_raw_html_data_for_urls(self, cycle_infos: list[URLHTMLCycleInfo]): - just_urls = await self.get_just_urls(cycle_infos) - url_response_infos = await self.url_request_interface.make_requests(just_urls) - for cycle_info, url_response_info in zip(cycle_infos, url_response_infos): - cycle_info.url_response_info = url_response_info - - async def separate_success_and_error_cycles( - self, - cycle_infos: list[URLHTMLCycleInfo] - ) -> tuple[ - list[URLHTMLCycleInfo], # Successful - list[URLHTMLCycleInfo] # Error - ]: - errored_cycle_infos = [] - successful_cycle_infos = [] - for cycle_info in cycle_infos: - if not cycle_info.url_response_info.success: - errored_cycle_infos.append(cycle_info) - else: - successful_cycle_infos.append(cycle_info) - return successful_cycle_infos, errored_cycle_infos - - async def update_errors_in_database(self, errored_cycle_infos: list[URLHTMLCycleInfo]): - error_infos = [] - for errored_cycle_info in errored_cycle_infos: - error_info = URLErrorPydanticInfo( - url_id=errored_cycle_info.url_info.id, - error=str(errored_cycle_info.url_response_info.exception), - ) - error_infos.append(error_info) - await self.adb_client.add_url_error_infos(error_infos) - - async def process_html_data(self, cycle_infos: list[URLHTMLCycleInfo]): - for cycle_info in cycle_infos: - html_tag_info = await self.html_parser.parse( - url=cycle_info.url_info.url, - html_content=cycle_info.url_response_info.html, - content_type=cycle_info.url_response_info.content_type - ) - cycle_info.html_tag_info = html_tag_info - - async def update_html_data_in_database(self, cycle_infos: list[URLHTMLCycleInfo]): - html_content_infos = [] - for cycle_info in cycle_infos: - hcig = HTMLContentInfoGetter( - response_html_info=cycle_info.html_tag_info, - url_id=cycle_info.url_info.id - ) - results = hcig.get_all_html_content() - html_content_infos.extend(results) - - await self.adb_client.add_html_content_infos(html_content_infos) diff --git a/core/classes/URLRelevanceHuggingfaceCycler.py b/core/classes/URLRelevanceHuggingfaceCycler.py deleted file mode 100644 index 8ffdb705..00000000 --- a/core/classes/URLRelevanceHuggingfaceCycler.py +++ /dev/null @@ -1,56 +0,0 @@ -from 
collector_db.AsyncDatabaseClient import AsyncDatabaseClient -from collector_db.DTOs.URLMetadataInfo import URLMetadataInfo -from collector_db.DTOs.URLWithHTML import URLWithHTML -from collector_db.enums import URLMetadataAttributeType, ValidationStatus, ValidationSource -from core.DTOs.URLRelevanceHuggingfaceCycleInfo import URLRelevanceHuggingfaceCycleInfo -from hugging_face.HuggingFaceInterface import HuggingFaceInterface - - -class URLRelevanceHuggingfaceCycler: - - def __init__( - self, - adb_client: AsyncDatabaseClient, - huggingface_interface: HuggingFaceInterface - ): - self.adb_client = adb_client - self.huggingface_interface = huggingface_interface - - async def cycle(self): - # Get pending urls from Source Collector - # with HTML data and without Relevancy Metadata - cycle_infos = await self.get_pending_url_info() - # Pipe into Huggingface - await self.add_huggingface_relevancy(cycle_infos) - - # Put results into Database - await self.put_results_into_database(cycle_infos) - - async def put_results_into_database(self, cycle_infos): - url_metadatas = [] - for cycle_info in cycle_infos: - url_metadata = URLMetadataInfo( - url_id=cycle_info.url_with_html.url_id, - attribute=URLMetadataAttributeType.RELEVANT, - value=str(cycle_info.relevant), - validation_status=ValidationStatus.PENDING_VALIDATION, - validation_source=ValidationSource.MACHINE_LEARNING - ) - url_metadatas.append(url_metadata) - await self.adb_client.add_url_metadatas(url_metadatas) - - async def add_huggingface_relevancy(self, cycle_infos: list[URLRelevanceHuggingfaceCycleInfo]): - urls_with_html = [cycle_info.url_with_html for cycle_info in cycle_infos] - results = self.huggingface_interface.get_url_relevancy(urls_with_html) - for cycle_info, result in zip(cycle_infos, results): - cycle_info.relevant = result - - async def get_pending_url_info(self) -> list[URLRelevanceHuggingfaceCycleInfo]: - cycle_infos = [] - pending_urls: list[URLWithHTML] = await self.adb_client.get_urls_with_html_data_and_no_relevancy_metadata() - for url_with_html in pending_urls: - cycle_info = URLRelevanceHuggingfaceCycleInfo( - url_with_html=url_with_html - ) - cycle_infos.append(cycle_info) - return cycle_infos diff --git a/core/classes/subtasks/AgencyIdentificationSubtaskBase.py b/core/classes/subtasks/AgencyIdentificationSubtaskBase.py new file mode 100644 index 00000000..755cade5 --- /dev/null +++ b/core/classes/subtasks/AgencyIdentificationSubtaskBase.py @@ -0,0 +1,16 @@ +import abc +from abc import ABC +from typing import Optional + +from core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo + + +class AgencyIdentificationSubtaskBase(ABC): + + @abc.abstractmethod + async def run( + self, + url_id: int, + collector_metadata: Optional[dict] = None + ) -> list[URLAgencySuggestionInfo]: + raise NotImplementedError diff --git a/core/classes/subtasks/AutoGooglerAgencyIdentificationSubtask.py b/core/classes/subtasks/AutoGooglerAgencyIdentificationSubtask.py new file mode 100644 index 00000000..1e5d945b --- /dev/null +++ b/core/classes/subtasks/AutoGooglerAgencyIdentificationSubtask.py @@ -0,0 +1,25 @@ +from typing import Optional + +from core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo +from core.classes.subtasks.AgencyIdentificationSubtaskBase import AgencyIdentificationSubtaskBase +from core.enums import SuggestionType + + +class AutoGooglerAgencyIdentificationSubtask(AgencyIdentificationSubtaskBase): + + async def run( + self, + url_id: int, + collector_metadata: Optional[dict] = None + ) -> 
list[URLAgencySuggestionInfo]: + return [ + URLAgencySuggestionInfo( + url_id=url_id, + suggestion_type=SuggestionType.UNKNOWN, + pdap_agency_id=None, + agency_name=None, + state=None, + county=None, + locality=None + ) + ] diff --git a/core/classes/subtasks/CKANAgencyIdentificationSubtask.py b/core/classes/subtasks/CKANAgencyIdentificationSubtask.py new file mode 100644 index 00000000..5eb88406 --- /dev/null +++ b/core/classes/subtasks/CKANAgencyIdentificationSubtask.py @@ -0,0 +1,29 @@ +from typing import Optional + +from core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo +from core.helpers import process_match_agency_response_to_suggestions +from pdap_api_client.PDAPClient import PDAPClient +from pdap_api_client.DTOs import MatchAgencyResponse + + +class CKANAgencyIdentificationSubtask: + + def __init__( + self, + pdap_client: PDAPClient + ): + self.pdap_client = pdap_client + + async def run( + self, + url_id: int, + collector_metadata: Optional[dict] + ) -> list[URLAgencySuggestionInfo]: + agency_name = collector_metadata["agency_name"] + match_agency_response: MatchAgencyResponse = await self.pdap_client.match_agency( + name=agency_name + ) + return process_match_agency_response_to_suggestions( + url_id=url_id, + match_agency_response=match_agency_response + ) diff --git a/core/classes/subtasks/CommonCrawlerAgencyIdentificationSubtask.py b/core/classes/subtasks/CommonCrawlerAgencyIdentificationSubtask.py new file mode 100644 index 00000000..5d0fa409 --- /dev/null +++ b/core/classes/subtasks/CommonCrawlerAgencyIdentificationSubtask.py @@ -0,0 +1,23 @@ +from typing import Optional + +from core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo +from core.enums import SuggestionType + + +class CommonCrawlerAgencyIdentificationSubtask: + async def run( + self, + url_id: int, + collector_metadata: Optional[dict] + ) -> list[URLAgencySuggestionInfo]: + return [ + URLAgencySuggestionInfo( + url_id=url_id, + suggestion_type=SuggestionType.UNKNOWN, + pdap_agency_id=None, + agency_name=None, + state=None, + county=None, + locality=None + ) + ] diff --git a/core/classes/subtasks/MiscellaneousMetadata/AutoGooglerMiscMetadataSubtask.py b/core/classes/subtasks/MiscellaneousMetadata/AutoGooglerMiscMetadataSubtask.py new file mode 100644 index 00000000..43659a9e --- /dev/null +++ b/core/classes/subtasks/MiscellaneousMetadata/AutoGooglerMiscMetadataSubtask.py @@ -0,0 +1,10 @@ +from core.DTOs.task_data_objects.URLMiscellaneousMetadataTDO import URLMiscellaneousMetadataTDO +from core.classes.subtasks.MiscellaneousMetadata.MiscellaneousMetadataSubtaskBase import \ + MiscellaneousMetadataSubtaskBase + + +class AutoGooglerMiscMetadataSubtask(MiscellaneousMetadataSubtaskBase): + + def process(self, tdo: URLMiscellaneousMetadataTDO): + tdo.name = tdo.collector_metadata['title'] + tdo.description = tdo.collector_metadata['snippet'] \ No newline at end of file diff --git a/core/classes/subtasks/MiscellaneousMetadata/CKANMiscMetadataSubtask.py b/core/classes/subtasks/MiscellaneousMetadata/CKANMiscMetadataSubtask.py new file mode 100644 index 00000000..04ef7a0f --- /dev/null +++ b/core/classes/subtasks/MiscellaneousMetadata/CKANMiscMetadataSubtask.py @@ -0,0 +1,13 @@ +from core.DTOs.task_data_objects.URLMiscellaneousMetadataTDO import URLMiscellaneousMetadataTDO +from core.classes.subtasks.MiscellaneousMetadata.MiscellaneousMetadataSubtaskBase import \ + MiscellaneousMetadataSubtaskBase + + +class CKANMiscMetadataSubtask(MiscellaneousMetadataSubtaskBase): + + def process(self, tdo: 
URLMiscellaneousMetadataTDO): + tdo.name = tdo.collector_metadata['submitted_name'] + tdo.description = tdo.collector_metadata['description'] + tdo.record_formats = tdo.collector_metadata['record_format'] + tdo.data_portal_type = tdo.collector_metadata['data_portal_type'] + tdo.supplying_entity = tdo.collector_metadata['supplying_entity'] diff --git a/core/classes/subtasks/MiscellaneousMetadata/MiscellaneousMetadataSubtaskBase.py b/core/classes/subtasks/MiscellaneousMetadata/MiscellaneousMetadataSubtaskBase.py new file mode 100644 index 00000000..7a0e7d1f --- /dev/null +++ b/core/classes/subtasks/MiscellaneousMetadata/MiscellaneousMetadataSubtaskBase.py @@ -0,0 +1,10 @@ +from abc import ABC, abstractmethod + +from core.DTOs.task_data_objects.URLMiscellaneousMetadataTDO import URLMiscellaneousMetadataTDO + + +class MiscellaneousMetadataSubtaskBase(ABC): + + @abstractmethod + def process(self, tdo: URLMiscellaneousMetadataTDO): + raise NotImplementedError \ No newline at end of file diff --git a/core/classes/subtasks/MiscellaneousMetadata/MuckrockMiscMetadataSubtask.py b/core/classes/subtasks/MiscellaneousMetadata/MuckrockMiscMetadataSubtask.py new file mode 100644 index 00000000..1d599162 --- /dev/null +++ b/core/classes/subtasks/MiscellaneousMetadata/MuckrockMiscMetadataSubtask.py @@ -0,0 +1,10 @@ +from core.DTOs.task_data_objects.URLMiscellaneousMetadataTDO import URLMiscellaneousMetadataTDO +from core.classes.subtasks.MiscellaneousMetadata.MiscellaneousMetadataSubtaskBase import \ + MiscellaneousMetadataSubtaskBase + + +class MuckrockMiscMetadataSubtask(MiscellaneousMetadataSubtaskBase): + + def process(self, tdo: URLMiscellaneousMetadataTDO): + tdo.name = tdo.collector_metadata['title'] + tdo.description = tdo.collector_metadata['title'] diff --git a/label_studio_interface/__init__.py b/core/classes/subtasks/MiscellaneousMetadata/__init__.py similarity index 100% rename from label_studio_interface/__init__.py rename to core/classes/subtasks/MiscellaneousMetadata/__init__.py diff --git a/core/classes/subtasks/MuckrockAgencyIdentificationSubtask.py b/core/classes/subtasks/MuckrockAgencyIdentificationSubtask.py new file mode 100644 index 00000000..a6222cf8 --- /dev/null +++ b/core/classes/subtasks/MuckrockAgencyIdentificationSubtask.py @@ -0,0 +1,42 @@ +from typing import Optional + +from source_collectors.muckrock.MuckrockAPIInterface import MuckrockAPIInterface, AgencyLookupResponse, AgencyLookupResponseType +from core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo +from core.exceptions import MuckrockAPIError +from core.helpers import process_match_agency_response_to_suggestions +from pdap_api_client.PDAPClient import PDAPClient +from pdap_api_client.DTOs import MatchAgencyResponse + + +class MuckrockAgencyIdentificationSubtask: + + def __init__( + self, + muckrock_api_interface: MuckrockAPIInterface, + pdap_client: PDAPClient + ): + self.muckrock_api_interface = muckrock_api_interface + self.pdap_client = pdap_client + + async def run( + self, + url_id: int, + collector_metadata: Optional[dict] + ) -> list[URLAgencySuggestionInfo]: + muckrock_agency_id = collector_metadata["agency"] + agency_lookup_response: AgencyLookupResponse = await self.muckrock_api_interface.lookup_agency( + muckrock_agency_id=muckrock_agency_id + ) + if agency_lookup_response.type != AgencyLookupResponseType.FOUND: + raise MuckrockAPIError( + f"Failed to lookup muckrock agency: {muckrock_agency_id}:" + f" {agency_lookup_response.type.value}: {agency_lookup_response.error}" + ) + + 
match_agency_response: MatchAgencyResponse = await self.pdap_client.match_agency( + name=agency_lookup_response.name + ) + return process_match_agency_response_to_suggestions( + url_id=url_id, + match_agency_response=match_agency_response + ) diff --git a/tests/manual/label_studio_interface/__init__.py b/core/classes/subtasks/__init__.py similarity index 100% rename from tests/manual/label_studio_interface/__init__.py rename to core/classes/subtasks/__init__.py diff --git a/core/classes/task_operators/AgencyIdentificationTaskOperator.py b/core/classes/task_operators/AgencyIdentificationTaskOperator.py new file mode 100644 index 00000000..b6e53955 --- /dev/null +++ b/core/classes/task_operators/AgencyIdentificationTaskOperator.py @@ -0,0 +1,104 @@ +from aiohttp import ClientSession + +from source_collectors.muckrock.MuckrockAPIInterface import MuckrockAPIInterface +from collector_db.AsyncDatabaseClient import AsyncDatabaseClient +from collector_db.DTOs.URLErrorInfos import URLErrorPydanticInfo +from collector_db.enums import TaskType +from collector_manager.enums import CollectorType +from core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo +from core.DTOs.task_data_objects.AgencyIdentificationTDO import AgencyIdentificationTDO +from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase +from core.classes.subtasks.AutoGooglerAgencyIdentificationSubtask import AutoGooglerAgencyIdentificationSubtask +from core.classes.subtasks.CKANAgencyIdentificationSubtask import CKANAgencyIdentificationSubtask +from core.classes.subtasks.CommonCrawlerAgencyIdentificationSubtask import CommonCrawlerAgencyIdentificationSubtask +from core.classes.subtasks.MuckrockAgencyIdentificationSubtask import MuckrockAgencyIdentificationSubtask +from core.enums import SuggestionType +from pdap_api_client.PDAPClient import PDAPClient + + +# TODO: Validate with Manual Tests + +class AgencyIdentificationTaskOperator(TaskOperatorBase): + + def __init__( + self, + adb_client: AsyncDatabaseClient, + pdap_client: PDAPClient, + muckrock_api_interface: MuckrockAPIInterface, + ): + super().__init__(adb_client) + self.pdap_client = pdap_client + self.muckrock_api_interface = muckrock_api_interface + + @property + def task_type(self): + return TaskType.AGENCY_IDENTIFICATION + + async def meets_task_prerequisites(self): + has_urls_without_agency_suggestions = await self.adb_client.has_urls_without_agency_suggestions() + return has_urls_without_agency_suggestions + + async def get_pending_urls_without_agency_identification(self): + return await self.adb_client.get_urls_without_agency_suggestions() + + async def get_muckrock_subtask(self): + return MuckrockAgencyIdentificationSubtask( + muckrock_api_interface=self.muckrock_api_interface, + pdap_client=self.pdap_client + ) + + async def get_subtask(self, collector_type: CollectorType): + match collector_type: + case CollectorType.MUCKROCK_SIMPLE_SEARCH: + return await self.get_muckrock_subtask() + case CollectorType.MUCKROCK_COUNTY_SEARCH: + return await self.get_muckrock_subtask() + case CollectorType.MUCKROCK_ALL_SEARCH: + return await self.get_muckrock_subtask() + case CollectorType.AUTO_GOOGLER: + return AutoGooglerAgencyIdentificationSubtask() + case CollectorType.COMMON_CRAWLER: + return CommonCrawlerAgencyIdentificationSubtask() + case CollectorType.CKAN: + return CKANAgencyIdentificationSubtask( + pdap_client=self.pdap_client + ) + + @staticmethod + async def run_subtask(subtask, url_id, collector_metadata) -> list[URLAgencySuggestionInfo]: + return 
await subtask.run(url_id=url_id, collector_metadata=collector_metadata) + + async def inner_task_logic(self): + async with ClientSession() as session: + self.pdap_client.access_manager.session = session + self.muckrock_api_interface.session = session + tdos: list[AgencyIdentificationTDO] = await self.get_pending_urls_without_agency_identification() + await self.link_urls_to_task(url_ids=[tdo.url_id for tdo in tdos]) + error_infos = [] + all_agency_suggestions = [] + for tdo in tdos: + subtask = await self.get_subtask(tdo.collector_type) + try: + new_agency_suggestions = await self.run_subtask( + subtask, + tdo.url_id, + tdo.collector_metadata + ) + all_agency_suggestions.extend(new_agency_suggestions) + except Exception as e: + error_info = URLErrorPydanticInfo( + task_id=self.task_id, + url_id=tdo.url_id, + error=str(e), + ) + error_infos.append(error_info) + + non_unknown_agency_suggestions = [suggestion for suggestion in all_agency_suggestions if suggestion.suggestion_type != SuggestionType.UNKNOWN] + await self.adb_client.upsert_new_agencies(non_unknown_agency_suggestions) + confirmed_suggestions = [suggestion for suggestion in all_agency_suggestions if suggestion.suggestion_type == SuggestionType.CONFIRMED] + await self.adb_client.add_confirmed_agency_url_links(confirmed_suggestions) + non_confirmed_suggestions = [suggestion for suggestion in all_agency_suggestions if suggestion.suggestion_type != SuggestionType.CONFIRMED] + await self.adb_client.add_agency_auto_suggestions(non_confirmed_suggestions) + await self.adb_client.add_url_error_infos(error_infos) + + diff --git a/core/classes/task_operators/SubmitApprovedURLTaskOperator.py b/core/classes/task_operators/SubmitApprovedURLTaskOperator.py new file mode 100644 index 00000000..86e0229e --- /dev/null +++ b/core/classes/task_operators/SubmitApprovedURLTaskOperator.py @@ -0,0 +1,65 @@ +from collector_db.AsyncDatabaseClient import AsyncDatabaseClient +from collector_db.DTOs.URLErrorInfos import URLErrorPydanticInfo +from collector_db.enums import TaskType +from core.DTOs.task_data_objects.SubmitApprovedURLTDO import SubmitApprovedURLTDO +from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase +from pdap_api_client.PDAPClient import PDAPClient + + +class SubmitApprovedURLTaskOperator(TaskOperatorBase): + + def __init__( + self, + adb_client: AsyncDatabaseClient, + pdap_client: PDAPClient + ): + super().__init__(adb_client) + self.pdap_client = pdap_client + + @property + def task_type(self): + return TaskType.SUBMIT_APPROVED + + async def meets_task_prerequisites(self): + return await self.adb_client.has_validated_urls() + + async def inner_task_logic(self): + # Retrieve all URLs that are validated and not submitted + tdos: list[SubmitApprovedURLTDO] = await self.adb_client.get_validated_urls() + + # Link URLs to this task + await self.link_urls_to_task(url_ids=[tdo.url_id for tdo in tdos]) + + # Submit each URL, recording errors if they exist + submitted_url_infos = await self.pdap_client.submit_urls(tdos) + + error_infos = await self.get_error_infos(submitted_url_infos) + success_infos = await self.get_success_infos(submitted_url_infos) + + # Update the database for successful submissions + await self.adb_client.mark_urls_as_submitted(infos=success_infos) + + # Update the database for failed submissions + await self.adb_client.add_url_error_infos(error_infos) + + async def get_success_infos(self, submitted_url_infos): + success_infos = [ + response_object for response_object in submitted_url_infos + if 
response_object.data_source_id is not None + ] + return success_infos + + async def get_error_infos(self, submitted_url_infos): + error_infos: list[URLErrorPydanticInfo] = [] + error_response_objects = [ + response_object for response_object in submitted_url_infos + if response_object.request_error is not None + ] + for error_response_object in error_response_objects: + error_info = URLErrorPydanticInfo( + task_id=self.task_id, + url_id=error_response_object.url_id, + error=error_response_object.request_error, + ) + error_infos.append(error_info) + return error_infos diff --git a/core/classes/task_operators/TaskOperatorBase.py b/core/classes/task_operators/TaskOperatorBase.py new file mode 100644 index 00000000..e7c87dac --- /dev/null +++ b/core/classes/task_operators/TaskOperatorBase.py @@ -0,0 +1,76 @@ +import traceback +from abc import ABC, abstractmethod +from collector_db.AsyncDatabaseClient import AsyncDatabaseClient +from collector_db.enums import TaskType +from core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome, TaskOperatorRunInfo +from core.enums import BatchStatus + + +class TaskOperatorBase(ABC): + + def __init__(self, adb_client: AsyncDatabaseClient): + self.adb_client = adb_client + self.task_id = None + self.tasks_linked = False + self.linked_url_ids = [] + + @property + @abstractmethod + def task_type(self) -> TaskType: + raise NotImplementedError + + @abstractmethod + async def meets_task_prerequisites(self): + """ + A task should not be initiated unless certain + conditions are met + """ + raise NotImplementedError + + async def link_urls_to_task(self, url_ids: list[int]): + self.linked_url_ids = url_ids + + async def initiate_task_in_db(self) -> int: + task_id = await self.adb_client.initiate_task( + task_type=self.task_type + ) + return task_id + + async def conclude_task(self): + if not self.linked_url_ids: + raise Exception("Task has not been linked to any URLs") + return await self.run_info( + outcome=TaskOperatorOutcome.SUCCESS, + message="Task completed successfully" + ) + + async def run_task(self, task_id: int) -> TaskOperatorRunInfo: + self.task_id = task_id + try: + await self.inner_task_logic() + return await self.conclude_task() + except Exception as e: + stack_trace = traceback.format_exc() + return await self.run_info( + outcome=TaskOperatorOutcome.ERROR, + message=str(e) + "\n" + stack_trace + ) + + async def run_info(self, outcome: TaskOperatorOutcome, message: str): + return TaskOperatorRunInfo( + task_id=self.task_id, + linked_url_ids=self.linked_url_ids, + outcome=outcome, + message=message + ) + + @abstractmethod + async def inner_task_logic(self): + raise NotImplementedError + + async def handle_task_error(self, e): + await self.adb_client.update_task_status(task_id=self.task_id, status=BatchStatus.ERROR) + await self.adb_client.add_task_error( + task_id=self.task_id, + error=str(e) + ) diff --git a/core/classes/task_operators/URLDuplicateTaskOperator.py b/core/classes/task_operators/URLDuplicateTaskOperator.py new file mode 100644 index 00000000..32cea432 --- /dev/null +++ b/core/classes/task_operators/URLDuplicateTaskOperator.py @@ -0,0 +1,33 @@ +from collector_db.AsyncDatabaseClient import AsyncDatabaseClient +from collector_db.enums import TaskType +from core.DTOs.task_data_objects.URLDuplicateTDO import URLDuplicateTDO +from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase +from pdap_api_client.PDAPClient import PDAPClient + + +class URLDuplicateTaskOperator(TaskOperatorBase): + + def __init__( + self, + adb_client: 
AsyncDatabaseClient, + pdap_client: PDAPClient + ): + super().__init__(adb_client) + self.pdap_client = pdap_client + + @property + def task_type(self): + return TaskType.DUPLICATE_DETECTION + + async def meets_task_prerequisites(self): + return await self.adb_client.has_pending_urls_not_checked_for_duplicates() + + async def inner_task_logic(self): + tdos: list[URLDuplicateTDO] = await self.adb_client.get_pending_urls_not_checked_for_duplicates() + url_ids = [tdo.url_id for tdo in tdos] + await self.link_urls_to_task(url_ids=url_ids) + for tdo in tdos: + tdo.is_duplicate = await self.pdap_client.is_url_duplicate(tdo.url) + duplicate_url_ids = [tdo.url_id for tdo in tdos if tdo.is_duplicate] + await self.adb_client.mark_all_as_duplicates(duplicate_url_ids) + await self.adb_client.mark_as_checked_for_duplicates(url_ids) diff --git a/core/classes/task_operators/URLHTMLTaskOperator.py b/core/classes/task_operators/URLHTMLTaskOperator.py new file mode 100644 index 00000000..f6cfa28a --- /dev/null +++ b/core/classes/task_operators/URLHTMLTaskOperator.py @@ -0,0 +1,106 @@ +from collector_db.AsyncDatabaseClient import AsyncDatabaseClient +from collector_db.DTOs.URLErrorInfos import URLErrorPydanticInfo +from collector_db.DTOs.URLInfo import URLInfo +from collector_db.enums import TaskType +from core.DTOs.task_data_objects.UrlHtmlTDO import UrlHtmlTDO +from core.classes.HTMLContentInfoGetter import HTMLContentInfoGetter +from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase +from html_tag_collector.ResponseParser import HTMLResponseParser +from html_tag_collector.URLRequestInterface import URLRequestInterface + + +class URLHTMLTaskOperator(TaskOperatorBase): + + def __init__( + self, + url_request_interface: URLRequestInterface, + adb_client: AsyncDatabaseClient, + html_parser: HTMLResponseParser + ): + super().__init__(adb_client) + self.url_request_interface = url_request_interface + self.html_parser = html_parser + + @property + def task_type(self): + return TaskType.HTML + + async def meets_task_prerequisites(self): + return await self.adb_client.has_pending_urls_without_html_data() + + async def inner_task_logic(self): + tdos = await self.get_pending_urls_without_html_data() + url_ids = [task_info.url_info.id for task_info in tdos] + await self.link_urls_to_task(url_ids=url_ids) + await self.get_raw_html_data_for_urls(tdos) + success_subset, error_subset = await self.separate_success_and_error_subsets(tdos) + await self.update_errors_in_database(error_subset) + await self.process_html_data(success_subset) + await self.update_html_data_in_database(success_subset) + + + async def get_just_urls(self, tdos: list[UrlHtmlTDO]): + return [task_info.url_info.url for task_info in tdos] + + async def get_pending_urls_without_html_data(self): + pending_urls: list[URLInfo] = await self.adb_client.get_pending_urls_without_html_data() + tdos = [ + UrlHtmlTDO( + url_info=url_info, + ) for url_info in pending_urls + ] + return tdos + + async def get_raw_html_data_for_urls(self, tdos: list[UrlHtmlTDO]): + just_urls = await self.get_just_urls(tdos) + url_response_infos = await self.url_request_interface.make_requests(just_urls) + for tdto, url_response_info in zip(tdos, url_response_infos): + tdto.url_response_info = url_response_info + + async def separate_success_and_error_subsets( + self, + tdos: list[UrlHtmlTDO] + ) -> tuple[ + list[UrlHtmlTDO], # Successful + list[UrlHtmlTDO] # Error + ]: + errored_tdos = [] + successful_tdos = [] + for tdto in tdos: + if not 
tdto.url_response_info.success: + errored_tdos.append(tdto) + else: + successful_tdos.append(tdto) + return successful_tdos, errored_tdos + + async def update_errors_in_database(self, error_tdos: list[UrlHtmlTDO]): + error_infos = [] + for error_tdo in error_tdos: + error_info = URLErrorPydanticInfo( + task_id=self.task_id, + url_id=error_tdo.url_info.id, + error=str(error_tdo.url_response_info.exception), + ) + error_infos.append(error_info) + await self.adb_client.add_url_error_infos(error_infos) + + async def process_html_data(self, tdos: list[UrlHtmlTDO]): + for tdto in tdos: + html_tag_info = await self.html_parser.parse( + url=tdto.url_info.url, + html_content=tdto.url_response_info.html, + content_type=tdto.url_response_info.content_type + ) + tdto.html_tag_info = html_tag_info + + async def update_html_data_in_database(self, tdos: list[UrlHtmlTDO]): + html_content_infos = [] + for tdto in tdos: + hcig = HTMLContentInfoGetter( + response_html_info=tdto.html_tag_info, + url_id=tdto.url_info.id + ) + results = hcig.get_all_html_content() + html_content_infos.extend(results) + + await self.adb_client.add_html_content_infos(html_content_infos) diff --git a/core/classes/task_operators/URLMiscellaneousMetadataTaskOperator.py b/core/classes/task_operators/URLMiscellaneousMetadataTaskOperator.py new file mode 100644 index 00000000..68a3a243 --- /dev/null +++ b/core/classes/task_operators/URLMiscellaneousMetadataTaskOperator.py @@ -0,0 +1,75 @@ +from typing import Optional + +from collector_db.AsyncDatabaseClient import AsyncDatabaseClient +from collector_db.DTOs.URLErrorInfos import URLErrorPydanticInfo +from collector_db.enums import TaskType +from collector_manager.enums import CollectorType +from core.DTOs.task_data_objects.URLMiscellaneousMetadataTDO import URLMiscellaneousMetadataTDO +from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase +from core.classes.subtasks.MiscellaneousMetadata.AutoGooglerMiscMetadataSubtask import AutoGooglerMiscMetadataSubtask +from core.classes.subtasks.MiscellaneousMetadata.CKANMiscMetadataSubtask import CKANMiscMetadataSubtask +from core.classes.subtasks.MiscellaneousMetadata.MiscellaneousMetadataSubtaskBase import \ + MiscellaneousMetadataSubtaskBase +from core.classes.subtasks.MiscellaneousMetadata.MuckrockMiscMetadataSubtask import MuckrockMiscMetadataSubtask + + +class URLMiscellaneousMetadataTaskOperator(TaskOperatorBase): + + def __init__( + self, + adb_client: AsyncDatabaseClient + ): + super().__init__(adb_client) + + @property + def task_type(self): + return TaskType.MISC_METADATA + + async def meets_task_prerequisites(self): + return await self.adb_client.has_pending_urls_missing_miscellaneous_metadata() + + async def get_subtask( + self, + collector_type: CollectorType + ) -> Optional[MiscellaneousMetadataSubtaskBase]: + match collector_type: + case CollectorType.MUCKROCK_SIMPLE_SEARCH: + return MuckrockMiscMetadataSubtask() + case CollectorType.MUCKROCK_COUNTY_SEARCH: + return MuckrockMiscMetadataSubtask() + case CollectorType.MUCKROCK_ALL_SEARCH: + return MuckrockMiscMetadataSubtask() + case CollectorType.AUTO_GOOGLER: + return AutoGooglerMiscMetadataSubtask() + case CollectorType.CKAN: + return CKANMiscMetadataSubtask() + case _: + return None + + async def html_default_logic(self, tdo: URLMiscellaneousMetadataTDO): + if tdo.name is None: + tdo.name = tdo.html_metadata_info.title + if tdo.description is None: + tdo.description = tdo.html_metadata_info.description + + async def inner_task_logic(self): + tdos: 
list[URLMiscellaneousMetadataTDO] = await self.adb_client.get_pending_urls_missing_miscellaneous_metadata() + await self.link_urls_to_task(url_ids=[tdo.url_id for tdo in tdos]) + + error_infos = [] + for tdo in tdos: + subtask = await self.get_subtask(tdo.collector_type) + try: + if subtask is not None: + subtask.process(tdo) + await self.html_default_logic(tdo) + except Exception as e: + error_info = URLErrorPydanticInfo( + task_id=self.task_id, + url_id=tdo.url_id, + error=str(e), + ) + error_infos.append(error_info) + + await self.adb_client.add_miscellaneous_metadata(tdos) + await self.adb_client.add_url_error_infos(error_infos) \ No newline at end of file diff --git a/core/classes/task_operators/URLRecordTypeTaskOperator.py b/core/classes/task_operators/URLRecordTypeTaskOperator.py new file mode 100644 index 00000000..ab1f1f08 --- /dev/null +++ b/core/classes/task_operators/URLRecordTypeTaskOperator.py @@ -0,0 +1,74 @@ +from collector_db.AsyncDatabaseClient import AsyncDatabaseClient +from collector_db.DTOs.URLErrorInfos import URLErrorPydanticInfo +from collector_db.enums import TaskType +from core.DTOs.task_data_objects.URLRecordTypeTDO import URLRecordTypeTDO +from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase +from core.enums import RecordType +from llm_api_logic.OpenAIRecordClassifier import OpenAIRecordClassifier + + +class URLRecordTypeTaskOperator(TaskOperatorBase): + + def __init__( + self, + adb_client: AsyncDatabaseClient, + classifier: OpenAIRecordClassifier + ): + super().__init__(adb_client) + self.classifier = classifier + + @property + def task_type(self): + return TaskType.RECORD_TYPE + + async def meets_task_prerequisites(self): + return await self.adb_client.has_urls_with_html_data_and_without_auto_record_type_suggestion() + + async def get_tdos(self) -> list[URLRecordTypeTDO]: + urls_with_html = await self.adb_client.get_urls_with_html_data_and_without_auto_record_type_suggestion() + tdos = [URLRecordTypeTDO(url_with_html=url_with_html) for url_with_html in urls_with_html] + return tdos + + async def inner_task_logic(self): + # Get pending urls from Source Collector + # with HTML data and without Record Type Metadata + tdos = await self.get_tdos() + url_ids = [tdo.url_with_html.url_id for tdo in tdos] + await self.link_urls_to_task(url_ids=url_ids) + + await self.get_ml_classifications(tdos) + success_subset, error_subset = await self.separate_success_and_error_subsets(tdos) + await self.put_results_into_database(success_subset) + await self.update_errors_in_database(error_subset) + + async def update_errors_in_database(self, tdos: list[URLRecordTypeTDO]): + error_infos = [] + for tdo in tdos: + error_info = URLErrorPydanticInfo( + task_id=self.task_id, + url_id=tdo.url_with_html.url_id, + error=tdo.error + ) + error_infos.append(error_info) + await self.adb_client.add_url_error_infos(error_infos) + + async def put_results_into_database(self, tdos: list[URLRecordTypeTDO]): + suggestions = [] + for tdo in tdos: + url_id = tdo.url_with_html.url_id + record_type = tdo.record_type + suggestions.append((url_id, record_type)) + await self.adb_client.add_auto_record_type_suggestions(suggestions) + + async def separate_success_and_error_subsets(self, tdos: list[URLRecordTypeTDO]): + success_subset = [tdo for tdo in tdos if not tdo.is_errored()] + error_subset = [tdo for tdo in tdos if tdo.is_errored()] + return success_subset, error_subset + + async def get_ml_classifications(self, tdos: list[URLRecordTypeTDO]): + for tdo in tdos: + try: + 
record_type_str = await self.classifier.classify_url(tdo.url_with_html.html_infos) + tdo.record_type = RecordType(record_type_str) + except Exception as e: + tdo.error = str(e) \ No newline at end of file diff --git a/core/classes/task_operators/URLRelevanceHuggingfaceTaskOperator.py b/core/classes/task_operators/URLRelevanceHuggingfaceTaskOperator.py new file mode 100644 index 00000000..49aa7aa0 --- /dev/null +++ b/core/classes/task_operators/URLRelevanceHuggingfaceTaskOperator.py @@ -0,0 +1,63 @@ +from collector_db.AsyncDatabaseClient import AsyncDatabaseClient +from collector_db.DTOs.URLWithHTML import URLWithHTML +from collector_db.enums import TaskType +from core.DTOs.task_data_objects.URLRelevanceHuggingfaceTDO import URLRelevanceHuggingfaceTDO +from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase +from hugging_face.HuggingFaceInterface import HuggingFaceInterface + + +class URLRelevanceHuggingfaceTaskOperator(TaskOperatorBase): + + def __init__( + self, + adb_client: AsyncDatabaseClient, + huggingface_interface: HuggingFaceInterface + ): + super().__init__(adb_client) + self.huggingface_interface = huggingface_interface + + @property + def task_type(self): + return TaskType.RELEVANCY + + async def meets_task_prerequisites(self): + return await self.adb_client.has_urls_with_html_data_and_without_auto_relevant_suggestion() + + async def inner_task_logic(self): + # Get pending urls from Source Collector + # with HTML data and without Relevancy Metadata + tdos = await self.get_pending_url_info() + url_ids = [tdo.url_with_html.url_id for tdo in tdos] + await self.link_urls_to_task(url_ids=url_ids) + # Pipe into Huggingface + await self.add_huggingface_relevancy(tdos) + + # Put results into Database + await self.put_results_into_database(tdos) + + async def put_results_into_database(self, tdos): + suggestions: list[tuple[int, bool]] = [] + for tdo in tdos: + url_id = tdo.url_with_html.url_id + relevant = tdo.relevant + suggestions.append((url_id, relevant)) + + await self.adb_client.add_auto_relevance_suggestions(suggestions) + + async def add_huggingface_relevancy(self, tdos: list[URLRelevanceHuggingfaceTDO]): + urls_with_html = [tdo.url_with_html for tdo in tdos] + results = await self.huggingface_interface.get_url_relevancy_async(urls_with_html) + for tdo, result in zip(tdos, results): + tdo.relevant = result + + async def get_pending_url_info( + self, + ) -> list[URLRelevanceHuggingfaceTDO]: + tdos = [] + pending_urls: list[URLWithHTML] = await self.adb_client.get_urls_with_html_data_and_without_auto_relevant_suggestion() + for url_with_html in pending_urls: + tdo = URLRelevanceHuggingfaceTDO( + url_with_html=url_with_html + ) + tdos.append(tdo) + return tdos diff --git a/tests/test_automated/integration/cycles/__init__.py b/core/classes/task_operators/__init__.py similarity index 100% rename from tests/test_automated/integration/cycles/__init__.py rename to core/classes/task_operators/__init__.py diff --git a/core/enums.py b/core/enums.py index 69505406..019572b8 100644 --- a/core/enums.py +++ b/core/enums.py @@ -1,12 +1,73 @@ from enum import Enum +class AnnotationType(Enum): + RELEVANCE = "RELEVANCE" + RECORD_TYPE = "RECORD_TYPE" + AGENCY = "AGENCY" + class BatchStatus(Enum): - COMPLETE = "complete" + READY_TO_LABEL = "ready to label" IN_PROCESS = "in-process" ERROR = "error" ABORTED = "aborted" -class LabelStudioTaskStatus(Enum): - PENDING = "pending" - COMPLETED = "completed" \ No newline at end of file +class RecordType(Enum): + """ + All available URL 
record types + """ + ACCIDENT_REPORTS = "Accident Reports" + ARREST_RECORDS = "Arrest Records" + CALLS_FOR_SERVICE = "Calls for Service" + CAR_GPS = "Car GPS" + CITATIONS = "Citations" + DISPATCH_LOGS = "Dispatch Logs" + DISPATCH_RECORDINGS = "Dispatch Recordings" + FIELD_CONTACTS = "Field Contacts" + INCIDENT_REPORTS = "Incident Reports" + MISC_POLICE_ACTIVITY = "Misc Police Activity" + OFFICER_INVOLVED_SHOOTINGS = "Officer Involved Shootings" + STOPS = "Stops" + SURVEYS = "Surveys" + USE_OF_FORCE_REPORTS = "Use of Force Reports" + VEHICLE_PURSUITS = "Vehicle Pursuits" + COMPLAINTS_AND_MISCONDUCT = "Complaints & Misconduct" + DAILY_ACTIVITY_LOGS = "Daily Activity Logs" + TRAINING_AND_HIRING_INFO = "Training & Hiring Info" + PERSONNEL_RECORDS = "Personnel Records" + ANNUAL_AND_MONTHLY_REPORTS = "Annual & Monthly Reports" + BUDGETS_AND_FINANCES = "Budgets & Finances" + CONTACT_INFO_AND_AGENCY_META = "Contact Info & Agency Meta" + GEOGRAPHIC = "Geographic" + LIST_OF_DATA_SOURCES = "List of Data Sources" + POLICIES_AND_CONTRACTS = "Policies & Contracts" + CRIME_MAPS_AND_REPORTS = "Crime Maps & Reports" + CRIME_STATISTICS = "Crime Statistics" + MEDIA_BULLETINS = "Media Bulletins" + RECORDS_REQUEST_INFO = "Records Request Info" + RESOURCES = "Resources" + SEX_OFFENDER_REGISTRY = "Sex Offender Registry" + WANTED_PERSONS = "Wanted Persons" + BOOKING_REPORTS = "Booking Reports" + COURT_CASES = "Court Cases" + INCARCERATION_RECORDS = "Incarceration Records" + OTHER = "Other" + + +class SuggestionType(Enum): + """ + Identifies the specific kind of suggestion made for a URL + """ + AUTO_SUGGESTION = "Auto Suggestion" + USER_SUGGESTION = "User Suggestion" + UNKNOWN = "Unknown" + NEW_AGENCY = "New Agency" + CONFIRMED = "Confirmed" + +class SubmitResponseStatus(Enum): + """ + Response statuses from the /source-collector/data-sources endpoint + """ + SUCCESS = "success" + FAILURE = "FAILURE" + ALREADY_EXISTS = "already_exists" \ No newline at end of file diff --git a/core/exceptions.py b/core/exceptions.py index edaa32a3..e3e93e55 100644 --- a/core/exceptions.py +++ b/core/exceptions.py @@ -1,2 +1,20 @@ +from http import HTTPStatus + +from fastapi import HTTPException + + class InvalidPreprocessorError(Exception): pass + + +class MuckrockAPIError(Exception): + pass + + +class MatchAgencyError(Exception): + pass + + +class FailedValidationException(HTTPException): + def __init__(self, detail: str): + super().__init__(status_code=HTTPStatus.BAD_REQUEST, detail=detail) \ No newline at end of file diff --git a/core/helpers.py b/core/helpers.py index e69de29b..1fc51cde 100644 --- a/core/helpers.py +++ b/core/helpers.py @@ -0,0 +1,52 @@ +from http import HTTPStatus + +from fastapi import HTTPException + +from core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo +from core.enums import SuggestionType +from core.exceptions import MatchAgencyError +from pdap_api_client.DTOs import MatchAgencyResponse +from pdap_api_client.enums import MatchAgencyResponseStatus + + +def process_match_agency_response_to_suggestions( + url_id: int, + match_agency_response: MatchAgencyResponse +) -> list[URLAgencySuggestionInfo]: + if match_agency_response.status == MatchAgencyResponseStatus.EXACT_MATCH: + match = match_agency_response.matches[0] + return [ + URLAgencySuggestionInfo( + url_id=url_id, + suggestion_type=SuggestionType.CONFIRMED, + pdap_agency_id=int(match.id), + agency_name=match.submitted_name, + state=match.state, + county=match.county, + ) + ] + if match_agency_response.status == 
MatchAgencyResponseStatus.NO_MATCH: + return [ + URLAgencySuggestionInfo( + url_id=url_id, + suggestion_type=SuggestionType.UNKNOWN, + ) + ] + + if match_agency_response.status != MatchAgencyResponseStatus.PARTIAL_MATCH: + raise MatchAgencyError( + f"Unknown Match Agency Response Status: {match_agency_response.status}" + ) + + return [ + URLAgencySuggestionInfo( + url_id=url_id, + suggestion_type=SuggestionType.AUTO_SUGGESTION, + pdap_agency_id=match.id, + agency_name=match.submitted_name, + state=match.state, + county=match.county, + locality=match.locality + ) + for match in match_agency_response.matches + ] diff --git a/html_tag_collector/ResponseFetcher.py b/html_tag_collector/ResponseFetcher.py deleted file mode 100644 index 04ef3f21..00000000 --- a/html_tag_collector/ResponseFetcher.py +++ /dev/null @@ -1,64 +0,0 @@ -import ssl -import traceback -from dataclasses import dataclass -from typing import Optional - -import requests -import urllib3 -from requests_html import AsyncHTMLSession - -from html_tag_collector.constants import REQUEST_HEADERS -from html_tag_collector.url_adjustment_functions import http_to_https - - -class ResponseFetcher: - - def __init__(self, session: AsyncHTMLSession, url: str, debug=False): - self.headers = REQUEST_HEADERS - self.session = session - self.url = url - self.debug = debug - - def debug_print(self, s: str): - if self.debug: - print(s) - - async def fetch(self, verify_ssl=True): - return await self.session.get( - self.url, - headers=self.headers, - timeout=120, - verify=verify_ssl - ) - - async def get_response(self): - response = None - try: - response = await self.fetch() - except (requests.exceptions.SSLError, ssl.SSLError): - # This error is raised when the website uses a legacy SSL version, which is not supported by requests - self.debug_print(f"SSLError: {self.url}") - - # Retry without SSL verification - response = await self.fetch(verify_ssl=False) - except requests.exceptions.ConnectionError: - # Sometimes this error is raised because the provided url uses http - # when it should be https and the website does not handle it properly - self.debug_print(f"MaxRetryError: {self.url}") - - response = await self.retry_with_https() - except (urllib3.exceptions.LocationParseError, requests.exceptions.ReadTimeout) as e: - self.debug_print(f"{type(e).__name__}: {self.url}") - except Exception as e: - self.debug_print(f""" - "Exception:", {self.url} - {traceback.format_exc()} - {e} - """) - finally: - self.debug_print(f"{self.url} - {str(response)}") - return response - - async def retry_with_https(self): - self.url = http_to_https(self.url) - return await self.fetch() diff --git a/html_tag_collector/RootURLCache.py b/html_tag_collector/RootURLCache.py index be670475..165be89d 100644 --- a/html_tag_collector/RootURLCache.py +++ b/html_tag_collector/RootURLCache.py @@ -16,7 +16,9 @@ class RootURLCacheResponseInfo: exception: Optional[Exception] = None class RootURLCache: - def __init__(self, adb_client: AsyncDatabaseClient = AsyncDatabaseClient()): + def __init__(self, adb_client: Optional[AsyncDatabaseClient] = None): + if adb_client is None: + adb_client = AsyncDatabaseClient() self.adb_client = adb_client self.cache = None @@ -26,7 +28,7 @@ async def save_to_cache(self, url: str, title: str): self.cache[url] = title await self.adb_client.add_to_root_url_cache(url=url, page_title=title) - async def get_from_cache(self, url: str): + async def get_from_cache(self, url: str) -> Optional[str]: if self.cache is None: self.cache = await 
self.adb_client.load_root_url_cache() diff --git a/html_tag_collector/URLRequestInterface.py b/html_tag_collector/URLRequestInterface.py index d6c8ace2..20ea1989 100644 --- a/html_tag_collector/URLRequestInterface.py +++ b/html_tag_collector/URLRequestInterface.py @@ -1,5 +1,4 @@ import asyncio -import subprocess from typing import Optional from aiohttp import ClientSession @@ -7,16 +6,10 @@ from dataclasses import dataclass -from requests import Response from tqdm.asyncio import tqdm MAX_CONCURRENCY = 5 -@dataclass -class URLResponseInfoOld: - success: bool - response: Response or Exception - @dataclass class URLResponseInfo: success: bool @@ -31,10 +24,8 @@ class RequestResources: semaphore: asyncio.Semaphore = asyncio.Semaphore(MAX_CONCURRENCY) def ensure_browsers_installed(): - print("Installing browsers...") - result = subprocess.run("playwright install", shell=True, capture_output=True, text=True) - print(result.stdout) - print(result.stderr) + # TODO: Slated for destruction + pass HTML_CONTENT_TYPE = "text/html" @@ -88,10 +79,8 @@ async def make_requests( self, urls: list[str], ) -> list[URLResponseInfo]: - try: - ensure_browsers_installed() - return await self.fetch_urls(urls) - except Exception as e: - return [] + ensure_browsers_installed() + return await self.fetch_urls(urls) + diff --git a/html_tag_collector/url_cache.json b/html_tag_collector/url_cache.json deleted file mode 100644 index d4a340e1..00000000 --- a/html_tag_collector/url_cache.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "http://www.example.com": "Example Domain", - "http://www.google.com": "Google", - "https://books.toscrape.com": "\n All products | Books to Scrape - Sandbox\n" -} \ No newline at end of file diff --git a/html_tag_collector/urls.json b/html_tag_collector/urls.json deleted file mode 100644 index 79574f93..00000000 --- a/html_tag_collector/urls.json +++ /dev/null @@ -1,17 +0,0 @@ -[{ - "id": 1, - "url": "https://pdap.io", - "label": "Label" -}, { - "id": 2, - "url": "https://pdapio.io", - "label": "Label" -}, { - "id": 3, - "url": "https://pdap.dev", - "label": "Label" -}, { - "id": 4, - "url": "https://pdap.io/404test", - "label": "Label" -}] diff --git a/hugging_face/HuggingFaceInterface.py b/hugging_face/HuggingFaceInterface.py index efb54b75..3dff8ccd 100644 --- a/hugging_face/HuggingFaceInterface.py +++ b/hugging_face/HuggingFaceInterface.py @@ -1,30 +1,40 @@ -from transformers import pipeline +import asyncio +import json +import os +import sys +from typing import List from collector_db.DTOs.URLWithHTML import URLWithHTML - class HuggingFaceInterface: - def __init__(self): - self.pipe = pipeline("text-classification", model="PDAP/url-relevance") + @staticmethod + async def get_url_relevancy_async(urls_with_html: List[URLWithHTML]) -> List[bool]: + urls = [u.url for u in urls_with_html] + input_data = json.dumps(urls) + + proc = await asyncio.create_subprocess_exec( + sys.executable, "hugging_face/relevancy_worker.py", + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + env=os.environ.copy(), # ⬅️ ensure env variables are inherited + ) - def get_url_relevancy( - self, - urls_with_html: list[URLWithHTML], - threshold: float = 0.5 - ) -> list[bool]: - urls = [url_with_html.url for url_with_html in urls_with_html] - results: list[dict] = self.pipe(urls) + stdout, stderr = await proc.communicate(input=input_data.encode("utf-8")) + print(stderr) - bool_results = [] - for result in results: - score = result["score"] - if score >= threshold: - 
bool_results.append(True) - else: - bool_results.append(False) - return bool_results + raw_output = stdout.decode("utf-8").strip() + if proc.returncode != 0: + raise RuntimeError(f"Error running HuggingFace: {stderr}/{raw_output}") + # Try to extract the actual JSON line + for line in raw_output.splitlines(): + try: + return json.loads(line) + except json.JSONDecodeError as e: + continue + raise RuntimeError(f"Could not parse JSON from subprocess: {raw_output}") diff --git a/tests/test_automated/integration/source_collectors/__init__.py b/hugging_face/__init__.py similarity index 100% rename from tests/test_automated/integration/source_collectors/__init__.py rename to hugging_face/__init__.py diff --git a/tests/test_automated/unit/collector_manager/__init__.py b/hugging_face/example/__init__.py similarity index 100% rename from tests/test_automated/unit/collector_manager/__init__.py rename to hugging_face/example/__init__.py diff --git a/hugging_face/relevancy_worker.py b/hugging_face/relevancy_worker.py new file mode 100644 index 00000000..dd158898 --- /dev/null +++ b/hugging_face/relevancy_worker.py @@ -0,0 +1,23 @@ +import os +import sys +import json +from transformers import pipeline + +def main(): + urls = json.loads(sys.stdin.read()) + + pipe = pipeline("text-classification", model="PDAP/url-relevance") + results = pipe(urls) + + print("Executable:", sys.executable, file=sys.stderr) + print("sys.path:", sys.path, file=sys.stderr) + print("PYTHONPATH:", os.getenv("PYTHONPATH"), file=sys.stderr) + + if len(results) != len(urls): + raise RuntimeError(f"Expected {len(urls)} results, got {len(results)}") + bools = [r["score"] >= 0.5 for r in results] + + print(json.dumps(bools)) + +if __name__ == "__main__": + main() diff --git a/hugging_face/testing/__init__.py b/hugging_face/testing/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/hugging_face/url_record_type_labeling/__init__.py b/hugging_face/url_record_type_labeling/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/hugging_face/url_relevance/__init__.py b/hugging_face/url_relevance/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/label_studio_interface/DTOs/LabelStudioTaskExportInfo.py b/label_studio_interface/DTOs/LabelStudioTaskExportInfo.py deleted file mode 100644 index 07c0562b..00000000 --- a/label_studio_interface/DTOs/LabelStudioTaskExportInfo.py +++ /dev/null @@ -1,39 +0,0 @@ -from pydantic import BaseModel - -from collector_db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo, HTMLContentType - - -class LabelStudioTaskExportInfo(BaseModel): - url: str - html_title: str = "" - meta_description: str = "" - h1: list[str] = [] - h2: list[str] = [] - h3: list[str] = [] - h4: list[str] = [] - h5: list[str] = [] - h6: list[str] = [] - div_text: str = "" - url_path: str = "" - http_response: int = -1 - url_source_info: str = "" - -ENUM_TO_ATTRIBUTE_MAPPING = { - HTMLContentType.TITLE: "html_title", - HTMLContentType.DESCRIPTION: "meta_description", - HTMLContentType.H1: "h1", - HTMLContentType.H2: "h2", - HTMLContentType.H3: "h3", - HTMLContentType.H4: "h4", - HTMLContentType.H5: "h5", - HTMLContentType.H6: "h6", - HTMLContentType.DIV: "div_text" -} - -def add_html_info_to_export_info( - export_info: LabelStudioTaskExportInfo, - html_content_info: URLHTMLContentInfo -): - attribute_name = ENUM_TO_ATTRIBUTE_MAPPING[html_content_info.content_type] - setattr(export_info, attribute_name, html_content_info.content) - diff --git 
a/label_studio_interface/LabelStudioAPIManager.py b/label_studio_interface/LabelStudioAPIManager.py deleted file mode 100644 index 138dd0cd..00000000 --- a/label_studio_interface/LabelStudioAPIManager.py +++ /dev/null @@ -1,325 +0,0 @@ -import copy -import json -import os -import random -import string -import sys -from enum import Enum -from typing import Annotated - -import requests - -from label_studio_interface.DTOs.LabelStudioTaskExportInfo import LabelStudioTaskExportInfo - -# The below code sets the working directory to be the root of the entire repository -# This is done to solve otherwise quite annoying import issues. -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) - -from label_studio_interface.LabelStudioConfig import LabelStudioConfig - -""" -This script contains code which interfaces with the Label Studio API. -To view the documentation for the Label Studio API, visit https://app.heartex.com/docs/api -""" - - -class Role(Enum): - """ - This class represents the roles that a user can have in an organization. - """ - OWNER = "OW" - ADMINISTRATOR = "AD" - MANAGER = "MA" - REVIEWER = "RE" - ANNOTATOR = "AN" - DEACTIVATED = "DI" - NONE = "NO" - -def generate_random_word(length): - letters = string.ascii_lowercase - return ''.join(random.choice(letters) for _ in range(length)) - - -class URLConstructor: - def __init__(self, scheme="http", domain=None): - self.scheme = scheme - self.domain = domain - self.path_segments = [] - self.query_params = {} - - def add_path_segment(self, segment): - self.path_segments.append(segment) - return self - - def add_query_param(self, key, value): - self.query_params[key] = value - return self - - def build(self): - path = "/".join(self.path_segments) - query_string = "&".join([f"{key}={value}" for key, value in self.query_params.items()]) - url = f"{self.scheme}://{self.domain}" - if path: - url += f"/{path}" - if query_string: - url += f"?{query_string}" - return url - - -class LabelStudioAPIURLConstructor: - """ - This class is responsible for constructing the URL for the Label Studio API. - """ - - def __init__(self, project_id: str = '58475', organization_id: str = '1'): - self.base_url_constructor = URLConstructor( - domain='app.heartex.com', - scheme='https' - ).add_path_segment('api') - self.project_id = project_id - self.organization_id = organization_id - # self.label_studio_api_root_url = 'https://app.heartex.com/api' - # self.label_studio_api_root_url = f'https://app.heartex.com/api/projects/{project_id}' - - def get_import_url(self) -> str: - """ - This method returns the URL for importing data into Label Studio. - Returns: str - """ - new_constructor = copy.deepcopy(self.base_url_constructor) - return (new_constructor - .add_path_segment('projects') - .add_path_segment(self.project_id) - .add_path_segment('import') - .add_query_param('return_task_ids', 'true') - .build() - ) - - def get_project_url(self) -> str: - """ - This method returns the URL for the project. - Returns: str - """ - new_constructor = copy.deepcopy(self.base_url_constructor) - return (new_constructor - .add_path_segment('projects') - .add_path_segment(self.project_id) - .build() - ) - - def delete_project_tasks_url(self) -> str: - """ - This method returns the URL for deleting all tasks in the project. 
- Returns: str - """ - new_constructor = copy.deepcopy(self.base_url_constructor) - return (new_constructor - .add_path_segment('projects') - .add_path_segment(self.project_id) - .add_path_segment('ground-truth-tasks') - .build() - ) - - def get_easy_export_url(self, all_tasks: bool) -> str: - """ - This method returns the URL for the easy export. - Returns: str - """ - new_constructor = copy.deepcopy(self.base_url_constructor) - return (new_constructor - .add_path_segment('projects') - .add_path_segment(self.project_id) - .add_path_segment('export') - .add_query_param('exportType', 'JSON') - .add_query_param('download_all_tasks', str(all_tasks).lower()) - .build() - ) - - def get_organization_membership_url(self) -> str: - """ - This method returns the URL for organization membership - Used for querying the members in the organization as well as updating the role of a member. - Returns: str - """ - new_constructor = copy.deepcopy(self.base_url_constructor) - return (new_constructor - .add_path_segment('organizations') - .add_path_segment(self.organization_id) - .add_path_segment('memberships') - .build() - ) - - -class LabelStudioAPIManager: - - def __init__( - self, - config: LabelStudioConfig = LabelStudioConfig(), - ): - """ - This class is responsible for managing the API requests to Label Studio. - Args: - config: The user's authorization token for the Label Studio API. - """ - self.config = config - self.api_url_constructor = LabelStudioAPIURLConstructor( - project_id=self.config.project_id, - organization_id=self.config.organization_id - ) - - # region Task Import/Export - def export_tasks_into_project( - self, - data: list[LabelStudioTaskExportInfo] - ) -> Annotated[list[int], "The task IDs"]: - """ - This method imports task input data into Label Studio. - https://labelstud.io/api#tag/Import/operation/api_projects_import_create - Args: - data: dict - The data to import into Label Studio. - This should be a list of dictionaries, each containing - the same keys, representing data for the task - Returns: requests.Response - """ - dict_data = [] - for task in data: - dict_data.append(task.model_dump()) - import_url = self.api_url_constructor.get_import_url() - response = requests.post( - url=import_url, - data=json.dumps(dict_data), - # TODO: Consider extracting header construction - headers={ - 'Content-Type': 'application/json', - 'Authorization': self.config.authorization_token - } - ) - response.raise_for_status() - return response.json()["task_ids"] - - def import_tasks_from_project(self, all_tasks: bool = False) -> requests.Response: - """ - This method exports the data from the project. - Args: - all_tasks: bool - Whether to export all tasks or just the annotated tasks. - output_filename: str - The filename to save the exported data to. - Returns: requests.Response - """ - export_url = self.api_url_constructor.get_easy_export_url(all_tasks=all_tasks) - response = requests.get( - url=export_url, - headers={ - 'Authorization': self.config.authorization_token - } - ) - response.raise_for_status() - return response - - # endregion - - # region Project Information - def get_project_info(self) -> requests.Response: - """ - This method retrieves information about the project. 
- Returns: requests.Response - """ - project_url = self.api_url_constructor.get_project_url() - response = requests.get( - url=project_url, - headers={ - 'Authorization': self.config.authorization_token - } - ) - return response - - def ping_project(self) -> bool: - """ - This method pings the project, returning True if the project is accessible. - Returns: bool - """ - project_url = self.api_url_constructor.get_project_url() - response = requests.get( - url=project_url, - headers={ - 'Authorization': self.config.authorization_token - } - ) - return response.status_code == 200 - - # endregion - - # region User Management - def get_members_in_organization(self) -> requests.Response: - """ - This method retrieves the members in the organization. - https://app.heartex.com/docs/api#tag/Organizations/operation/api_organizations_memberships_list - Returns: requests.Response - """ - membership_url = self.api_url_constructor.get_organization_membership_url() - response = requests.get( - url=membership_url, - headers={ - 'Authorization': self.config.authorization_token - } - ) - response.raise_for_status() - return response - - def update_member_role(self, user_id: int, role: Role) -> requests.Response: - """ - This method updates the role of a member in the organization. - Args: - user_id: str - The ID of the user to update the role for. - role: Role - The role to update the user to. - Returns: requests.Response - """ - membership_url = self.api_url_constructor.get_organization_membership_url() - response = requests.patch( - url=membership_url, - headers={ - 'Authorization': self.config.authorization_token, - 'Content-Type': 'application/json' - }, - json={ - "user_id": user_id, - "role": role.value - } - ) - return response - - def delete_project_tasks(self) -> requests.Response: - """ - This method deletes all tasks from the project. - Returns: requests.Response - """ - delete_url = self.api_url_constructor.delete_project_tasks_url() - response = requests.delete( - url=delete_url, - headers={ - 'Authorization': self.config.authorization_token - } - ) - return response - - # endregion - - -if __name__ == "__main__": - - # Example usage - api_manager = LabelStudioAPIManager(config=LabelStudioConfig()) - project_accessible = api_manager.ping_project() - if project_accessible: - print("Project is accessible") - - # Test export - # data = [{"url": f"https://example.com/{generate_random_word(10)}"} for _ in range(10)] - # - # response = api_manager.import_data(data) - # print(response.status_code) - # print(response.json()) - - # Test import - response = api_manager.import_tasks_from_project() - print(response.status_code) - print(response.json()) diff --git a/label_studio_interface/LabelStudioConfig.py b/label_studio_interface/LabelStudioConfig.py deleted file mode 100644 index 14e5cef1..00000000 --- a/label_studio_interface/LabelStudioConfig.py +++ /dev/null @@ -1,32 +0,0 @@ -import os - -from dotenv import load_dotenv - - -class LabelStudioConfig: - """ - This class is responsible for loading the configuration for the Label Studio API. 
- """ - def __init__(self, dotenv_file=".env"): - """ - - Args: - dotenv_file: the path to the .env file which contains the configuration for the Label Studio API - """ - load_dotenv(dotenv_file) - # Note that if the environment variables are not set, the default values, given below, are used - self._project_id = os.getenv('LABEL_STUDIO_PROJECT_ID', '58475') - self._organization_id = os.getenv('LABEL_STUDIO_ORGANIZATION_ID', '1') - self._authorization_token = f'Token {os.getenv("LABEL_STUDIO_ACCESS_TOKEN", "abc123")}' - - @property - def project_id(self): - return self._project_id - - @property - def authorization_token(self): - return self._authorization_token - - @property - def organization_id(self): - return self._organization_id diff --git a/label_studio_interface/PreAnnotationCreator.py b/label_studio_interface/PreAnnotationCreator.py deleted file mode 100644 index 9630d464..00000000 --- a/label_studio_interface/PreAnnotationCreator.py +++ /dev/null @@ -1,88 +0,0 @@ -""" -This class combines data with pre-annotation data, converting it into the requisite format for Label Studio -""" -from typing import Any - -class BaseResultInfo: - """ - Contains information required for every result - """ - def __init__(self, result_type: str, to_name: str, from_name: str, origin: str = "manual"): - """ - - Args: - result_type: One of the permitted Label Studio result types - to_name: Name of the entity being labeled - from_name: Source name of the result in the label configuration - origin: Where the result came from, defaults to "manual" - """ - self.result_type = result_type - self.to_name = to_name - self.from_name = from_name - self.origin = origin - -class TaxonomyResult: - - def __init__(self, base_info: BaseResultInfo, taxonomy_data: list[list[str]]): - self.base_info = base_info - self.taxonomy_data = taxonomy_data - - def to_dict(self) -> dict: - """ - Converts the taxonomy data to a dictionary - Returns: - - """ - return { - "type": self.base_info.result_type, - "value": { - "taxonomy": self.taxonomy_data - }, - "origin": self.base_info.origin, - "to_name": self.base_info.to_name, - "from_name": self.base_info.from_name - } - - - - -class PreAnnotationCreator: - - def __init__(self): - pass - - def add_taxonomy_data(self, raw_taxonomy_data: Any) -> list[list[str]]: - """ - This method adds taxonomy data to the pre-annotation data - - Taxonomy data exists as a list of lists - Each sub-list represents a single selection in the taxonomy - and is a list of strings representing each level of the taxonomy - with the first being the most superordinate, and the last being the most subordinate - Selections do not have to include all levels of the taxonomy - For example, in a taxonomy of animals, if "Dog" is selected, the selection is represented as ["Dog"] - However, a selection of a subordinate category entails selection of all relevant superordinate categories - For example, If "German Shepherd" is selected, the selection is represented as ["Dog", "German Shepherd"] - If "Dog" is also selected, that is included as a separate sub-list containing only ["Dog"] - - Example format: - [ - ["Dog", "German Shepherd"], - ["Dog"] - ] - - Args: - raw_taxonomy_data: Any: Taxonomy data to be converted into the requisite format for Label Studio - Returns: - list[list[str]]: The pre-annotation data with the taxonomy data added - - """ - - taxonomy_results = [] - - - - - # Note that for multi-hierarchical taxonomy data, - # any selection of the subordinate category - # will automatically entail selection of 
the superordinate category diff --git a/label_studio_interface/README.md b/label_studio_interface/README.md deleted file mode 100644 index 491ab4d8..00000000 --- a/label_studio_interface/README.md +++ /dev/null @@ -1,28 +0,0 @@ -This directory handles interfacing with -[Label Studio](https://labelstud.io/), a data labeling tool. It handles: -- Converting data from the format used by the rest of the pipeline to the format - used by Label Studio -- Uploading data to Label Studio -- Downloading labeled data from Label Studio -- Updating member roles in Label Studio - -# Environment Variables -For proper functioning of application, the following environment variables must be set in an `.env` file in the root directory: - -- LABEL_STUDIO_ACCESS_TOKEN: The access token for the Label Studio API. This can be - obtained by logging into Label Studio and navigating to the [user account section](https://app.heartex.com/user/account), where the access token can be copied. -- LABEL_STUDIO_PROJECT_ID: The project ID for the Label Studio API. This can be - obtained by logging into Label Studio and navigating to the relevant project, where the project id will be in the URL. -- LABEL_STUDIO_ORGANIZATION_ID: The organization ID for the Label Studio API. This can - be obtained by logging into Label Studio and navigating to the [Organization section](https://app.heartex.com/organization?page=1), where the organization ID can be copied. - -# To run basic demonstration -1. Set the environment variables as described above; in dev.env, all but LABEL_STUDIO_ACCESS_TOKEN are pre-set. -2. Install the required python libraries by running the following command (from the working directory): -```bash -pip install -r requirements.txt -``` -2. Run the following command (from the label_studio_interface_directory): -```bash -python basic_demonstration.py -``` \ No newline at end of file diff --git a/label_studio_interface/basic_demonstration.py b/label_studio_interface/basic_demonstration.py deleted file mode 100644 index 17e3d327..00000000 --- a/label_studio_interface/basic_demonstration.py +++ /dev/null @@ -1,126 +0,0 @@ -""" -This script will serve as a basic demonstration of the functionality of -Label Studio and the Python configuration developed. - -The script will: - 1. Load the configuration for the Label Studio API - 2. Delete all task data from the associated project in Label Studio (if any exists) - 3. Import new task data into the project - 4. Prompt the user to access Label Studio and perform review and annotation tasks - 5. 
Export the annotated data from Label Studio and present it to the user - -The configuration for the Label Studio API will be loaded from the dev.env file within this directory -However, the access token in the file is not valid and will need to be replaced with a valid access token - -All actions will be performed on the 'Simple URL Labeler" project viewable at https://app.heartex.com/projects/58903/ -""" - -from LabelStudioAPIManager import LabelStudioAPIManager -from LabelStudioConfig import LabelStudioConfig - -# Simple URL Labeler project URL -project_url = "https://app.heartex.com/projects/58903/" - -# Load the configuration for the Label Studio API -config = LabelStudioConfig("dev.env") -if "REPLACE_WITH_YOUR_TOKEN" in config.authorization_token: - raise ValueError("Please replace the access token in dev.env with your own access token") - -# Create an API manager -api_manager = LabelStudioAPIManager(config) - -print("Deleting project tasks...") -# Delete all task data from the associated project in Label Studio (if any exists) -api_manager.delete_project_tasks() - -# Prompt the user to access Label Studio and confirm that the project has been cleared -print(f"Please access the project at {project_url} to confirm that the project has been cleared") - -# Wait for the user to confirm that the project has been cleared -input("Press Enter once confirmed...") -print("Continuing...") - -# Import new task data into the project -# Two tasks will be imported: one which has not been annotated and one which has been pre-annotated -# These tasks are provided in their final data form, -# but will need to be converted into this form in the eventual pipeline -data = [ - { - "data": { - "url": "https://test_data.gov/test/test-services/annual-test/" - } - }, - { - "data": { - "url": "www.example.com" - }, - "annotations": [ - { - "result": [ - { - "type": "taxonomy", - "value": { - "taxonomy": [ - [ - "Police Public Interactions" - ], - [ - "Police Public Interactions", - "Accident Reports" - ], - [ - "Police Public Interactions", - "Arrest Records" - ], - [ - "Agency Published Resources" - ], - [ - "Agency Published Resources", - "Crime Maps and Reports" - ], - [ - "Non-Criminal Justice" - ] - ] - }, - "origin": "manual", - "to_name": "url_text", - "from_name": "category" - }, - { - "type": "choices", - "value": { - "choices": [ - "Y" - ] - }, - "origin": "manual", - "to_name": "is_single_record", - "from_name": "single_record_checkbox" - } - ] - } - ] - } -] -api_manager.export_tasks_into_project(data) - -# Prompt the user to access Label Studio and perform review and annotation tasks -print(f"Please access the project at {project_url} to perform review and annotation tasks") - -# Wait for the user to complete the tasks -input("Press Enter when complete...") -print("Continuing...") - -# Import the annotated data from Label Studio and present it to the user -response = api_manager.import_tasks_from_project(all_tasks=True) -print("Presenting annotated data (showing only first results)...") -results = response.json() -for result in results: - print(f"Task URL: {result['data']['url']}") - if len(result['annotations']) == 0: - print("No annotations") - else: - print(f"Annotations: {result['annotations'][0]['result']}") - print("\n") diff --git a/label_studio_interface/dev.env b/label_studio_interface/dev.env deleted file mode 100644 index 5b603e4d..00000000 --- a/label_studio_interface/dev.env +++ /dev/null @@ -1,3 +0,0 @@ -LABEL_STUDIO_ACCESS_TOKEN=REPLACE_WITH_YOUR_TOKEN -LABEL_STUDIO_PROJECT_ID=58903 
-LABEL_STUDIO_ORGANIZATION_ID=9876 \ No newline at end of file diff --git a/llm_api_logic/DeepSeekRecordClassifier.py b/llm_api_logic/DeepSeekRecordClassifier.py new file mode 100644 index 00000000..67f6fa09 --- /dev/null +++ b/llm_api_logic/DeepSeekRecordClassifier.py @@ -0,0 +1,33 @@ +import json +import os + +from openai import AsyncOpenAI + +from collector_db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo +from core.enums import RecordType +from llm_api_logic.LLMRecordClassifierBase import RecordClassifierBase + +class DeepSeekRecordClassifier(RecordClassifierBase): + + + @property + def api_key(self): + return os.getenv("DEEPSEEK_API_KEY") + + @property + def model_name(self): + return "deepseek-chat" + + @property + def base_url(self): + return "https://api.deepseek.com" + + @property + def response_format(self): + return { + 'type': 'json_object' + } + + @property + def completions_func(self) -> callable: + return self.client.chat.completions.create + + def post_process_response(self, response) -> str: + # The json_object response format returns plain JSON text, so parse it directly + result_str = response.choices[0].message.content + result_dict = json.loads(result_str) + return result_dict["record_type"] \ No newline at end of file
diff --git a/llm_api_logic/LLMRecordClassifierBase.py b/llm_api_logic/LLMRecordClassifierBase.py new file mode 100644 index 00000000..85142aea --- /dev/null +++ b/llm_api_logic/LLMRecordClassifierBase.py @@ -0,0 +1,76 @@ +import json +from abc import ABC, abstractmethod +from typing import Any + +from openai import AsyncOpenAI + +from collector_db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo +from llm_api_logic.RecordTypeStructuredOutput import RecordTypeStructuredOutput +from llm_api_logic.constants import RECORD_CLASSIFICATION_QUERY_CONTENT +from llm_api_logic.helpers import dictify_html_info + + +class RecordClassifierBase(ABC): + + def __init__(self): + self.client = AsyncOpenAI( + api_key=self.api_key, + base_url=self.base_url + ) + + @property + @abstractmethod + def api_key(self) -> str: + pass + + @property + @abstractmethod + def model_name(self) -> str: + pass + + @property + @abstractmethod + def base_url(self) -> str: + pass + + @property + @abstractmethod + def response_format(self) -> dict | RecordTypeStructuredOutput: + pass + + @property + @abstractmethod + def completions_func(self) -> callable: + pass + + def build_query_messages(self, content_infos: list[URLHTMLContentInfo]) -> list[dict[str, str]]: + insert_content = dictify_html_info(content_infos) + return [ + { + "role": "system", + "content": RECORD_CLASSIFICATION_QUERY_CONTENT + }, + { + "role": "user", + "content": str(insert_content) + } + ] + + @abstractmethod + def post_process_response(self, response: Any) -> str: + pass + + async def classify_url(self, content_infos: list[URLHTMLContentInfo]) -> str: + func = self.completions_func + response = await func( + model=self.model_name, + messages=self.build_query_messages(content_infos), + #stream=False, # Note that this is set for DeepSeek, but may not be needed for other providers + response_format=self.response_format + ) + return self.post_process_response(response) \ No newline at end of file
diff --git a/llm_api_logic/OpenAIRecordClassifier.py b/llm_api_logic/OpenAIRecordClassifier.py new file mode 100644 index 00000000..cc0829b5 --- /dev/null +++ b/llm_api_logic/OpenAIRecordClassifier.py @@ -0,0 +1,33 @@ + +from openai.types.chat import ParsedChatCompletion + +from core.EnvVarManager import EnvVarManager + +from llm_api_logic.LLMRecordClassifierBase import RecordClassifierBase +from llm_api_logic.RecordTypeStructuredOutput import RecordTypeStructuredOutput + 
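+# A minimal usage sketch (assumed, mirroring URLRecordTypeTaskOperator earlier in this diff):
+#     classifier = OpenAIRecordClassifier()
+#     record_type_str = await classifier.classify_url(url_with_html.html_infos)
+# classify_url(), inherited from RecordClassifierBase, builds the prompt from the page's HTML
+# content infos, calls the beta parse endpoint with RecordTypeStructuredOutput as the response
+# format, and post_process_response() below returns the matched RecordType's string value.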
+ +class OpenAIRecordClassifier(RecordClassifierBase): + + @property + def api_key(self): + return EnvVarManager.get().openai_api_key + + @property + def model_name(self): + return "gpt-4o-mini-2024-07-18" + + @property + def base_url(self): + return None + + @property + def response_format(self): + return RecordTypeStructuredOutput + + @property + def completions_func(self) -> callable: + return self.client.beta.chat.completions.parse + + def post_process_response(self, response: ParsedChatCompletion) -> str: + output: RecordTypeStructuredOutput = response.choices[0].message.parsed + return output.record_type.value \ No newline at end of file diff --git a/llm_api_logic/RecordTypeStructuredOutput.py b/llm_api_logic/RecordTypeStructuredOutput.py new file mode 100644 index 00000000..a5993ae9 --- /dev/null +++ b/llm_api_logic/RecordTypeStructuredOutput.py @@ -0,0 +1,13 @@ +""" +Used per the guidance given in Open AI's documentation on structured outputs: +https://platform.openai.com/docs/guides/structured-outputs +""" + +from pydantic import BaseModel + +from core.enums import RecordType + + + +class RecordTypeStructuredOutput(BaseModel): + record_type: RecordType \ No newline at end of file diff --git a/llm_api_logic/__init__.py b/llm_api_logic/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/llm_api_logic/constants.py b/llm_api_logic/constants.py new file mode 100644 index 00000000..55133abf --- /dev/null +++ b/llm_api_logic/constants.py @@ -0,0 +1,48 @@ +RECORD_CLASSIFICATION_QUERY_CONTENT = """ + You will be provided with structured data from a web page and determine + the record type. + + The record types are as follows + + "Accident Reports": Records of vehicle accidents. + "Arrest Records": Records of each arrest made in the agency's jurisdiction. + "Calls for Service": Records of officers initiating activity or responding to requests for police response. Often called "Dispatch Logs" or "Incident Reports" when published. + "Car GPS": Records of police car location. Not generally posted online. + "Citations": Records of low-level criminal offenses where a police officer issued a citation instead of an arrest. + "Dispatch Logs": Records of calls or orders made by police dispatchers. + "Dispatch Recordings": Audio feeds and/or archives of municipal dispatch channels. + "Field Contacts": Reports of contact between police and civilians. May include uses of force, incidents, arrests, or contacts where nothing notable happened. + "Incident Reports": Reports made by police officers after responding to a call which may or may not be criminal in nature. Not generally posted online. + "Misc Police Activity": Records or descriptions of police activity not covered by other record types. + "Officer Involved Shootings": Case files of gun violence where a police officer was involved, typically as the shooter. Detailed, often containing references to records like Media Bulletins and Use of Force Reports. + "Stops": Records of pedestrian or traffic stops made by police. + "Surveys": Information captured from a sample of some population, like incarcerated people or magistrate judges. Often generated independently. + "Use of Force Reports": Records of use of force against civilians by police officers. + "Vehicle Pursuits": Records of cases where police pursued a person fleeing in a vehicle. + "Complaints & Misconduct": Records, statistics, or summaries of complaints and misconduct investigations into law enforcement officers. 
+ "Daily Activity Logs": Officer-created reports or time sheets of what happened on a shift. Not generally posted online. + "Training & Hiring Info": Records and descriptions of additional training for police officers. + "Personnel Records": Records of hiring and firing, certification, discipline, and other officer-specific events. Not generally posted online. + "Annual & Monthly Reports": Often in PDF form, featuring summaries or high-level updates about the police force. Can contain versions of other record types, especially summaries. + "Budgets & Finances": Budgets, finances, grants, or other financial documents. + "Contact Info & Agency Meta": Information about organizational structure, including department structure and contact info. + "Geographic": Maps or geographic data about how land is divided up into municipal sectors, zones, and jurisdictions. + "List of Data Sources": Places on the internet, often data portal homepages, where many links to potential data sources can be found. + "Policies & Contracts": Policies or contracts related to agency procedure. + "Crime Maps & Reports": Records of individual crimes in map or table form for a given jurisdiction. + "Crime Statistics": Summarized information about crime in a given jurisdiction. + "Media Bulletins": Press releases, blotters, or blogs intended to broadly communicate alerts, requests, or other timely information. + "Records Request Info": Portals, forms, policies, or other resources for making public records requests. + "Resources": Agency-provided information or guidance about services, prices, best practices, etc. + "Sex Offender Registry": Index of people registered, usually by law, with the government as sex offenders. + "Wanted Persons": Names, descriptions, images, and associated information about people with outstanding arrest warrants. + "Booking Reports": Records of booking or intake into corrections institutions. + "Court Cases": Records such as dockets about individual court cases. + "Incarceration Records": Records of current inmates, often with full names and features for notification upon inmate release. + "Other": Other record types not otherwise described. 
+ + Output the record type in the following JSON format: + { + "record_type": "" + } + """ diff --git a/llm_api_logic/helpers.py b/llm_api_logic/helpers.py new file mode 100644 index 00000000..3d5bde11 --- /dev/null +++ b/llm_api_logic/helpers.py @@ -0,0 +1,8 @@ +from collector_db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo + + +def dictify_html_info(html_infos: list[URLHTMLContentInfo]) -> dict[str, str]: + d = {} + for html_info in html_infos: + d[html_info.content_type.value] = html_info.content + return d diff --git a/local_database/DTOs.py b/local_database/DTOs.py new file mode 100644 index 00000000..f222e5ba --- /dev/null +++ b/local_database/DTOs.py @@ -0,0 +1,52 @@ +from typing import Annotated, Optional + +from pydantic import BaseModel, AfterValidator + +from local_database.local_db_util import is_absolute_path, get_absolute_path + + +class VolumeInfo(BaseModel): + host_path: str + container_path: Annotated[str, AfterValidator(is_absolute_path)] + + def build_volumes(self): + return { + self.host_path: { + "bind": self.container_path, + "mode": "rw" + } + } + + +class DockerfileInfo(BaseModel): + image_tag: str + dockerfile_directory: Optional[str] = None + + +class HealthCheckInfo(BaseModel): + test: list[str] + interval: int + timeout: int + retries: int + start_period: int + + def build_healthcheck(self) -> dict: + multiplicative_factor = 1000000000 # Assume 1 second + return { + "test": self.test, + "interval": self.interval * multiplicative_factor, + "timeout": self.timeout * multiplicative_factor, + "retries": self.retries, + "start_period": self.start_period * multiplicative_factor + } + + +class DockerInfo(BaseModel): + dockerfile_info: DockerfileInfo + volume_info: Optional[VolumeInfo] = None + name: str + ports: Optional[dict] = None + environment: Optional[dict] + command: Optional[str] = None + entrypoint: Optional[list[str]] = None + health_check_info: Optional[HealthCheckInfo] = None diff --git a/local_database/DataDumper/dump.sh b/local_database/DataDumper/dump.sh index 6f1954c4..482a3ca1 100644 --- a/local_database/DataDumper/dump.sh +++ b/local_database/DataDumper/dump.sh @@ -1,19 +1,28 @@ #!/bin/bash -set -e +#set -e # Variables (customize these or pass them as environment variables) DB_HOST=${DUMP_HOST:-"postgres_container"} DB_USER=${DUMP_USER:-"your_user"} -DB_PORT=${DUMP_PORT:-"5432"} # Default to 5432 if not provided +DB_PORT=${DUMP_PORT:-"5432"} DB_PASSWORD=${DUMP_PASSWORD:-"your_password"} DB_NAME=${DUMP_NAME:-"your_database"} DUMP_FILE=${DUMP_FILE:-"/dump/db_dump.sql"} +DUMP_SCHEMA_ONLY=${DUMP_SCHEMA_ONLY:-false} # Set to "true" to dump only schema # Export password for pg_dump export PGPASSWORD=$DB_PASSWORD -# Dump the database -echo "Dumping database $DB_NAME from $DB_HOST:$DB_PORT..." -pg_dump -h $DB_HOST -p $DB_PORT -U $DB_USER -d $DB_NAME --no-owner --no-acl -F c -f $DUMP_FILE +# Determine pg_dump flags +PG_DUMP_FLAGS="--no-owner --no-acl -F c" +if [[ "$DUMP_SCHEMA_ONLY" == "true" ]]; then + PG_DUMP_FLAGS="$PG_DUMP_FLAGS --schema-only" + echo "Dumping schema only..." +else + echo "Dumping full database..." +fi -echo "Dump completed. File saved to $DUMP_FILE." \ No newline at end of file +# Run pg_dump +pg_dump -h $DB_HOST -p $DB_PORT -U $DB_USER -d $DB_NAME $PG_DUMP_FLAGS -f $DUMP_FILE + +echo "Dump completed. File saved to $DUMP_FILE." 
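As a rough worked sketch of HealthCheckInfo.build_healthcheck() from local_database/DTOs.py above (values taken from the Postgres health check defined in DockerInfos.py below): durations are supplied in seconds and multiplied by 1,000,000,000 because Docker's API expects healthcheck intervals in nanoseconds.

    HealthCheckInfo(test=["pg_isready", "-U", "test_source_collector_user", "-h", "127.0.0.1", "-p", "5432"], interval=1, timeout=3, retries=30, start_period=2).build_healthcheck()
    # -> {"test": ["pg_isready", ...], "interval": 1000000000, "timeout": 3000000000, "retries": 30, "start_period": 2000000000}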
diff --git a/local_database/DataDumper/restore.sh b/local_database/DataDumper/restore.sh index d2046fb0..1efbe242 100644 --- a/local_database/DataDumper/restore.sh +++ b/local_database/DataDumper/restore.sh @@ -1,6 +1,5 @@ #!/bin/bash set -e - # Variables (customize these or pass them as environment variables) DB_HOST=${RESTORE_HOST:-"postgres_container"} DB_USER=${RESTORE_USER:-"your_user"} @@ -8,33 +7,30 @@ DB_PORT=${RESTORE_PORT:-"5432"} # Default to 5432 if not provided DB_PASSWORD=${RESTORE_PASSWORD:-"your_password"} NEW_DB_NAME=${RESTORE_DB_NAME:-"new_database"} # Name of the database to restore into DUMP_FILE=${DUMP_FILE:-"/dump/db_dump.sql"} - MAINTENANCE_DB="postgres" - # Export password for pg_restore export PGPASSWORD=$DB_PASSWORD - CONNECTION_STRING="postgresql://$DB_USER:$DB_PASSWORD@$DB_HOST:$DB_PORT/$NEW_DB_NAME" MAINT_CONNECTION_STRING="postgresql://$DB_USER:$DB_PASSWORD@$DB_HOST:$DB_PORT/$MAINTENANCE_DB" - echo "Checking if database $NEW_DB_NAME exists on $DB_HOST:$DB_PORT..." psql -d $MAINT_CONNECTION_STRING -tc "SELECT 1 FROM pg_database WHERE datname = '$NEW_DB_NAME';" | grep -q 1 && { echo "Database $NEW_DB_NAME exists. Dropping it..." - # Terminate all connections to the database - psql -d $MAINT_CONNECTION_STRING -c "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = '$NEW_DB_NAME';" + psql -d $MAINT_CONNECTION_STRING -tAc "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = '$NEW_DB_NAME' AND pid <> pg_backend_pid();" # Drop the database psql -d $MAINT_CONNECTION_STRING -c "DROP DATABASE $NEW_DB_NAME;" + echo "Waiting for connections to terminate..." + while psql -d $MAINT_CONNECTION_STRING -tAc "SELECT 1 FROM pg_stat_activity WHERE datname = '$NEW_DB_NAME';" | grep -q 1; do + sleep 1 + echo "Still waiting..." + done } - # Create the new database echo "Creating new database $NEW_DB_NAME on $DB_HOST:$DB_PORT..." psql -d $MAINT_CONNECTION_STRING -c "CREATE DATABASE $NEW_DB_NAME;" || { echo "Failed to create database $NEW_DB_NAME. It might already exist." exit 1 } - # Restore the dump into the new database echo "Restoring dump from $DUMP_FILE into database $NEW_DB_NAME..." pg_restore -d $CONNECTION_STRING --no-owner --no-acl -F c $DUMP_FILE - -echo "Database restoration completed." +echo "Database restoration completed." 
\ No newline at end of file diff --git a/local_database/DockerInfos.py b/local_database/DockerInfos.py new file mode 100644 index 00000000..17180bab --- /dev/null +++ b/local_database/DockerInfos.py @@ -0,0 +1,59 @@ +from local_database.DTOs import DockerInfo, DockerfileInfo, HealthCheckInfo, VolumeInfo +from util.helper_functions import get_from_env, project_path + + +def get_database_docker_info() -> DockerInfo: + return DockerInfo( + dockerfile_info=DockerfileInfo( + image_tag="postgres:15", + ), + name="data_source_identification_db", + ports={ + "5432/tcp": 5432 + }, + environment={ + "POSTGRES_PASSWORD": "HanviliciousHamiltonHilltops", + "POSTGRES_USER": "test_source_collector_user", + "POSTGRES_DB": "source_collector_test_db" + }, + health_check_info=HealthCheckInfo( + test=["pg_isready", "-U", "test_source_collector_user", "-h", "127.0.0.1", "-p", "5432"], + interval=1, + timeout=3, + retries=30, + start_period=2 + ) + ) + +def get_source_collector_data_dumper_info() -> DockerInfo: + return DockerInfo( + dockerfile_info=DockerfileInfo( + image_tag="datadumper", + dockerfile_directory=str(project_path( + "local_database", + "DataDumper" + )) + ), + volume_info=VolumeInfo( + host_path=str(project_path( + "local_database", + "DataDumper", + "dump" + )), + container_path="/dump" + ), + name="datadumper", + environment={ + "DUMP_HOST": get_from_env("DUMP_HOST"), + "DUMP_USER": get_from_env("DUMP_USER"), + "DUMP_PASSWORD": get_from_env("DUMP_PASSWORD"), + "DUMP_NAME": get_from_env("DUMP_DB_NAME"), + "DUMP_PORT": get_from_env("DUMP_PORT"), + "RESTORE_HOST": "data_source_identification_db", + "RESTORE_USER": "test_source_collector_user", + "RESTORE_PORT": "5432", + "RESTORE_DB_NAME": "source_collector_test_db", + "RESTORE_PASSWORD": "HanviliciousHamiltonHilltops", + }, + command="bash" + ) diff --git a/local_database/classes/DockerClient.py b/local_database/classes/DockerClient.py new file mode 100644 index 00000000..ca9d535b --- /dev/null +++ b/local_database/classes/DockerClient.py @@ -0,0 +1,118 @@ +import docker +from docker.errors import NotFound, APIError + +from local_database.DTOs import DockerfileInfo, DockerInfo + + +class DockerClient: + + def __init__(self): + self.client = docker.from_env() + + def run_command(self, command: str, container_id: str): + exec_id = self.client.api.exec_create( + container_id, + cmd=command, + tty=False, + stdin=False + ) + output_stream = self.client.api.exec_start(exec_id=exec_id, stream=True) + for line in output_stream: + print(line.decode().rstrip()) + + def start_network(self, network_name): + try: + self.client.networks.create(network_name, driver="bridge") + except APIError as e: + # Assume already exists + if e.response.status_code != 409: + raise e + print("Network already exists") + return self.client.networks.get(network_name) + + def stop_network(self, network_name): + self.client.networks.get(network_name).remove() + + def get_image( + self, + dockerfile_info: DockerfileInfo, + force_rebuild: bool = False + ): + if dockerfile_info.dockerfile_directory: + # Build image from Dockerfile + self.client.images.build( + path=dockerfile_info.dockerfile_directory, + tag=dockerfile_info.image_tag, + nocache=force_rebuild, + rm=True # Remove intermediate images + ) + return + + if force_rebuild: + # Even if not from Dockerfile, re-pull to ensure freshness + self.client.images.pull(dockerfile_info.image_tag) + return + + try: + self.client.images.get(dockerfile_info.image_tag) + except NotFound: + 
self.client.images.pull(dockerfile_info.image_tag) + + def get_existing_container(self, docker_info_name: str): + try: + return self.client.containers.get(docker_info_name) + except NotFound: + return None + + def create_container(self, docker_info: DockerInfo, network_name: str, force_rebuild: bool = False): + self.get_image( + docker_info.dockerfile_info, + force_rebuild=force_rebuild + ) + + container = self.client.containers.run( + image=docker_info.dockerfile_info.image_tag, + volumes=docker_info.volume_info.build_volumes() if docker_info.volume_info is not None else None, + command=docker_info.command, + entrypoint=docker_info.entrypoint, + detach=True, + name=docker_info.name, + ports=docker_info.ports, + network=network_name, + environment=docker_info.environment, + stdout=True, + stderr=True, + tty=True, + healthcheck=docker_info.health_check_info.build_healthcheck() if docker_info.health_check_info is not None else None + ) + return container + + + def run_container( + self, + docker_info: DockerInfo, + network_name: str, + force_rebuild: bool = False + ): + print(f"Running container {docker_info.name}") + container = self.get_existing_container(docker_info.name) + if container is None: + return self.create_container( + docker_info=docker_info, + network_name=network_name, + force_rebuild=force_rebuild + ) + if force_rebuild: + print("Rebuilding container...") + container.remove(force=True) + return self.create_container( + docker_info=docker_info, + network_name=network_name, + force_rebuild=force_rebuild + ) + if container.status == 'running': + print(f"Container '{docker_info.name}' is already running") + return container + container.start() + return container + diff --git a/local_database/classes/DockerContainer.py b/local_database/classes/DockerContainer.py new file mode 100644 index 00000000..33b71ce0 --- /dev/null +++ b/local_database/classes/DockerContainer.py @@ -0,0 +1,34 @@ +import time + +from docker.models.containers import Container + +from local_database.classes.DockerClient import DockerClient + + +class DockerContainer: + + def __init__(self, dc: DockerClient, container: Container): + self.dc = dc + self.container = container + + def run_command(self, command: str): + self.dc.run_command(command, self.container.id) + + def stop(self): + self.container.stop() + + def log_to_file(self): + logs = self.container.logs(stdout=True, stderr=True) + container_name = self.container.name + with open(f"{container_name}.log", "wb") as f: + f.write(logs) + + def wait_for_pg_to_be_ready(self): + for i in range(30): + exit_code, output = self.container.exec_run("pg_isready") + print(output) + if exit_code == 0: + return + time.sleep(1) + raise Exception("Timed out waiting for postgres to be ready") + diff --git a/local_database/classes/DockerManager.py b/local_database/classes/DockerManager.py new file mode 100644 index 00000000..ac294dc1 --- /dev/null +++ b/local_database/classes/DockerManager.py @@ -0,0 +1,78 @@ +import platform +import subprocess +import sys + +import docker +from docker.errors import APIError + +from local_database.DTOs import DockerfileInfo, DockerInfo +from local_database.classes.DockerClient import DockerClient +from local_database.classes.DockerContainer import DockerContainer + + +class DockerManager: + def __init__(self): + if not self.is_docker_running(): + self.start_docker_engine() + + self.client = DockerClient() + self.network_name = "my_network" + self.network = self.start_network() + + @staticmethod + def start_docker_engine(): + system = 
platform.system() + + match system: + case "Windows": + # Use PowerShell to start Docker Desktop on Windows + subprocess.run([ + "powershell", "-Command", + "Start-Process 'Docker Desktop' -Verb RunAs" + ]) + case "Darwin": + # MacOS: Docker Desktop must be started manually or with open + subprocess.run(["open", "-a", "Docker"]) + case "Linux": + # Most Linux systems use systemctl to manage Docker + subprocess.run(["sudo", "systemctl", "start", "docker"]) + case _: + print(f"Unsupported OS: {system}") + sys.exit(1) + + @staticmethod + def is_docker_running(): + try: + client = docker.from_env() + client.ping() + return True + except docker.errors.DockerException as e: + print(f"Docker is not running: {e}") + return False + + def run_command(self, command: str, container_id: str): + self.client.run_command(command, container_id) + + def start_network(self): + return self.client.start_network(self.network_name) + + def stop_network(self): + self.client.stop_network(self.network_name) + + def get_image(self, dockerfile_info: DockerfileInfo): + self.client.get_image(dockerfile_info) + + def run_container( + self, + docker_info: DockerInfo, + force_rebuild: bool = False + ) -> DockerContainer: + raw_container = self.client.run_container( + docker_info, + network_name=self.network_name, + force_rebuild=force_rebuild + ) + return DockerContainer(self.client, raw_container) + + def get_containers(self): + return self.client.client.containers.list() \ No newline at end of file diff --git a/local_database/classes/TimestampChecker.py b/local_database/classes/TimestampChecker.py new file mode 100644 index 00000000..56779fd4 --- /dev/null +++ b/local_database/classes/TimestampChecker.py @@ -0,0 +1,32 @@ +import datetime +import os +from typing import Optional + + +class TimestampChecker: + def __init__(self): + self.last_run_time: Optional[datetime.datetime] = self.load_last_run_time() + + def load_last_run_time(self) -> Optional[datetime.datetime]: + # Check if file `last_run.txt` exists + # If it does, load the last run time + if os.path.exists("local_state/last_run.txt"): + with open("local_state/last_run.txt", "r") as f: + return datetime.datetime.strptime( + f.read(), + "%Y-%m-%d %H:%M:%S" + ) + return None + + def last_run_within_24_hours(self): + if self.last_run_time is None: + return False + return datetime.datetime.now() - self.last_run_time < datetime.timedelta(days=1) + + def set_last_run_time(self): + # If directory `local_state` doesn't exist, create it + if not os.path.exists("local_state"): + os.makedirs("local_state") + + with open("local_state/last_run.txt", "w") as f: + f.write(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")) diff --git a/local_database/classes/__init__.py b/local_database/classes/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/local_database/constants.py b/local_database/constants.py new file mode 100644 index 00000000..51147717 --- /dev/null +++ b/local_database/constants.py @@ -0,0 +1,4 @@ +LOCAL_SOURCE_COLLECTOR_DB_NAME = "source_collector_test_db" + +DUMP_SH_DOCKER_PATH = "/usr/local/bin/dump.sh" +RESTORE_SH_DOCKER_PATH = "/usr/local/bin/restore.sh" \ No newline at end of file diff --git a/local_database/create_database.py b/local_database/create_database.py new file mode 100644 index 00000000..67eae70b --- /dev/null +++ b/local_database/create_database.py @@ -0,0 +1,56 @@ +import argparse +import os +import subprocess + +import psycopg2 +from psycopg2 import sql + +from local_database.constants import LOCAL_SOURCE_COLLECTOR_DB_NAME, 
RESTORE_SH_DOCKER_PATH + +# Defaults (can be overridden via environment variables) +POSTGRES_HOST = os.getenv("POSTGRES_HOST", "host.docker.internal") +POSTGRES_PORT = int(os.getenv("POSTGRES_PORT", "5432")) +POSTGRES_USER = os.getenv("POSTGRES_USER", "test_source_collector_user") +POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD", "HanviliciousHamiltonHilltops") + + +# Connect to the default 'postgres' database to create other databases +def connect(database="postgres", autocommit=True): + conn = psycopg2.connect( + dbname=database, + user=POSTGRES_USER, + password=POSTGRES_PASSWORD, + host=POSTGRES_HOST, + port=POSTGRES_PORT + ) + if autocommit: + conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT) + return conn + +def create_database(db_name): + conn = connect("postgres") + with conn.cursor() as cur: + cur.execute(sql.SQL(""" + SELECT pg_terminate_backend(pid) + FROM pg_stat_activity + WHERE datname = %s AND pid <> pg_backend_pid() + """), [db_name]) + + # Drop the database if it exists + cur.execute(sql.SQL("DROP DATABASE IF EXISTS {}").format(sql.Identifier(db_name))) + print(f"🗑️ Dropped existing database: {db_name}") + + try: + cur.execute(sql.SQL("CREATE DATABASE {}").format(sql.Identifier(db_name))) + print(f"✅ Created database: {db_name}") + except psycopg2.errors.DuplicateDatabase: + print(f"⚠️ Database {db_name} already exists") + except Exception as e: + print(f"❌ Failed to create {db_name}: {e}") + +def main(): + print("Creating databases...") + create_database(LOCAL_SOURCE_COLLECTOR_DB_NAME) + +if __name__ == "__main__": + main() diff --git a/local_database/local_db_util.py b/local_database/local_db_util.py new file mode 100644 index 00000000..7bc5bb12 --- /dev/null +++ b/local_database/local_db_util.py @@ -0,0 +1,18 @@ +from pathlib import Path + + +def get_absolute_path(relative_path: str) -> str: + """ + Get absolute path, using the current file as the point of reference + """ + current_dir = Path(__file__).parent + absolute_path = (current_dir / relative_path).resolve() + return str(absolute_path) + + +def is_absolute_path(path: str) -> str: + if len(path) == 0: + raise ValueError("Path is required") + if path[0] != "/": + raise ValueError("Container path must be absolute") + return path diff --git a/local_database/setup.py b/local_database/setup.py new file mode 100644 index 00000000..99ff1da9 --- /dev/null +++ b/local_database/setup.py @@ -0,0 +1,53 @@ +import subprocess +import time +import sys + +POSTGRES_SERVICE_NAME = "postgres" +FOLLOWUP_SCRIPT = "py create_database.py" +MAX_RETRIES = 20 +SLEEP_SECONDS = 1 + +def run_command(cmd, check=True, capture_output=False, **kwargs): + try: + return subprocess.run(cmd, shell=True, check=check, capture_output=capture_output, text=True, **kwargs) + except subprocess.CalledProcessError as e: + print(f"Command '{cmd}' failed: {e}") + sys.exit(1) + +def get_postgres_container_id(): + result = run_command(f"docker-compose ps -q {POSTGRES_SERVICE_NAME}", capture_output=True) + container_id = result.stdout.strip() + if not container_id: + print("Error: Could not find Postgres container.") + sys.exit(1) + return container_id + +def wait_for_postgres(container_id): + print("Waiting for Postgres to be ready...") + for i in range(MAX_RETRIES): + try: + run_command(f"docker exec {container_id} pg_isready -U postgres", check=True) + print("Postgres is ready!") + return + except subprocess.CalledProcessError as e: + print(f"Still waiting... 
({i + 1}/{MAX_RETRIES}) Exit code: {e.returncode}") + print(f"Output: {e.output if hasattr(e, 'output') else 'N/A'}") + time.sleep(SLEEP_SECONDS) + print("Postgres did not become ready in time.") + sys.exit(1) + +def main(): + print("Stopping Docker Compose...") + run_command("docker-compose down") + + print("Starting Docker Compose...") + run_command("docker-compose up -d") + + container_id = get_postgres_container_id() + wait_for_postgres(container_id) + + print("Running follow-up script...") + run_command(FOLLOWUP_SCRIPT) + +if __name__ == "__main__": + main() diff --git a/pdap_api_client/AccessManager.py b/pdap_api_client/AccessManager.py deleted file mode 100644 index 87877466..00000000 --- a/pdap_api_client/AccessManager.py +++ /dev/null @@ -1,123 +0,0 @@ -from http import HTTPStatus -from typing import Optional - -import requests - -from pdap_api_client.DTOs import RequestType, Namespaces, RequestInfo, ResponseInfo - -API_URL = "https://data-sources-v2.pdap.dev/api" -request_methods = { - RequestType.POST: requests.post, - RequestType.PUT: requests.put, - RequestType.GET: requests.get, - RequestType.DELETE: requests.delete, -} - - -class CustomHTTPException(Exception): - pass - - -def build_url( - namespace: Namespaces, - subdomains: Optional[list[str]] = None -): - url = f"{API_URL}/{namespace.value}" - if subdomains is not None: - url = f"{url}/{'/'.join(subdomains)}" - return url - - -class AccessManager: - """ - Manages login, api key, access and refresh tokens - """ - def __init__(self, email: str, password: str, api_key: Optional[str] = None): - self.access_token = None - self.refresh_token = None - self.api_key = api_key - self.login(email=email, password=password) - - # TODO: Add means to refresh if token expired. - - def load_api_key(self): - url = build_url( - namespace=Namespaces.AUTH, - subdomains=["api-key"] - ) - request_info = RequestInfo( - type_ = RequestType.POST, - url=url, - headers=self.jwt_header() - ) - response_info = self.make_request(request_info) - self.api_key = response_info.data["api_key"] - - def refresh_access_token(self): - url = build_url( - namespace=Namespaces.AUTH, - subdomains=["refresh-session"], - ) - raise NotImplementedError("Waiting on https://github.com/Police-Data-Accessibility-Project/data-sources-app/issues/566") - - def make_request(self, ri: RequestInfo) -> ResponseInfo: - try: - response = request_methods[ri.type_]( - ri.url, - json=ri.json, - headers=ri.headers, - params=ri.params, - timeout=ri.timeout - ) - response.raise_for_status() - except requests.RequestException as e: - # TODO: Precise string matching here is brittle. Consider changing later. - if e.response.json().message == "Token is expired. 
Please request a new token.": - self.refresh_access_token() - return self.make_request(ri) - else: - raise CustomHTTPException(f"Error making {ri.type_} request to {ri.url}: {e}") - return ResponseInfo( - status_code=HTTPStatus(response.status_code), - data=response.json() - ) - - def login(self, email: str, password: str): - url = build_url( - namespace=Namespaces.AUTH, - subdomains=["login"] - ) - request_info = RequestInfo( - type_=RequestType.POST, - url=url, - json={ - "email": email, - "password": password - } - ) - response_info = self.make_request(request_info) - data = response_info.data - self.access_token = data["access_token"] - self.refresh_token = data["refresh_token"] - - - def jwt_header(self) -> dict: - """ - Retrieve JWT header - Returns: Dictionary of Bearer Authorization with JWT key - """ - return { - "Authorization": f"Bearer {self.access_token}" - } - - def api_key_header(self): - """ - Retrieve API key header - Returns: Dictionary of Basic Authorization with API key - - """ - if self.api_key is None: - self.load_api_key() - return { - "Authorization": f"Basic {self.api_key}" - } diff --git a/pdap_api_client/DTOs.py b/pdap_api_client/DTOs.py index 31c8c2cf..23d240d7 100644 --- a/pdap_api_client/DTOs.py +++ b/pdap_api_client/DTOs.py @@ -1,13 +1,17 @@ from enum import Enum -from http import HTTPStatus -from typing import Optional +from typing import Optional, List from pydantic import BaseModel +from pdap_api_client.enums import MatchAgencyResponseStatus + class MatchAgencyInfo(BaseModel): + id: int submitted_name: str - id: str + state: Optional[str] = None + county: Optional[str] = None + locality: Optional[str] = None class ApprovalStatus(Enum): APPROVED = "approved" @@ -15,40 +19,11 @@ class ApprovalStatus(Enum): PENDING = "pending" NEEDS_IDENTIFICATION = "needs identification" - - class UniqueURLDuplicateInfo(BaseModel): original_url: str approval_status: ApprovalStatus - rejection_note: str - -class UniqueURLResponseInfo(BaseModel): - is_unique: bool - duplicates: list[UniqueURLDuplicateInfo] - - -class Namespaces(Enum): - AUTH = "auth" - MATCH = "match" - CHECK = "check" - - -class RequestType(Enum): - POST = "POST" - PUT = "PUT" - GET = "GET" - DELETE = "DELETE" - - -class RequestInfo(BaseModel): - type_: RequestType - url: str - json: Optional[dict] = None - headers: Optional[dict] = None - params: Optional[dict] = None - timeout: Optional[int] = 10 - + rejection_note: Optional[str] = None -class ResponseInfo(BaseModel): - status_code: HTTPStatus - data: Optional[dict] +class MatchAgencyResponse(BaseModel): + status: MatchAgencyResponseStatus + matches: List[MatchAgencyInfo] diff --git a/pdap_api_client/PDAPClient.py b/pdap_api_client/PDAPClient.py index 6c03ce0f..491b7c3b 100644 --- a/pdap_api_client/PDAPClient.py +++ b/pdap_api_client/PDAPClient.py @@ -1,52 +1,76 @@ -from typing import List +from typing import Optional -from pdap_api_client.AccessManager import build_url, AccessManager -from pdap_api_client.DTOs import MatchAgencyInfo, UniqueURLDuplicateInfo, UniqueURLResponseInfo, Namespaces, \ - RequestType, RequestInfo +from core.DTOs.task_data_objects.SubmitApprovedURLTDO import SubmitApprovedURLTDO, SubmittedURLInfo +from pdap_api_client.DTOs import MatchAgencyInfo, UniqueURLDuplicateInfo, \ + MatchAgencyResponse +from pdap_api_client.enums import MatchAgencyResponseStatus +from pdap_access_manager import AccessManager, DataSourcesNamespaces, RequestInfo, RequestType class PDAPClient: - def __init__(self, access_manager: AccessManager): + def __init__( 
+ self, + access_manager: AccessManager, + ): self.access_manager = access_manager - def match_agency( - self, - name: str, - state: str, - county: str, - locality: str - ) -> List[MatchAgencyInfo]: + async def match_agency( + self, + name: str, + state: Optional[str] = None, + county: Optional[str] = None, + locality: Optional[str] = None + ) -> MatchAgencyResponse: """ Returns agencies, if any, that match or partially match the search criteria """ - url = build_url( - namespace=Namespaces.MATCH, + url = self.access_manager.build_url( + namespace=DataSourcesNamespaces.MATCH, subdomains=["agency"] ) + + headers = await self.access_manager.jwt_header() + headers['Content-Type'] = "application/json" request_info = RequestInfo( type_=RequestType.POST, url=url, - json={ + headers=headers, + json_={ "name": name, "state": state, "county": county, "locality": locality } ) - response_info = self.access_manager.make_request(request_info) - return [MatchAgencyInfo(**agency) for agency in response_info.data["agencies"]] + response_info = await self.access_manager.make_request(request_info) + matches = [] + for agency in response_info.data["agencies"]: + mai = MatchAgencyInfo( + id=agency['id'], + submitted_name=agency['name'] + ) + if len(agency['locations']) > 0: + first_location = agency['locations'][0] + mai.state = first_location['state'] + mai.county = first_location['county'] + mai.locality = first_location['locality'] + matches.append(mai) + return MatchAgencyResponse( + status=MatchAgencyResponseStatus(response_info.data["status"]), + matches=matches + ) - def is_url_unique( + async def is_url_duplicate( self, url_to_check: str - ) -> UniqueURLResponseInfo: + ) -> bool: """ Check if a URL is unique. Returns duplicate info otherwise """ - url = build_url( - namespace=Namespaces.CHECK, + url = self.access_manager.build_url( + namespace=DataSourcesNamespaces.CHECK, subdomains=["unique-url"] ) request_info = RequestInfo( @@ -56,10 +80,65 @@ def is_url_unique( "url": url_to_check } ) - response_info = self.access_manager.make_request(request_info) + response_info = await self.access_manager.make_request(request_info) duplicates = [UniqueURLDuplicateInfo(**entry) for entry in response_info.data["duplicates"]] - is_unique = (len(duplicates) == 0) - return UniqueURLResponseInfo( - is_unique=is_unique, - duplicates=duplicates + is_duplicate = (len(duplicates) != 0) + return is_duplicate + + async def submit_urls( + self, + tdos: list[SubmitApprovedURLTDO] + ) -> list[SubmittedURLInfo]: + """ + Submits URLs to Data Sources App, + modifying tdos in-place with data source id or error + """ + request_url = self.access_manager.build_url( + namespace=DataSourcesNamespaces.SOURCE_COLLECTOR, + subdomains=["data-sources"] ) + + # Build url-id dictionary + url_id_dict = {} + for tdo in tdos: + url_id_dict[tdo.url] = tdo.url_id + + data_sources_json = [] + for tdo in tdos: + data_sources_json.append( + { + "name": tdo.name, + "description": tdo.description, + "source_url": tdo.url, + "record_type": tdo.record_type.value, + "record_formats": tdo.record_formats, + "data_portal_type": tdo.data_portal_type, + "last_approval_editor": tdo.approving_user_id, + "supplying_entity": tdo.supplying_entity, + "agency_ids": tdo.agency_ids + } + ) + + headers = await self.access_manager.jwt_header() + request_info = RequestInfo( + type_=RequestType.POST, + url=request_url, + headers=headers, + json_={ + "data_sources": data_sources_json + } + ) + response_info = await self.access_manager.make_request(request_info) + 
data_sources_response_json = response_info.data["data_sources"] + + results = [] + for data_source in data_sources_response_json: + url = data_source["url"] + response_object = SubmittedURLInfo( + url_id=url_id_dict[url], + data_source_id=data_source["data_source_id"], + request_error=data_source["error"] + ) + results.append(response_object) + + return results diff --git a/pdap_api_client/enums.py b/pdap_api_client/enums.py new file mode 100644 index 00000000..3dc7d931 --- /dev/null +++ b/pdap_api_client/enums.py @@ -0,0 +1,7 @@ +from enum import Enum + + +class MatchAgencyResponseStatus(Enum): + EXACT_MATCH = "Exact Match" + PARTIAL_MATCH = "Partial Matches" + NO_MATCH = "No Match" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..8a2b1187 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,56 @@ +[project] +name = "data-source-identification" +version = "0.1.0" +requires-python = ">=3.11" +dependencies = [ + "aiohttp~=3.11.11", + "alembic~=1.14.0", + "apscheduler~=3.11.0", + "asyncpg~=0.30.0", + "beautifulsoup4>=4.12.3", + "bs4~=0.0.2", + "ckanapi~=4.8", + "datasets~=2.19.1", + "docker~=7.1.0", + "environs>=14.1.1", + "fastapi[standard]~=0.115.6", + "from-root~=1.3.0", + "google-api-python-client>=2.156.0", + "httpx~=0.28.1", + "huggingface-hub~=0.28.1", + "keras~=2.15.0", + "lxml~=5.1.0", + "marshmallow~=3.23.2", + "numpy~=1.26.4", + "openai~=1.60.1", + "pandas~=2.2.3", + "pdap-access-manager==0.3.5", + "playwright~=1.49.1", + "psycopg2-binary~=2.9.6", + "psycopg[binary]~=3.1.20", + "pydantic~=2.11.3", + "pyjwt~=2.10.1", + "python-dotenv~=1.0.1", + "requests~=2.32.3", + "sqlalchemy~=2.0.36", + "starlette~=0.45.3", + "tensorflow-cpu~=2.15.1", + "tensorflow-io-gcs-filesystem==0.31.0", + "tqdm>=4.64.1", + "transformers~=4.40.2", + "urllib3~=1.26.18", + "uvicorn~=0.34.0", +] + +[dependency-groups] +dev = [ + "deepdiff>=8.5.0", + "docker>=7.1.0", + "pendulum>=3.1.0", + "pytest>=7.2.2", + "pytest-asyncio~=0.25.2", + "pytest-mock==3.12.0", + "pytest-timeout~=2.3.1", +] + + diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index c05cfbfa..00000000 --- a/requirements.txt +++ /dev/null @@ -1,48 +0,0 @@ -requests~=2.31.0 -polars~=0.20.10 -python-dotenv~=1.0.1 -bs4~=0.0.2 -tqdm>=4.64.1 -pytest>=7.2.2 -pytest-mock==3.12.0 -urllib3~=1.26.18 -psycopg2-binary~=2.9.6 -pandas~=2.2.3 -datasets~=2.19.1 -# common_crawler only -huggingface-hub~=0.22.2 - -# html_tag_collector_only -requests_html>=0.10.0 -lxml~=5.1.0 -beautifulsoup4>=4.12.3 - -# CKAN Collector -from-root~=1.3.0 - -# Google Collector -google-api-python-client>=2.156.0 -marshmallow~=3.23.2 - -sqlalchemy~=2.0.36 -fastapi[standard]~=0.115.6 -httpx~=0.28.1 -ckanapi~=4.8 -psycopg[binary]~=3.1.20 -APScheduler~=3.11.0 -alembic~=1.14.0 -asyncpg~=0.30.0 -pytest-asyncio~=0.25.2 -transformers~=4.40.2 -tf-keras~=2.18.0 - -# HTML Collector -playwright~=1.49.1 - -# Security Manager -PyJWT~=2.10.1 - -# Tests -pytest-timeout~=2.3.1 - - diff --git a/security_manager/SecurityManager.py b/security_manager/SecurityManager.py index 18bc6a26..6d5236d6 100644 --- a/security_manager/SecurityManager.py +++ b/security_manager/SecurityManager.py @@ -20,6 +20,7 @@ def get_secret_key(): class Permissions(Enum): SOURCE_COLLECTOR = "source_collector" + SOURCE_COLLECTOR_FINAL_REVIEW = "source_collector_final_review" class AccessInfo(BaseModel): user_id: int @@ -64,9 +65,13 @@ def get_relevant_permissions(raw_permissions: list[str]) -> list[Permissions]: continue return relevant_permissions - def check_access(self, token: str) 
-> AccessInfo: + def check_access( + self, + token: str, + permission: Permissions + ) -> AccessInfo: access_info = self.validate_token(token) - if not access_info.has_permission(Permissions.SOURCE_COLLECTOR): + if not access_info.has_permission(permission): raise HTTPException( status_code=status.HTTP_403_FORBIDDEN, detail="Access forbidden", @@ -79,4 +84,9 @@ def check_access(self, token: str) -> AccessInfo: def get_access_info( token: Annotated[str, Depends(oauth2_scheme)] ) -> AccessInfo: - return SecurityManager().check_access(token) \ No newline at end of file + return SecurityManager().check_access(token, Permissions.SOURCE_COLLECTOR) + +def require_permission(permission: Permissions): + def dependency(token: Annotated[str, Depends(oauth2_scheme)]) -> AccessInfo: + return SecurityManager().check_access(token, permission=permission) + return dependency \ No newline at end of file diff --git a/source_collectors/auto_googler/AutoGoogler.py b/source_collectors/auto_googler/AutoGoogler.py index 937466be..368f75fb 100644 --- a/source_collectors/auto_googler/AutoGoogler.py +++ b/source_collectors/auto_googler/AutoGoogler.py @@ -1,3 +1,5 @@ +import asyncio + from source_collectors.auto_googler.DTOs import GoogleSearchQueryResultsInnerDTO from source_collectors.auto_googler.GoogleSearcher import GoogleSearcher from source_collectors.auto_googler.SearchConfig import SearchConfig @@ -16,14 +18,14 @@ def __init__(self, search_config: SearchConfig, google_searcher: GoogleSearcher) query : [] for query in search_config.queries } - def run(self) -> str: + async def run(self) -> str: """ Runs the AutoGoogler Yields status messages """ for query in self.search_config.queries: yield f"Searching for '{query}' ..." - results = self.google_searcher.search(query) + results = await self.google_searcher.search(query) yield f"Found {len(results)} results for '{query}'." 
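# Illustrative sketch, not part of the patch: AutoGoogler.run is now an async generator that
# yields progress strings, and a caller drives it with `async for` (as the collector's
# run_to_completion does in the hunk below). A minimal standalone version of that pattern,
# using hypothetical names, looks like this. (An async generator like this is conventionally
# annotated `-> AsyncGenerator[str, None]` rather than `-> str`.)
import asyncio
from typing import AsyncGenerator


async def run_queries(queries: list[str]) -> AsyncGenerator[str, None]:
    for query in queries:
        yield f"Searching for '{query}' ..."
        await asyncio.sleep(0)  # stand-in for the real awaited search call
        yield f"Found 0 results for '{query}'."


async def main() -> None:
    # Consume the generator one status message at a time, as the collector does with self.log
    async for status in run_queries(["police", "court records"]):
        print(status)


asyncio.run(main())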
if results is not None: self.data[query] = results diff --git a/source_collectors/auto_googler/AutoGooglerCollector.py b/source_collectors/auto_googler/AutoGooglerCollector.py index 189eaa11..01387d0b 100644 --- a/source_collectors/auto_googler/AutoGooglerCollector.py +++ b/source_collectors/auto_googler/AutoGooglerCollector.py @@ -1,31 +1,39 @@ -from collector_manager.CollectorBase import CollectorBase + +from collector_manager.AsyncCollectorBase import AsyncCollectorBase from collector_manager.enums import CollectorType +from core.EnvVarManager import EnvVarManager from core.preprocessors.AutoGooglerPreprocessor import AutoGooglerPreprocessor from source_collectors.auto_googler.AutoGoogler import AutoGoogler from source_collectors.auto_googler.DTOs import AutoGooglerInputDTO, AutoGooglerInnerOutputDTO from source_collectors.auto_googler.GoogleSearcher import GoogleSearcher from source_collectors.auto_googler.SearchConfig import SearchConfig -from util.helper_functions import get_from_env, base_model_list_dump +from util.helper_functions import base_model_list_dump -class AutoGooglerCollector(CollectorBase): +class AutoGooglerCollector(AsyncCollectorBase): collector_type = CollectorType.AUTO_GOOGLER preprocessor = AutoGooglerPreprocessor - def run_implementation(self) -> None: + async def run_to_completion(self) -> AutoGoogler: dto: AutoGooglerInputDTO = self.dto + env_var_manager = EnvVarManager.get() auto_googler = AutoGoogler( search_config=SearchConfig( urls_per_result=dto.urls_per_result, queries=dto.queries, ), google_searcher=GoogleSearcher( - api_key=get_from_env("GOOGLE_API_KEY"), - cse_id=get_from_env("GOOGLE_CSE_ID"), + api_key=env_var_manager.google_api_key, + cse_id=env_var_manager.google_cse_id, ) ) - for log in auto_googler.run(): - self.log(log) + async for log in auto_googler.run(): + await self.log(log) + return auto_googler + + async def run_implementation(self) -> None: + + auto_googler = await self.run_to_completion() inner_data = [] for query in auto_googler.search_config.queries: diff --git a/source_collectors/auto_googler/GoogleSearcher.py b/source_collectors/auto_googler/GoogleSearcher.py index 6f7b4cc8..fe52ea45 100644 --- a/source_collectors/auto_googler/GoogleSearcher.py +++ b/source_collectors/auto_googler/GoogleSearcher.py @@ -1,5 +1,7 @@ +import asyncio from typing import Union +import aiohttp from googleapiclient.discovery import build from googleapiclient.errors import HttpError @@ -28,8 +30,7 @@ class GoogleSearcher: search results as dictionaries or None if the daily quota for the API has been exceeded. Raises a RuntimeError if any other error occurs during the search. """ - GOOGLE_SERVICE_NAME = "customsearch" - GOOGLE_SERVICE_VERSION = "v1" + GOOGLE_SEARCH_URL = "https://www.googleapis.com/customsearch/v1" def __init__( self, @@ -41,11 +42,7 @@ def __init__( self.api_key = api_key self.cse_id = cse_id - self.service = build(self.GOOGLE_SERVICE_NAME, - self.GOOGLE_SERVICE_VERSION, - developerKey=self.api_key) - - def search(self, query: str) -> Union[list[dict], None]: + async def search(self, query: str) -> Union[list[dict], None]: """ Searches for results using the specified query. @@ -56,7 +53,7 @@ def search(self, query: str) -> Union[list[dict], None]: If the daily quota is exceeded, None is returned. 
""" try: - return self.get_query_results(query) + return await self.get_query_results(query) # Process your results except HttpError as e: if "Quota exceeded" in str(e): @@ -64,16 +61,28 @@ def search(self, query: str) -> Union[list[dict], None]: else: raise RuntimeError(f"An error occurred: {str(e)}") - def get_query_results(self, query) -> list[GoogleSearchQueryResultsInnerDTO] or None: - results = self.service.cse().list(q=query, cx=self.cse_id).execute() + async def get_query_results(self, query) -> list[GoogleSearchQueryResultsInnerDTO] or None: + params = { + "key": self.api_key, + "cx": self.cse_id, + "q": query, + } + + async with aiohttp.ClientSession() as session: + async with session.get(self.GOOGLE_SEARCH_URL, params=params) as response: + response.raise_for_status() + results = await response.json() + if "items" not in results: return None + items = [] + for item in results["items"]: inner_dto = GoogleSearchQueryResultsInnerDTO( url=item["link"], title=item["title"], - snippet=item["snippet"] + snippet=item.get("snippet", ""), ) items.append(inner_dto) diff --git a/source_collectors/ckan/CKANAPIInterface.py b/source_collectors/ckan/CKANAPIInterface.py index 551ed023..563d795d 100644 --- a/source_collectors/ckan/CKANAPIInterface.py +++ b/source_collectors/ckan/CKANAPIInterface.py @@ -1,13 +1,13 @@ +import asyncio from typing import Optional -from ckanapi import RemoteCKAN, NotFound +import aiohttp +from aiohttp import ContentTypeError class CKANAPIError(Exception): pass -# TODO: Maybe return Base Models? - class CKANAPIInterface: """ Interfaces with the CKAN API @@ -15,22 +15,47 @@ class CKANAPIInterface: def __init__(self, base_url: str): self.base_url = base_url - self.remote = RemoteCKAN(base_url, get_only=True) - - def package_search(self, query: str, rows: int, start: int, **kwargs): - return self.remote.action.package_search(q=query, rows=rows, start=start, **kwargs) - def get_organization(self, organization_id: str): + @staticmethod + def _serialize_params(params: dict) -> dict: + return { + k: str(v).lower() if isinstance(v, bool) else str(v) for k, v in params.items() + } + + async def _get(self, action: str, params: dict): + url = f"{self.base_url}/api/3/action/{action}" + serialized_params = self._serialize_params(params) + async with aiohttp.ClientSession() as session: + async with session.get(url, params=serialized_params) as response: + try: + data = await response.json() + if not data.get("success", False): + raise CKANAPIError(f"Request failed: {data}") + except ContentTypeError: + raise CKANAPIError(f"Request failed: {response.text()}") + return data["result"] + + async def package_search(self, query: str, rows: int, start: int, **kwargs): + return await self._get("package_search", { + "q": query, "rows": rows, "start": start, **kwargs + }) + + async def get_organization(self, organization_id: str): try: - return self.remote.action.organization_show(id=organization_id, include_datasets=True) - except NotFound as e: - raise CKANAPIError(f"Organization {organization_id} not found" - f" for url {self.base_url}. Original error: {e}") - - def get_group_package(self, group_package_id: str, limit: Optional[int]): + return await self._get("organization_show", { + "id": organization_id, "include_datasets": True + }) + except CKANAPIError as e: + raise CKANAPIError( + f"Organization {organization_id} not found for url {self.base_url}. 
{e}" + ) + + async def get_group_package(self, group_package_id: str, limit: Optional[int]): try: - return self.remote.action.group_package_show(id=group_package_id, limit=limit) - except NotFound as e: - raise CKANAPIError(f"Group Package {group_package_id} not found" - f" for url {self.base_url}. Original error: {e}") - + return await self._get("group_package_show", { + "id": group_package_id, "limit": limit + }) + except CKANAPIError as e: + raise CKANAPIError( + f"Group Package {group_package_id} not found for url {self.base_url}. {e}" + ) \ No newline at end of file diff --git a/source_collectors/ckan/CKANCollector.py b/source_collectors/ckan/CKANCollector.py index 24477aad..873a8593 100644 --- a/source_collectors/ckan/CKANCollector.py +++ b/source_collectors/ckan/CKANCollector.py @@ -1,6 +1,6 @@ from pydantic import BaseModel -from collector_manager.CollectorBase import CollectorBase +from collector_manager.AsyncCollectorBase import AsyncCollectorBase from collector_manager.enums import CollectorType from core.preprocessors.CKANPreprocessor import CKANPreprocessor from source_collectors.ckan.DTOs import CKANInputDTO @@ -16,30 +16,35 @@ "organization_search": ckan_package_search_from_organization } -class CKANCollector(CollectorBase): +class CKANCollector(AsyncCollectorBase): collector_type = CollectorType.CKAN preprocessor = CKANPreprocessor - def run_implementation(self): - results = self.get_results() + async def run_implementation(self): + results = await self.get_results() flat_list = get_flat_list(results) deduped_flat_list = deduplicate_entries(flat_list) - list_with_collection_child_packages = self.add_collection_child_packages(deduped_flat_list) + list_with_collection_child_packages = await self.add_collection_child_packages(deduped_flat_list) - filtered_results = list(filter(filter_result, list_with_collection_child_packages)) + filtered_results = list( + filter( + filter_result, + list_with_collection_child_packages + ) + ) parsed_results = list(map(parse_result, filtered_results)) self.data = {"results": parsed_results} - def add_collection_child_packages(self, deduped_flat_list): + async def add_collection_child_packages(self, deduped_flat_list): # TODO: Find a way to clearly indicate which parts call from the CKAN API list_with_collection_child_packages = [] count = len(deduped_flat_list) for idx, result in enumerate(deduped_flat_list): if "extras" in result.keys(): - self.log(f"Found collection ({idx + 1}/{count}): {result['id']}") - collections = get_collections(result) + await self.log(f"Found collection ({idx + 1}/{count}): {result['id']}") + collections = await get_collections(result) if collections: list_with_collection_child_packages += collections[0] continue @@ -47,16 +52,16 @@ def add_collection_child_packages(self, deduped_flat_list): list_with_collection_child_packages.append(result) return list_with_collection_child_packages - def get_results(self): + async def get_results(self): results = [] dto: CKANInputDTO = self.dto for search in SEARCH_FUNCTION_MAPPINGS.keys(): - self.log(f"Running search '{search}'...") + await self.log(f"Running search '{search}'...") sub_dtos: list[BaseModel] = getattr(dto, search) if sub_dtos is None: continue func = SEARCH_FUNCTION_MAPPINGS[search] - results = perform_search( + results = await perform_search( search_func=func, search_terms=base_model_list_dump(model_list=sub_dtos), results=results diff --git a/source_collectors/ckan/README.md b/source_collectors/ckan/README.md index be6c65cf..2afcbb28 100644 --- 
a/source_collectors/ckan/README.md +++ b/source_collectors/ckan/README.md @@ -19,28 +19,6 @@ Running the scraper will output a list of packages to a CSV file using the searc * `search_terms.py` - The search terms and CKAN portals to search from. * `ckan_scraper_toolkit.py` - Toolkit of functions that use ckanapi to retrieve packages from CKAN data portals. -## Setup - -1. In a terminal, navigate to the CKAN scraper folder - ```cmd - cd scrapers_library/data_portals/ckan/ - ``` -2. Create and activate a Python virtual environment - ```cmd - python -m venv venv - source venv/bin/activate - ``` - -3. Install the requirements - ```cmd - pip install -r requirements.txt - ``` -4. Run the multi-portal CKAN scraper - ```cmd - python scrape_ckan_data_portals.py - ``` -5. Review the generated `results.csv` file. - ## How can I tell if a website I want to scrape is hosted using CKAN? There's no easy way to tell, some websites will reference CKAN or link back to the CKAN documentation while others will not. There doesn't seem to be a database of all CKAN instances either. diff --git a/source_collectors/ckan/ckan_scraper_toolkit.py b/source_collectors/ckan/ckan_scraper_toolkit.py index 3d5c7296..641dec2a 100644 --- a/source_collectors/ckan/ckan_scraper_toolkit.py +++ b/source_collectors/ckan/ckan_scraper_toolkit.py @@ -1,16 +1,14 @@ """Toolkit of functions that use ckanapi to retrieve packages from CKAN data portals""" - +import asyncio import math import sys -import time -from concurrent.futures import as_completed, ThreadPoolExecutor from dataclasses import dataclass, field from datetime import datetime from typing import Any, Optional from urllib.parse import urljoin -import requests -from bs4 import BeautifulSoup +import aiohttp +from bs4 import BeautifulSoup, ResultSet, Tag from source_collectors.ckan.CKANAPIInterface import CKANAPIInterface @@ -46,7 +44,7 @@ def to_dict(self): } -def ckan_package_search( +async def ckan_package_search( base_url: str, query: Optional[str] = None, rows: Optional[int] = sys.maxsize, @@ -69,7 +67,7 @@ def ckan_package_search( while start < rows: num_rows = rows - start + offset - packages: dict = interface.package_search( + packages: dict = await interface.package_search( query=query, rows=num_rows, start=start, **kwargs ) add_base_url_to_packages(base_url, packages) @@ -94,7 +92,7 @@ def add_base_url_to_packages(base_url, packages): [package.update(base_url=base_url) for package in packages["results"]] -def ckan_package_search_from_organization( +async def ckan_package_search_from_organization( base_url: str, organization_id: str ) -> list[dict[str, Any]]: """Returns a list of CKAN packages from an organization. Only 10 packages are able to be returned. @@ -104,22 +102,22 @@ def ckan_package_search_from_organization( :return: List of dictionaries representing the packages associated with the organization. 
""" interface = CKANAPIInterface(base_url) - organization = interface.get_organization(organization_id) + organization = await interface.get_organization(organization_id) packages = organization["packages"] - results = search_for_results(base_url, packages) + results = await search_for_results(base_url, packages) return results -def search_for_results(base_url, packages): +async def search_for_results(base_url, packages): results = [] for package in packages: query = f"id:{package['id']}" - results += ckan_package_search(base_url=base_url, query=query) + results += await ckan_package_search(base_url=base_url, query=query) return results -def ckan_group_package_show( +async def ckan_group_package_show( base_url: str, id: str, limit: Optional[int] = sys.maxsize ) -> list[dict[str, Any]]: """Returns a list of CKAN packages from a group. @@ -130,13 +128,13 @@ def ckan_group_package_show( :return: List of dictionaries representing the packages associated with the group. """ interface = CKANAPIInterface(base_url) - results = interface.get_group_package(group_package_id=id, limit=limit) + results = await interface.get_group_package(group_package_id=id, limit=limit) # Add the base_url to each package [package.update(base_url=base_url) for package in results] return results -def ckan_collection_search(base_url: str, collection_id: str) -> list[Package]: +async def ckan_collection_search(base_url: str, collection_id: str) -> list[Package]: """Returns a list of CKAN packages from a collection. :param base_url: Base URL of the CKAN portal before the collection ID. e.g. "https://catalog.data.gov/dataset/" @@ -144,50 +142,36 @@ def ckan_collection_search(base_url: str, collection_id: str) -> list[Package]: :return: List of Package objects representing the packages associated with the collection. 
""" url = f"{base_url}?collection_package_id={collection_id}" - soup = _get_soup(url) + soup = await _get_soup(url) # Calculate the total number of pages of packages num_results = int(soup.find(class_="new-results").text.split()[0].replace(",", "")) pages = math.ceil(num_results / 20) - packages = get_packages(base_url, collection_id, pages) + packages = await get_packages(base_url, collection_id, pages) return packages -def get_packages(base_url, collection_id, pages): +async def get_packages(base_url, collection_id, pages): packages = [] for page in range(1, pages + 1): url = f"{base_url}?collection_package_id={collection_id}&page={page}" - soup = _get_soup(url) + soup = await _get_soup(url) - futures = get_futures(base_url, packages, soup) + packages = [] + for dataset_content in soup.find_all(class_="dataset-content"): + await asyncio.sleep(1) + package = await _collection_search_get_package_data(dataset_content, base_url) + packages.append(package) - # Take a break to avoid being timed out - if len(futures) >= 15: - time.sleep(10) return packages - -def get_futures(base_url: str, packages: list[Package], soup: BeautifulSoup) -> list[Any]: - """Returns a list of futures for the collection search.""" - with ThreadPoolExecutor(max_workers=10) as executor: - futures = [ - executor.submit( - _collection_search_get_package_data, dataset_content, base_url - ) - for dataset_content in soup.find_all(class_="dataset-content") - ] - - [packages.append(package.result()) for package in as_completed(futures)] - return futures - - -def _collection_search_get_package_data(dataset_content, base_url: str): +async def _collection_search_get_package_data(dataset_content, base_url: str): """Parses the dataset content and returns a Package object.""" package = Package() joined_url = urljoin(base_url, dataset_content.a.get("href")) - dataset_soup = _get_soup(joined_url) + dataset_soup = await _get_soup(joined_url) # Determine if the dataset url should be the linked page to an external site or the current site resources = get_resources(dataset_soup) button = get_button(resources) @@ -214,7 +198,9 @@ def get_data(dataset_soup): return dataset_soup.find(property="dct:modified").text.strip() -def get_button(resources): +def get_button(resources: ResultSet) -> Optional[Tag]: + if len(resources) == 0: + return None return resources[0].find(class_="btn-group") @@ -224,7 +210,12 @@ def get_resources(dataset_soup): ) -def set_url_and_data_portal_type(button, joined_url, package, resources): +def set_url_and_data_portal_type( + button: Optional[Tag], + joined_url: str, + package: Package, + resources: ResultSet +): if len(resources) == 1 and button is not None and button.a.text == "Visit page": package.url = button.a.get("href") else: @@ -255,8 +246,9 @@ def set_description(dataset_soup, package): package.description = dataset_soup.find(class_="notes").p.text -def _get_soup(url: str) -> BeautifulSoup: +async def _get_soup(url: str) -> BeautifulSoup: """Returns a BeautifulSoup object for the given URL.""" - time.sleep(1) - response = requests.get(url) - return BeautifulSoup(response.content, "lxml") + async with aiohttp.ClientSession() as session: + async with session.get(url) as response: + response.raise_for_status() + return BeautifulSoup(await response.text(), "lxml") diff --git a/source_collectors/ckan/main.py b/source_collectors/ckan/main.py deleted file mode 100644 index cc6f8da7..00000000 --- a/source_collectors/ckan/main.py +++ /dev/null @@ -1,44 +0,0 @@ -from source_collectors.ckan.ckan_scraper_toolkit 
import ckan_package_search, ckan_group_package_show, \ - ckan_package_search_from_organization -from source_collectors.ckan.scrape_ckan_data_portals import perform_search, get_flat_list, deduplicate_entries, \ - get_collection_child_packages, filter_result, parse_result, write_to_csv -from source_collectors.ckan.search_terms import package_search, group_search, organization_search - - - -def main(): - """ - Main function. - """ - results = [] - - print("Gathering results...") - results = perform_search( - search_func=ckan_package_search, - search_terms=package_search, - results=results, - ) - results = perform_search( - search_func=ckan_group_package_show, - search_terms=group_search, - results=results, - ) - results = perform_search( - search_func=ckan_package_search_from_organization, - search_terms=organization_search, - results=results, - ) - - flat_list = get_flat_list(results) - # Deduplicate entries - flat_list = deduplicate_entries(flat_list) - print("\nRetrieving collections...") - flat_list = get_collection_child_packages(flat_list) - - filtered_results = list(filter(filter_result, flat_list)) - parsed_results = list(map(parse_result, filtered_results)) - - write_to_csv(parsed_results) - -if __name__ == "__main__": - main() diff --git a/source_collectors/ckan/requirements.txt b/source_collectors/ckan/requirements.txt deleted file mode 100644 index fc41154b..00000000 --- a/source_collectors/ckan/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -from_root -ckanapi -bs4 -lxml -tqdm -pandas \ No newline at end of file diff --git a/source_collectors/ckan/schemas.py b/source_collectors/ckan/schemas.py deleted file mode 100644 index 6aeecf09..00000000 --- a/source_collectors/ckan/schemas.py +++ /dev/null @@ -1,6 +0,0 @@ -from marshmallow import Schema, fields - - -class PackageSearchSchema(Schema): - count = fields.Int(required=True) - results = fields.List(fields.Str(), required=True) # TODO: What is the structure of this? \ No newline at end of file diff --git a/source_collectors/ckan/scrape_ckan_data_portals.py b/source_collectors/ckan/scrape_ckan_data_portals.py index 9e0b2ff1..3a292b02 100644 --- a/source_collectors/ckan/scrape_ckan_data_portals.py +++ b/source_collectors/ckan/scrape_ckan_data_portals.py @@ -4,7 +4,6 @@ from itertools import chain from typing import Any, Callable, Optional -import pandas as pd from from_root import from_root from tqdm import tqdm @@ -15,7 +14,7 @@ sys.path.insert(1, str(p)) -def perform_search( +async def perform_search( search_func: Callable, search_terms: list[dict[str, Any]], results: list[dict[str, Any]], @@ -34,44 +33,26 @@ def perform_search( for search in tqdm(search_terms): item_results = [] for item in search[key]: - item_result = search_func(search["url"], item) + item_result = await search_func(search["url"], item) item_results.append(item_result) results += item_results return results -def get_collection_child_packages( - results: list[dict[str, Any]] -) -> list[dict[str, Any]]: - """Retrieves the child packages of each collection. - :param results: List of results. - :return: List of results containing child packages. 
- """ - new_list = [] - - for result in tqdm(results): - if "extras" in result.keys(): - collections = get_collections(result) - if collections: - new_list += collections[0] - continue - - new_list.append(result) - - return new_list +async def get_collections(result): + if "extras" not in result.keys(): + return [] -def get_collections(result): - collections = [ - ckan_collection_search( - base_url="https://catalog.data.gov/dataset/", - collection_id=result["id"], - ) - for extra in result["extras"] - if parent_package_has_no_resources(extra=extra, result=result) - ] + collections = [] + for extra in result["extras"]: + if parent_package_has_no_resources(extra=extra, result=result): + collections.append(await ckan_collection_search( + base_url="https://catalog.data.gov/dataset/", + collection_id=result["id"], + )) return collections @@ -263,7 +244,3 @@ def deduplicate_entries(flat_list): return flat_list -def write_to_csv(parsed_results): - df = pd.DataFrame(parsed_results) - df.to_csv("results.csv") - diff --git a/source_collectors/common_crawler/CommonCrawler.py b/source_collectors/common_crawler/CommonCrawler.py index 78d986cb..db683611 100644 --- a/source_collectors/common_crawler/CommonCrawler.py +++ b/source_collectors/common_crawler/CommonCrawler.py @@ -1,64 +1,76 @@ +import asyncio import json import time from http import HTTPStatus +from typing import Union from urllib.parse import quote_plus -import requests +import aiohttp from source_collectors.common_crawler.utils import URLWithParameters - -def make_request(search_url: URLWithParameters) -> requests.Response: +async def async_make_request( + search_url: 'URLWithParameters' +) -> Union[aiohttp.ClientResponse, None]: """ - Makes the HTTP GET request to the given search URL. - Return the response if successful, None if rate-limited. + Makes the HTTP GET request to the given search URL using aiohttp. + Return the response if successful, None if rate-limited or failed. 
""" try: - response = requests.get(str(search_url)) - response.raise_for_status() - return response - except requests.exceptions.RequestException as e: - if ( - response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR - and "SlowDown" in response.text - ): - return None - else: - print(f"Failed to get records: {e}") - return None - - -def process_response( - response: requests.Response, url: str, page: int -) -> list[str] or None: + async with aiohttp.ClientSession() as session: + async with session.get(str(search_url)) as response: + text = await response.text() + if ( + response.status == HTTPStatus.INTERNAL_SERVER_ERROR + and "SlowDown" in text + ): + return None + response.raise_for_status() + # simulate requests.Response interface for downstream compatibility + response.text_content = text # custom attribute for downstream use + response.status_code = response.status + return response + except aiohttp.ClientError as e: + print(f"Failed to get records: {e}") + return None + + +async def make_request( + search_url: 'URLWithParameters' +) -> Union[aiohttp.ClientResponse, None]: + """Synchronous wrapper around the async function.""" + return await async_make_request(search_url) + + +def process_response(response, url: str, page: int) -> Union[list[str], None]: """Processes the HTTP response and returns the parsed records if successful.""" + if response is None: + return None + if response.status_code == HTTPStatus.OK: - records = response.text.strip().split("\n") + records = response.text_content.strip().split("\n") print(f"Found {len(records)} records for {url} on page {page}") results = [] for record in records: d = json.loads(record) results.append(d["url"]) return results - if "First Page is 0, Last Page is 0" in response.text: + + if "First Page is 0, Last Page is 0" in response.text_content: print("No records exist in index matching the url search term") return None + print(f"Unexpected response: {response.status_code}") return None -def get_common_crawl_search_results( - search_url: URLWithParameters, + +async def get_common_crawl_search_results( + search_url: 'URLWithParameters', query_url: str, page: int -) -> list[str] or None: - response = make_request(search_url) - processed_data = process_response( - response=response, - url=query_url, - page=page - ) - # TODO: POINT OF MOCK - return processed_data +) -> Union[list[str], None]: + response = await make_request(search_url) + return process_response(response, query_url, page) @@ -88,10 +100,10 @@ def __init__( self.num_pages = num_pages self.url_results = None - def run(self): + async def run(self): url_results = [] for page in range(self.start_page, self.start_page + self.num_pages): - urls = self.search_common_crawl_index(query_url=self.url, page=page) + urls = await self.search_common_crawl_index(query_url=self.url, page=page) # If records were found, filter them and add to results if not urls: @@ -109,7 +121,7 @@ def run(self): self.url_results = url_results - def search_common_crawl_index( + async def search_common_crawl_index( self, query_url: str, page: int = 0, max_retries: int = 20 ) -> list[str] or None: """ @@ -132,7 +144,7 @@ def search_common_crawl_index( # put HTTP GET request in re-try loop in case of rate limiting. Once per second is nice enough per common crawl doc. 
while retries < max_retries: - results = get_common_crawl_search_results( + results = await get_common_crawl_search_results( search_url=search_url, query_url=query_url, page=page) if results is not None: return results diff --git a/source_collectors/common_crawler/CommonCrawlerCollector.py b/source_collectors/common_crawler/CommonCrawlerCollector.py index 71365680..eb28d545 100644 --- a/source_collectors/common_crawler/CommonCrawlerCollector.py +++ b/source_collectors/common_crawler/CommonCrawlerCollector.py @@ -1,15 +1,15 @@ -from collector_manager.CollectorBase import CollectorBase +from collector_manager.AsyncCollectorBase import AsyncCollectorBase from collector_manager.enums import CollectorType from core.preprocessors.CommonCrawlerPreprocessor import CommonCrawlerPreprocessor from source_collectors.common_crawler.CommonCrawler import CommonCrawler from source_collectors.common_crawler.DTOs import CommonCrawlerInputDTO -class CommonCrawlerCollector(CollectorBase): +class CommonCrawlerCollector(AsyncCollectorBase): collector_type = CollectorType.COMMON_CRAWLER preprocessor = CommonCrawlerPreprocessor - def run_implementation(self) -> None: + async def run_implementation(self) -> None: print("Running Common Crawler...") dto: CommonCrawlerInputDTO = self.dto common_crawler = CommonCrawler( @@ -17,9 +17,9 @@ def run_implementation(self) -> None: url=dto.url, keyword=dto.search_term, start_page=dto.start_page, - num_pages=dto.total_pages + num_pages=dto.total_pages, ) - for status in common_crawler.run(): - self.log(status) + async for status in common_crawler.run(): + await self.log(status) self.data = {"urls": common_crawler.url_results} \ No newline at end of file diff --git a/source_collectors/common_crawler/README.md b/source_collectors/common_crawler/README.md deleted file mode 100644 index 3701b5d5..00000000 --- a/source_collectors/common_crawler/README.md +++ /dev/null @@ -1,87 +0,0 @@ -# Common Crawler - -This module interfaces with the Common Crawl dataset to extract urls. - -## Installation - -Python Version Required: 3.11 - -To install all necessary dependencies, run the following command from the root directory: - -```bash -pip install -r requirements.txt -``` - - -## Usage Example - -### Environment Requirements - -Please ensure you have a `.env` file located in the root directory (not the `common_crawler` directory) -which contains the following environment variable: - -* HUGGINGFACE_ACCESS_TOKEN = The access token to enable writing to the associated PDAP dataset. -To obtain your access token, consult user settings at -and ensure you have write access to . -* LABEL_STUDIO_ACCESS_TOKEN = The access token for the Label Studio API. This can be - obtained by logging into Label Studio and navigating to the [user account section](https://app.heartex.com/user/account), where the access token can be copied. -* LABEL_STUDIO_PROJECT_ID = The project ID for the Label Studio API. This can be - obtained by logging into Label Studio and navigating to the relevant project, where the project id will be in the URL. - -### Instructions - -Run the following script from the root directory -```bash -python common_crawler/main.py CC-MAIN-2023-50 '*.gov' police --config common_crawler/config.ini --pages 2 -``` - -This example will crawl a single page (typically 15000 records) of the Common Crawl dataset with ID `CC-MAIN-2023-50` -and search for the term `police` in all the pages with the `.gov` domain. 
It will use the default configuration file `config.ini` -to determine the json cache location and the location of the output csv file. - -Note that the cache records the most recent page number that was used for given combination of Common Crawl ID, url search term, and keyword. -If the same command is run again, it will start from the next page. -If you want to reset the cache, you can use the `--reset-cache` flag. - -By default, the output csv file will be named `urls.csv` and will be located in the `data` directory of the module. -This csv file contains both the url and the parameters used to query it. - -### Parameters - -- **common_crawl_id**: Required. Specifies the Common Crawl Index to perform the search on. -- **url**: Required. Specifies the domain URL to query. Wildcard characters such as * can be used to expand the search. Note that the query must be contained within quotes (as in '*.gov') to prevent misinterpretation of wildcards -- **search_term**: Required. Specifies keyword within the url to search for. -- **-c or --config**: Optional. Specifies the configuration file to use. The default value is config.ini. -- **-p or --pages**: Optional. Specifies the number of pages to search. The default value is 1. -- **--reset-cache**: Optional. If set, it resets the cache before starting the crawl. - -### Configuration - -Several attributes are currently defined in `config.ini`: -- **cache_filename**: This is the name of the cache file. The default value is `cache`. The file will be saved with a `.json` extension. -- **output_filename**: This is the name of the output file. The default value is `urls`. The file will be saved with a `.csv` extension. -- **data_dir**: This is the directory where the cache and output files will be saved. The default value is `data`. -- **huggingface_repo_id**: This is the repository ID for the hugging face dataset which urls will be uploaded to - -## Code Structure - -The code is structured as follows: -- **main.py**: This is the main file that is used to run the module. It contains the logic to parse the command line arguments and call the necessary functions. -- **crawler.py**: This file contains the logic to interface with the Common Crawl dataset and extract urls. -- **cache.py**: This file contains the logic to read and write the cache file. -- **argparser.py**: This file contains the logic to parse the command line and config arguments. -- **csv_manager.py**: This file contains the logic to write the output csv file. -- **utils.py**: This file contains utility functions. -- **config.ini**: This file contains the default configuration values. -- **README.md**: This file contains the documentation for the module. You're reading it right now. Isn't that nifty! - -## Testing - -A suite of unit and integration tests were developed for this module. - -To run the tests, run the following command from this directory: - -```bash -pytest ../tests/test_common_crawler_integration.py -pytest ../tests/test_common_crawler_unit.py -``` \ No newline at end of file diff --git a/source_collectors/common_crawler/argparser.py b/source_collectors/common_crawler/argparser.py deleted file mode 100644 index 67f4a290..00000000 --- a/source_collectors/common_crawler/argparser.py +++ /dev/null @@ -1,95 +0,0 @@ -import argparse -import configparser -import re - -""" -This module contains the argument parser for command line arguments -for the Common Crawler script. -""" - - -def valid_common_crawl_id(common_crawl_id: str) -> bool: - """ - Validate the Common Crawl ID format. 
- The Common Crawl ID should be in the format CC-MAIN-YYYY-WW. - Args: - common_crawl_id: The Common Crawl ID to validate - Returns: - True if the Common Crawl ID is valid, False otherwise - """ - return re.match(r"CC-MAIN-\d{4}-\d{2}", common_crawl_id) is not None - - -def parse_args() -> argparse.Namespace: - """ - Parse the command line arguments for the Common Crawler script - as well as the configuration file. - Arguments parsed include: - - The Common Crawl ID - - The URL to query - - The search term - - The number of pages to search - - The configuration file (defaults to config.ini) - - A flag to reset the cache - Returns: The parsed arguments - """ - - parser = argparse.ArgumentParser( - description="Query the Common Crawl dataset and optionally save the results to a file." - ) - # Add the required arguments - parser.add_argument("common_crawl_id", type=str, help="The Common Crawl ID") - parser.add_argument("url", type=str, help="The URL to query") - parser.add_argument("keyword", type=str, help="The keyword to search in the url") - # Optional arguments for the number of pages and the output file, and a flag to reset the cache - parser.add_argument( - "-c", - "--config", - type=str, - default="config.ini", - help="The configuration file to use", - ) - parser.add_argument( - "-p", - "--pages", - type=int, - default=1, - help="The number of pages to search (default: 1)", - ) - parser.add_argument( - "--reset-cache", - action="store_true", - default=False, - help="Reset the cache before starting the crawl", - ) - - args = parser.parse_args() - - # Validate the Common Crawl ID format - if not valid_common_crawl_id(args.common_crawl_id): - parser.error( - "Invalid Common Crawl ID format. Expected format is CC-MAIN-YYYY-WW." - ) - - # Read the configuration file - config = configparser.ConfigParser() - config.read(args.config) - - # Combine parsed arguments with configuration file defaults - app_parser = argparse.ArgumentParser(parents=[parser], add_help=False) - app_parser.set_defaults(**config["DEFAULT"]) - - app_args = app_parser.parse_args() - - # Print arguments - print(f"--Common Crawl ID: {app_args.common_crawl_id}") - print(f"--URL: {app_args.url}") - print(f"--Keyword: {app_args.keyword}") - print(f"--Number of Pages: {app_args.pages}") - print(f"--Configuration File: {app_args.config}") - print(f"--Reset Cache: {app_args.reset_cache}") - print(f"--Output File: {app_args.output_filename}.csv") - print(f"--Cache File: {app_args.cache_filename}.json") - print(f"--Data Directory: {app_args.data_dir}") - - return app_args diff --git a/source_collectors/common_crawler/cache.py b/source_collectors/common_crawler/cache.py deleted file mode 100644 index 23d58819..00000000 --- a/source_collectors/common_crawler/cache.py +++ /dev/null @@ -1,93 +0,0 @@ -import json - -from util.miscellaneous_functions import get_file_path - -""" -This module contains classes for managing a cache of Common Crawl search results -These classes include: - - CommonCrawlerCache: a class for managing the cache logic of Common Crawl search results -""" - - -class CommonCrawlerCacheManager: - """ - A class for managing the cache of Common Crawl search results. - This class is responsible for adding, retrieving, and saving cache data. - """ - - def __init__(self, file_name: str = "cache", directory=None): - """ - Initializes the CacheStorage object with a file name and directory. 
- Args: - file_name: the name of the cache file - directory: the directory to store the cache file - """ - self.file_path = get_file_path(f"{file_name}.json", directory) - print(f"Cache file path: {self.file_path}") - self.cache = self.load_or_create_cache() - - def upsert(self, index: str, url: str, keyword: str, last_page: int) -> None: - """ - Updates the cache with the last page crawled for a given index, url, and keyword. - Or adds a new cache object if it does not exist. - Args: - index: the index of the common crawl - url: the url to search - keyword: the search term to use - last_page: the last page crawled - Returns: None - """ - if index not in self.cache: - self.cache[index] = {} - if url not in self.cache[index]: - self.cache[index][url] = {} - self.cache[index][url][keyword] = last_page - - def get(self, index, url, keyword) -> int: - """ - Retrieves a page number from the cache. - Args: - index: the index of the common crawl - url: the url to search - keyword: the search term to use - - Returns: int - the last page crawled - - """ - if ( - index in self.cache - and url in self.cache[index] - and keyword in self.cache[index][url] - ): - return self.cache[index][url][keyword] - # The cache object does not exist. Return 0 as the default value. - return 0 - - def load_or_create_cache(self) -> dict: - """ - Loads the cache from the configured file path. - If the file does not exist, an empty dictionary is returned. - Returns: dict - the cache data - """ - try: - with open(self.file_path, "r") as file: - return json.load(file) - except FileNotFoundError: - return {} - - def save_cache(self) -> None: - """ - Converts the cache object into a JSON-serializable format and saves it to the configured file path. - This method ensures the cache is stored in a readable and easily reloadable format, allowing for - persistence of crawl data across sessions. - """ - # Reformat cache data for JSON serialization - with open(self.file_path, "w") as file: - json.dump(self.cache, file, indent=4) - - def reset_cache(self) -> None: - """ - Resets the cache to an empty state. - """ - self.cache = {} - print("Cache has been reset.") diff --git a/source_collectors/common_crawler/config.ini b/source_collectors/common_crawler/config.ini deleted file mode 100644 index fc558303..00000000 --- a/source_collectors/common_crawler/config.ini +++ /dev/null @@ -1,19 +0,0 @@ -# This configuration file contains default settings for the Common Crawler application. -# Settings can be modified to suit different environments or testing needs. - -[DEFAULT] -# Filename for the cache. Stores which pages have been crawled -# at which combinations of index, url search term, and keyword -# to avoid re-crawling them. -cache_filename = cache - -# Directory where data files (both cache and output) are stored. -# Change as needed for different environments. -# Path is relative from working directory that executes common_crawler/main.py -data_dir = common_crawler/data - -# Filename for the output CSV containing crawled URLs. 
-output_filename = urls - -# Name of the huggingface repo -huggingface_repo_id = PDAP/unlabeled-urls \ No newline at end of file diff --git a/source_collectors/common_crawler/csv_manager.py b/source_collectors/common_crawler/csv_manager.py deleted file mode 100644 index 5a80aeaa..00000000 --- a/source_collectors/common_crawler/csv_manager.py +++ /dev/null @@ -1,79 +0,0 @@ -import csv -import os - -from util.miscellaneous_functions import get_file_path - - -class CSVManager: - """ - Manages a CSV file for storing URLs. - Creates the file if it doesn't exist, and provides a method for adding new rows. - """ - - def __init__(self, file_name: str, headers: list[str], directory=None): - """ - Args: - file_name: the name of the CSV file - headers: the headers for the CSV file - directory: the directory to store the CSV file - """ - self.file_path = get_file_path(f"{file_name}.csv", directory) - self.headers = headers - if not os.path.exists(self.file_path): - self.initialize_file() - - def add_row(self, row_values: list[str] | tuple[str]): - """ - Appends a new row of data to the CSV. - Args: - row_values: list of values to add to the csv, in order of their inclusion in the list - """ - if isinstance(row_values, str): - # Single values must be converted to a list format - row_values = [row_values] - try: - with open(self.file_path, mode="a", newline="", encoding="utf-8") as file: - writer = csv.writer(file) - writer.writerow(row_values) - except Exception as e: - print(f"An error occurred while trying to write to {self.file_path}: {e}") - - def add_rows(self, results: list[list[str]]) -> None: - """ - Appends multiple rows of data to the CSV as a list of lists of strings. - Args: - results: list[list[str] - a list of lists of strings, each inner list representing a row - Returns: None - """ - for result in results: - self.add_row(result) - print(f"{len(results)} URLs written to {self.file_path}") - - def initialize_file(self): - """ - Initializes the CSV file. - If the file doesn't exist, it creates it with the header row. - """ - # check if file exists - file_exists = os.path.isfile(self.file_path) - - if not file_exists: - with open(self.file_path, mode="a", newline="", encoding="utf-8") as file: - writer = csv.writer(file) - writer.writerow(self.headers) - else: - # Open and check that headers match - with open(self.file_path, mode="r", encoding="utf-8") as file: - header_row = next(csv.reader(file)) - if header_row != self.headers: - raise ValueError( - f"Header row in {self.file_path} does not match expected headers" - ) - print(f"CSV file initialized at {self.file_path}") - - def delete_file(self): - """ - Deletes the CSV file. 
- """ - os.remove(self.file_path) - print(f"CSV file deleted at {self.file_path}") diff --git a/source_collectors/common_crawler/data/cache.json b/source_collectors/common_crawler/data/cache.json deleted file mode 100644 index e12687ad..00000000 --- a/source_collectors/common_crawler/data/cache.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "CC-MAIN-2023-50": { - "*.gov": { - "police": 10 - } - } -} \ No newline at end of file diff --git a/source_collectors/common_crawler/data/urls.csv b/source_collectors/common_crawler/data/urls.csv deleted file mode 100644 index 6fc4dc6f..00000000 --- a/source_collectors/common_crawler/data/urls.csv +++ /dev/null @@ -1,207 +0,0 @@ -Index,Search Term,Keyword,Page,URL -CC-MAIN-2023-50,*.gov,police,2,https://acworth-ga.gov/administering-the-oath-of-office-to-a-newly-promoted-member-of-the-police-department/ -CC-MAIN-2023-50,*.gov,police,2,https://www.ada.gov/policevideo/policebroadbandgallery.htm -CC-MAIN-2023-50,*.gov,police,2,https://archive.ada.gov/franklintonpolice.htm -CC-MAIN-2023-50,*.gov,police,2,https://archive.ada.gov/illinois_state_police.htm -CC-MAIN-2023-50,*.gov,police,2,https://www.adamn.gov/p/other/police-department -CC-MAIN-2023-50,*.gov,police,2,https://www.adamscountypa.gov/police/earpd -CC-MAIN-2023-50,*.gov,police,2,https://www.aftonwyoming.gov/government/police_department/index.php -CC-MAIN-2023-50,*.gov,police,6,https://www.akronohio.gov/departments/police/community_relations.php -CC-MAIN-2023-50,*.gov,police,6,https://akronohio.gov/departments/police/community_relations.php -CC-MAIN-2023-50,*.gov,police,6,https://www.akronohio.gov/departments/police/crime_snapshot_statistics.php -CC-MAIN-2023-50,*.gov,police,6,https://akronohio.gov/departments/police/crime_snapshot_statistics.php -CC-MAIN-2023-50,*.gov,police,6,https://www.akronohio.gov/departments/police/index.php -CC-MAIN-2023-50,*.gov,police,6,https://akronohio.gov/departments/police/index.php -CC-MAIN-2023-50,*.gov,police,6,https://www.akronohio.gov/departments/police/investigative_subdivision.php -CC-MAIN-2023-50,*.gov,police,6,https://akronohio.gov/departments/police/investigative_subdivision.php -CC-MAIN-2023-50,*.gov,police,6,https://www.akronohio.gov/departments/police/procedures.php -CC-MAIN-2023-50,*.gov,police,6,https://akronohio.gov/departments/police/procedures.php -CC-MAIN-2023-50,*.gov,police,6,https://www.akronohio.gov/departments/police/recruiting/index.php -CC-MAIN-2023-50,*.gov,police,6,https://akronohio.gov/departments/police/recruiting/index.php -CC-MAIN-2023-50,*.gov,police,6,https://www.akronohio.gov/departments/police/services_subdivision.php -CC-MAIN-2023-50,*.gov,police,6,https://akronohio.gov/departments/police/services_subdivision.php -CC-MAIN-2023-50,*.gov,police,6,https://www.akronohio.gov/departments/police/transparency_hub.php -CC-MAIN-2023-50,*.gov,police,6,https://akronohio.gov/departments/police/transparency_hub.php -CC-MAIN-2023-50,*.gov,police,6,https://www.akronohio.gov/departments/police/uniform_subdivision.php -CC-MAIN-2023-50,*.gov,police,6,https://akronohio.gov/departments/police/uniform_subdivision.php -CC-MAIN-2023-50,*.gov,police,6,https://www.akronohio.gov/departments/police/zone_command.php -CC-MAIN-2023-50,*.gov,police,6,https://akronohio.gov/departments/police/zone_command.php -CC-MAIN-2023-50,*.gov,police,6,https://adeca.alabama.gov/2022/11/14/gov-ivey-announces-grant-to-help-auburn-police-deter-crime/ -CC-MAIN-2023-50,*.gov,police,7,https://governor.alabama.gov/newsroom/2020/02/kimberly-police-officer-nick-orear-flag-memo/ 
-CC-MAIN-2023-50,*.gov,police,7,https://www.revenue.alabama.gov/de/sales-use/2022-police-jurisdiction-annexations-deannexations-and-ordinances/ -CC-MAIN-2023-50,*.gov,police,7,https://www.revenue.alabama.gov/ko/sales-use/2022-police-jurisdiction-annexations-deannexations-and-ordinances/ -CC-MAIN-2023-50,*.gov,police,7,https://www.revenue.alabama.gov/ko/sales-use/police-jurisdictions/ -CC-MAIN-2023-50,*.gov,police,7,https://www.revenue.alabama.gov/ru/sales-use/2022-police-jurisdiction-annexations-deannexations-and-ordinances/ -CC-MAIN-2023-50,*.gov,police,7,https://www.revenue.alabama.gov/sales-use/2015-police-jurisdiction-annexations-deannexations-ordinances/ -CC-MAIN-2023-50,*.gov,police,7,https://www.revenue.alabama.gov/sales-use/2022-police-jurisdiction-annexations-deannexations-and-ordinances/ -CC-MAIN-2023-50,*.gov,police,7,https://www.revenue.alabama.gov/sales-use/2023-police-jurisdiction-deannexations-ordinances-and-maps/ -CC-MAIN-2023-50,*.gov,police,8,https://tourism.alabama.gov/tag/world-police-and-fire-games/ -CC-MAIN-2023-50,*.gov,police,8,https://www.alamedaca.gov/files/content/public/departments/police-department/community_resources_apd.pdf -CC-MAIN-2023-50,*.gov,police,8,https://www.alamedaca.gov/files/content/public/v/237/departments/police-department/community_resources_apd.pdf -CC-MAIN-2023-50,*.gov,police,8,https://www.alamedaca.gov/files/sharedassets/public/alameda/police/policy-manual.pdf -CC-MAIN-2023-50,*.gov,police,8,http://alamedaca.gov/sites/default/files/department-files/2016-02-02/alameda_police_department_alpr_policy_20160122.pdf -CC-MAIN-2023-50,*.gov,police,8,https://alamedaca.gov/sites/default/files/department-files/2016-02-02/alameda_police_department_alpr_policy_20160122.pdf -CC-MAIN-2023-50,*.gov,police,8,https://www.alamedaca.gov/sites/default/files/department-files/2016-02-02/alameda_police_department_alpr_policy_20160122.pdf -CC-MAIN-2023-50,*.gov,police,8,https://www.alamoheightstx.gov/departments/police/ -CC-MAIN-2023-50,*.gov,police,8,https://www.alamoheightstx.gov/news/stories/peace-officers-memorial-day-and-national-police-week/ -CC-MAIN-2023-50,*.gov,police,8,https://www.alamoheightstx.gov/public-safety/police/police-blotter/ -CC-MAIN-2023-50,*.gov,police,9,https://dot.alaska.gov/anc/airport-police-fire.shtml -CC-MAIN-2023-50,*.gov,police,9,https://dot.alaska.gov/anc/airport-police-fire.shtml -CC-MAIN-2023-50,*.gov,police,9,https://dot.alaska.gov/anc/business/policefire/index.shtml -CC-MAIN-2023-50,*.gov,police,9,https://dot.alaska.gov/anc/business/policefire/jobs/ -CC-MAIN-2023-50,*.gov,police,9,https://dot.alaska.gov/anc/contact-police-fire.shtml -CC-MAIN-2023-50,*.gov,police,9,https://dot.alaska.gov/anc/contact-police-fire.shtml -CC-MAIN-2023-50,*.gov,police,9,https://dot.alaska.gov/anc/police-fire-organization-chart.shtml -CC-MAIN-2023-50,*.gov,police,9,https://dot.alaska.gov/anc/police-fire-organization-chart.shtml -CC-MAIN-2023-50,*.gov,police,9,https://dot.alaska.gov/anc/police.shtml -CC-MAIN-2023-50,*.gov,police,9,https://dot.alaska.gov/faiiap/police-fire/index.shtml -CC-MAIN-2023-50,*.gov,police,10,https://gov.alaska.gov/a-proclamation-on-honoring-united-states-capitol-police-officers/ -CC-MAIN-2023-50,*.gov,police,10,https://geohub.albanyga.gov/datasets/corrected-police-beat -CC-MAIN-2023-50,*.gov,police,10,https://data.albanyny.gov/browse?tags=police+report -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/departments/police/contact-the-albany-police-department 
-CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/departments/police/programs/medication-and-sharps-disposal -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/hr/salary-schedules/police-table -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/images/stories/police/apba/scholarship_packet.pdf -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/images/stories/police/forms/a18_alarm_user_permit_application.pdf -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/images/stories/police/forms/secondhand_dealer.pdf -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/images/stories/police/forms/Solicitor_License.pdf -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/images/stories/police/neighborhood-watch/2013_nw_brochure-update.pdf -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/images/stories/police/property/propertyinventoryrecord-fillable.pdf -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/images/stories/police/sm-smartcards/child_safety_smartcard.pdf -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/images/stories/police/sm-smartcards/facebook_smart_card.pdf -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/images/stories/police/sm-smartcards/linkedln_smart_card.pdf -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/images/stories/police/sm-smartcards/photosharingservices_smartcard.pdf -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/images/stories/police/sm-smartcards/smartphone_smartcard.pdf -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/images/stories/police/sm-smartcards/twitter_smart_card.pdf -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/ -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police -CC-MAIN-2023-50,*.gov,police,10,https://albanyoregon.gov/police -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/about/accreditation -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/about/accreditation -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/about/administration -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/about/apd-policies -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/about/apd-policies -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/about/communications-section -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/about/communications-section -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/about/community-resource-unit-cru -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/about/community-resource-unit-cru -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/about/community-resource-unit-cru -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/about/history -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/about/operations -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/about/operations -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/about/quarterly-report -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/about/quarterly-report -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/about/records-section 
-CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/about/support-division -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/about/support-division -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/about/support-division -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/contact-apd -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/contact-apd -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/contact-apd -CC-MAIN-2023-50,*.gov,police,10,https://albanyoregon.gov/police/contact-apd -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/crime/cold-cases -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/crime/statistics-crime-analysis -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/crime/statistics-crime-analysis -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/crime/statistics-crime-analysis -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/crime/statistics-crime-analysis -CC-MAIN-2023-50,*.gov,police,10,https://albanyoregon.gov/police/crime/statistics-crime-analysis -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/forms -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/forms/2dogs -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/forms/2dogs -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/forms/apbascholarship -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/forms/apbascholarship -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/forms/filing-a-complaint -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/forms/filing-a-complaint -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/forms/home-security-alarm-permits -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/forms/home-security-alarm-permits -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/forms/patch-requests -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/forms/patch-requests -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/forms/property-inventory-record -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/forms/ride-along-requests -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/forms/ride-along-requests -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/programs/animal-control -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/programs/apba -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/programs/community-police-academy -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/programs/medication-and-sharps-disposal -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/programs/national-night-out -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/programs/national-night-out -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/programs/national-night-out -CC-MAIN-2023-50,*.gov,police,10,https://albanyoregon.gov/police/programs/national-night-out -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/programs/neighborhood-speed-watch -CC-MAIN-2023-50,*.gov,police,10,https://albanyoregon.gov/police/programs/neighborhood-speed-watch -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/programs/neighborhood-watch-program 
-CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/programs/resources -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/programs/resources -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/programs/safe-and-secure-seniors-independent -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/programs/safe-and-secure-seniors-independent -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/programs/safereturn -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/programs/safety-camp -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/programs/safety-camp -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/programs/tow -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/programs/tow -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/programs/victim-assistance -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/programs/victim-assistance -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/programs/youthacademy -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/qrcode -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/robots.txt -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/safety/bicycle-theft-prevention-and-safety -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/safety/bicycle-theft-prevention-and-safety -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/safety/child-safety -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/safety/crime-prevention-through-environmental-design-cpted -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/safety/crime-prevention-through-environmental-design-cpted -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/safety/online-social-media-safety-tips -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/safety/protecting-your-business -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/safety/safe-exchange-zones -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/safety/safety-on-the-road -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/safety/vehicle -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/working-at-apd/cadet-program -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/working-at-apd/career-opportunities -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/working-at-apd/lateral-officers -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/working-at-apd/volunteer-program -CC-MAIN-2023-50,*.gov,police,10,https://www.albanyoregon.gov/police/working-at-apd/volunteer-program -CC-MAIN-2023-50,*.gov,police,11,https://alexandriava.gov/news-apd/2022-02-22/alexandria-police-department-makes-arrest-in-connection-to-shots-fired-incident -CC-MAIN-2023-50,*.gov,police,11,https://www.alexandriava.gov/news-apd/2022-03-15/alexandria-police-department-apprehends-assault-suspect -CC-MAIN-2023-50,*.gov,police,11,https://alexandriava.gov/news-apd/2022-03-22/alexandria-police-officer-arrested -CC-MAIN-2023-50,*.gov,police,11,https://alexandriava.gov/news-apd/2022-03-25/alexandria-police-department-investigates-first-homicide-of-the-year -CC-MAIN-2023-50,*.gov,police,11,https://alexandriava.gov/news-apd/2022-04-18/don-hayes-appointed-alexandria-police-chief 
-CC-MAIN-2023-50,*.gov,police,11,https://alexandriava.gov/news-apd/2022-06-06/alexandria-police-makes-arrest-in-fatal-shooting -CC-MAIN-2023-50,*.gov,police,11,https://www.alexandriava.gov/news-apd/2022-08-29/alexandria-police-department-investigates-serious-crash -CC-MAIN-2023-50,*.gov,police,11,https://www.alexandriava.gov/news-apd/2022-12-21/alexandria-police-department-investigates-shooting-incident -CC-MAIN-2023-50,*.gov,police,11,https://www.alexandriava.gov/news-apd/2023-09-29/apd-lt-graduates-from-dc-police-leadership-academy -CC-MAIN-2023-50,*.gov,police,11,https://www.alexandriava.gov/news-apd/2023-11-17/apd-assists-fairfax-county-police-in-apprehension-of-suspect-driving-stolen -CC-MAIN-2023-50,*.gov,police,11,https://alexandriava.gov/news-apd/2023-11-17/apd-assists-fairfax-county-police-in-apprehension-of-suspect-driving-stolen -CC-MAIN-2023-50,*.gov,police,11,https://alexandriava.gov/police/ -CC-MAIN-2023-50,*.gov,police,11,https://www.alexandriava.gov/police-department/community-police-academy -CC-MAIN-2023-50,*.gov,police,11,https://www.alexandriava.gov/police-department/criminal-investigation-division -CC-MAIN-2023-50,*.gov,police,11,https://www.alexandriava.gov/police-department/listing-page/apd-news-releases -CC-MAIN-2023-50,*.gov,police,11,https://www.alexandriava.gov/police-department/office-of-the-police-chief -CC-MAIN-2023-50,*.gov,police,11,https://www.alexandriava.gov/police-department/other-services -CC-MAIN-2023-50,*.gov,police,11,https://www.alexandriava.gov/police-department/police-services -CC-MAIN-2023-50,*.gov,police,11,http://www3.alexandriava.gov/police/crime_reports/reporter.php -CC-MAIN-2023-50,*.gov,police,11,https://www3.alexandriava.gov/police/crime_reports/reporter.php -CC-MAIN-2023-50,*.gov,police,11,https://www.alexandriava.gov/police/info/default.aspx?id=112991 -CC-MAIN-2023-50,*.gov,police,11,https://alexandriava.gov/police/info/default.aspx?id=24274 -CC-MAIN-2023-50,*.gov,police,11,https://www.alexandriava.gov/police/info/default.aspx?id=59358 -CC-MAIN-2023-50,*.gov,police,11,https://alexandriava.gov/police/info/news_policedisplay.aspx?id=27648 -CC-MAIN-2023-50,*.gov,police,11,https://alexandriava.gov/police/info/news_policedisplay.aspx?id=33624 -CC-MAIN-2023-50,*.gov,police,11,https://alexandriava.gov/police/info/news_policedisplay.aspx?id=68136 -CC-MAIN-2023-50,*.gov,police,11,https://wdc.alexandriava.gov/employment/special-police-officer-listing-3030.aspx -CC-MAIN-2023-50,*.gov,police,11,https://wdc.alexandriava.gov/employment/special-police-officer-listing-4122.aspx -CC-MAIN-2023-50,*.gov,police,11,https://aliquippapa.gov/events/light-up-night-at-the-aliquippa-police-station/ -CC-MAIN-2023-50,*.gov,police,11,https://www.almaarkansas.gov/police/ -CC-MAIN-2023-50,*.gov,police,11,https://www.almontmichigan.gov/departments/police-department/ -CC-MAIN-2023-50,*.gov,police,11,https://altoonapa.gov/contact-forms/departments/police/report-an-abandoned-vehicle-on-public-streets -CC-MAIN-2023-50,*.gov,police,11,https://www.altoonapa.gov/contacts/police/commander-of-criminal-investigation/lt-ashley-day -CC-MAIN-2023-50,*.gov,police,11,https://altoonapa.gov/departments/police/animal-control -CC-MAIN-2023-50,*.gov,police,11,https://altoonapa.gov/departments/police/directory -CC-MAIN-2023-50,*.gov,police,11,https://altoonapa.gov/departments/police/services -CC-MAIN-2023-50,*.gov,police,11,https://alvordtx.gov/police-documents/ -CC-MAIN-2023-50,*.gov,police,11,https://alvordtx.gov/police-staff/ 
-CC-MAIN-2023-50,*.gov,police,11,https://alvordtx.gov/question/how-do-i-file-a-police-report-2/ -CC-MAIN-2023-50,*.gov,police,11,https://alvordtx.gov/question/who-do-i-call-about-police-related-non-emergencies-2/ -CC-MAIN-2023-50,*.gov,police,11,https://alvordtx.gov/topics/police-courts/ -CC-MAIN-2023-50,*.gov,police,11,http://police.amarillo.gov/robots.txt -CC-MAIN-2023-50,*.gov,police,11,http://police.amarillo.gov/robots.txt -CC-MAIN-2023-50,*.gov,police,11,https://share.america.gov/ar/heres-police-held-accountable-shooting-incidents-video/ diff --git a/source_collectors/common_crawler/main.py b/source_collectors/common_crawler/main.py deleted file mode 100644 index 67bd4c45..00000000 --- a/source_collectors/common_crawler/main.py +++ /dev/null @@ -1,366 +0,0 @@ -import argparse -import collections -import dataclasses -import os -import re -import sys -from datetime import datetime - -from dotenv import load_dotenv - -from source_collectors.common_crawler.argparser import parse_args -from source_collectors.common_crawler.cache import CommonCrawlerCacheManager -from source_collectors.common_crawler.crawler import CommonCrawlResult, CommonCrawlerManager -from source_collectors.common_crawler.csv_manager import CSVManager - -# The below code sets the working directory to be the root of the entire repository -# This is done to solve otherwise quite annoying import issues. -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) - -from util.huggingface_api_manager import HuggingFaceAPIManager -from util.miscellaneous_functions import get_filename_friendly_timestamp -from label_studio_interface.LabelStudioConfig import LabelStudioConfig -from label_studio_interface.LabelStudioAPIManager import LabelStudioAPIManager - -""" -This module contains the main function for the Common Crawler script. -""" - - -@dataclasses.dataclass -class BatchInfo: - """ - Dataclass for batch info - """ - datetime: str - source: str - count: str - keywords: str - notes: str - filename: str - - -class LabelStudioError(Exception): - """Custom exception for Label Studio Errors""" - - pass - - -BATCH_HEADERS = ["Datetime", "Source", "Count", "Keywords", "Notes", "Filename"] - - -def get_current_time(): - """ - Returns the current time - """ - return str(datetime.now()) - - -def add_batch_info_to_csv( - common_crawl_result: CommonCrawlResult, args: argparse.Namespace, last_page: int -) -> BatchInfo: - """ - Adds batch info to CSV - """ - batch_info = BatchInfo( - datetime=get_current_time(), - source="Common Crawl", - count=str(len(common_crawl_result.url_results)), - keywords=f"{args.url} - {args.keyword}", - notes=f"{args.common_crawl_id}, {args.pages} pages, starting at {last_page + 1}", - filename=f"{args.output_filename}_{get_filename_friendly_timestamp()}", - ) - - batch_info_csv_manager = CSVManager( - file_name="batch_info", directory=args.data_dir, headers=BATCH_HEADERS - ) - batch_info_csv_manager.add_row(dataclasses.astuple(batch_info)) - - return batch_info - - -def main(): - """ - Main function - """ - # Parse the arguments - args = parse_args() - - # Initialize the Cache - cache_manager = CommonCrawlerCacheManager( - file_name=args.cache_filename, directory=args.data_dir - ) - - load_dotenv() - - # Initialize the HuggingFace API Manager - hf_access_token = os.getenv("HUGGINGFACE_ACCESS_TOKEN") - if not hf_access_token: - raise ValueError( - "HUGGINGFACE_ACCESS_TOKEN not accessible in .env file in root directory. 
" - "Please obtain access token from your personal account at " - "https://huggingface.co/settings/tokens and ensure you have write access to " - "https://huggingface.co/PDAP. Then include in .env file in root directory." - ) - huggingface_api_manager = HuggingFaceAPIManager( - access_token=hf_access_token, repo_id=args.huggingface_repo_id - ) - ls_access_token = os.getenv("LABEL_STUDIO_ACCESS_TOKEN") - if not ls_access_token: - raise ValueError( - "LABEL_STUDIO_ACCESS_TOKEN not accessible in .env file in root directory. " - "Please obtain access token from your personal account at " - "https://app.heartex.com/user/account and ensure you have read access to " - "https://app.heartex.com/projects/61550. Then include in .env file in root directory." - ) - ls_project_id = os.getenv("LABEL_STUDIO_PROJECT_ID") - if not ls_project_id: - raise ValueError( - "LABEL_STUDIO_PROJECT_ID not accessible in .env file in root directory. " - "Please obtain a project ID by navigating to the Label Studio project " - "where it will be visibile in the url. Then include in .env file in root directory." - ) - - try: - print("Retrieving Label Studio data for deduplication") - label_studio_results = get_ls_data() - if label_studio_results is None: - raise LabelStudioError("Failed to retrieve Label Studio Data") - print("Label Studio data retrieved successfully") - except LabelStudioError as e: - print(e) - raise - - if args.reset_cache: - cache_manager.reset_cache() - - try: - # Retrieve the last page from the cache, or 0 if it does not exist - last_page = cache_manager.get(args.common_crawl_id, args.url, args.keyword) - common_crawl_result = process_crawl_and_upload( - args, last_page, huggingface_api_manager, label_studio_results - ) - except ValueError as e: - print(f"Error during crawling: {e}") - return - - try: - cache_manager.upsert( - index=args.common_crawl_id, - url=args.url, - keyword=args.keyword, - last_page=common_crawl_result.last_page_search, - ) - cache_manager.save_cache() - - except ValueError as e: - print(f"Error while saving cache manager: {e}") - - -def handle_remote_results_error(remote_results): - """ - Handles errors in the remote results - - Args: remote_results (dict): The results from the label studio project - Raises: LabelStudioError: If an error is found in the remote results - """ - - status_code = remote_results.get("status_code") - if status_code == 401: - raise LabelStudioError("Invalid Label Studio token passed! Exiting...") - elif status_code == 404: - raise LabelStudioError("Invalid Label Studio Project ID! Exiting...") - else: - raise LabelStudioError(f"Unexpected error: {remote_results}") - - -def validate_remote_results(remote_results): - """ - Validates the remote results retrieved from the Label Studio project - - Args: remote_results (dict or list): The results from the Label Studio project - - Returns: - list[dict]: If the remote results are valid - None: If the remote results are invalid - """ - if isinstance(remote_results, list): - if not remote_results: - print("No data in Label Studio project.") - return [] - elif "url" not in remote_results[0]["data"]: - raise LabelStudioError( - "Column 'url' not present in Label Studio project. Exiting..." 
- ) - else: - return remote_results - elif isinstance(remote_results, dict): - handle_remote_results_error(remote_results) - else: - raise LabelStudioError("Unexpected response type.") - - -def get_ls_data() -> list[dict] | None: - """Retrieves data from a Label Studio project to be used in deduplication of common crawl results. - - Returns: - list[dict] | None: Data from the Labels Studio project or None if the result is invalid. - """ - # Retrieve the data from the Labels Studio project - config = LabelStudioConfig() - api_manager = LabelStudioAPIManager(config) - response = api_manager.import_tasks_from_project(all_tasks=True) - remote_results = response.json() - - return validate_remote_results(remote_results) - - -def strip_url(url: str) -> str: - """Strips http(s)://www. from the beginning of a url if applicable. - - Args: - url (str): The URL to strip. - - Returns: - str: The stripped URL. - """ - result = re.search(r"^(?:https?://)?(?:www\.)?(.*)$", url).group(1) - return result - - -def remove_local_duplicates(url_results: list[str]) -> list[str]: - """Removes duplicate URLs from a list, ignoring http(s)://www. - - Args: - url_results (list[str]): List of URLs to deduplicate. - - Returns: - list[str]: List of unique URLs. - """ - stripped_url_results = [strip_url(url) for url in url_results] - unique_urls = collections.deque() - adjust = 0 - - for index, url in enumerate(stripped_url_results): - if url in unique_urls: - del url_results[index - adjust] - adjust += 1 - else: - unique_urls.appendleft(url) - - return url_results - - -def remove_remote_duplicates( - url_results: list[str], label_studio_data: list[dict] -) -> list[str]: - """Removes URLs from a list that are already present in the Label Studio project, ignoring http(s)://www. - - Args: - url_results (list[str]): List of URLs to deduplicate. - label_studio_data (list[dict]): Label Studio project data to check for duplicates. - - Returns: - list[str]: List of remaining URLs not present in the Label Studio project. - """ - try: - remote_urls = [strip_url(task["data"]["url"]) for task in label_studio_data] - except TypeError: - print( - "Invalid Label Studio credentials. Database could not be checked for duplicates." - ) - return url_results - remote_urls = set(remote_urls) - - stripped_url_results = [strip_url(url) for url in url_results] - adjust = 0 - - for index, url in enumerate(stripped_url_results): - if url in remote_urls: - del url_results[index - adjust] - adjust += 1 - - return url_results - - -def handle_csv_and_upload( - common_crawl_result: CommonCrawlResult, - huggingface_api_manager: HuggingFaceAPIManager, - args: argparse.Namespace, - last_page: int, -): - """ - Handles the CSV file and uploads it to Hugging Face repository. - Args: - common_crawl_result: The result from Common Crawl. - huggingface_api_manager: The Hugging Face API manager. - args: The command-line arguments. 
- last_page: last page crawled - - """ - batch_info = add_batch_info_to_csv(common_crawl_result, args, last_page) - - csv_manager = CSVManager( - file_name=batch_info.filename, headers=["url"], directory=args.data_dir - ) - csv_manager.add_rows(common_crawl_result.url_results) - huggingface_api_manager.upload_file( - local_file_path=csv_manager.file_path, - repo_file_path=f"{args.output_filename}/{csv_manager.file_path.name}", - ) - print( - f"Uploaded file to Hugging Face repo {huggingface_api_manager.repo_id} at {args.output_filename}/{csv_manager.file_path.name}" - ) - csv_manager.delete_file() - - -def process_crawl_and_upload( - args: argparse.Namespace, - last_page: int, - huggingface_api_manager: HuggingFaceAPIManager, - label_studio_data: list[dict], -) -> CommonCrawlResult: - """ - Processes a crawl and uploads the results to Hugging Face. - """ - # Initialize the CommonCrawlerManager - crawler_manager = CommonCrawlerManager(args.common_crawl_id) - # Determine the pages to search, based on the last page searched - start_page = last_page + 1 - # Use the parsed arguments - common_crawl_result: CommonCrawlResult = crawler_manager.crawl( - search_term=args.url, - keyword=args.keyword, - num_pages=args.pages, - start_page=start_page, - ) - # Logic should conclude here if no results are found - if not common_crawl_result.url_results: - print("No url results found. Ceasing main execution.") - add_batch_info_to_csv(common_crawl_result, args, last_page) - return common_crawl_result - - print("Removing urls already in the database") - common_crawl_result.url_results = remove_local_duplicates( - common_crawl_result.url_results - ) - common_crawl_result.url_results = remove_remote_duplicates( - common_crawl_result.url_results, label_studio_data - ) - if not common_crawl_result.url_results: - print( - "No urls not already present in the database found. Ceasing main execution." 
- ) - add_batch_info_to_csv(common_crawl_result, args, last_page) - return common_crawl_result - - handle_csv_and_upload(common_crawl_result, huggingface_api_manager, args, last_page) - - return common_crawl_result - - -if __name__ == "__main__": - # Example usage: python main.py CC-MAIN-2023-50 *.gov "police" - # Usage with optional arguments: python main.py CC-MAIN-2023-50 *.gov "police" -p 2 -o police_urls.txt - print("Running Common Crawler...") - main() diff --git a/source_collectors/common_crawler/requirements_common_crawler_action.txt b/source_collectors/common_crawler/requirements_common_crawler_action.txt deleted file mode 100644 index 22823fd0..00000000 --- a/source_collectors/common_crawler/requirements_common_crawler_action.txt +++ /dev/null @@ -1,3 +0,0 @@ -requests~=2.31.0 -python-dotenv~=1.0.1 -huggingface-hub~=0.22.2 \ No newline at end of file diff --git a/source_collectors/common_crawler/schemas.py b/source_collectors/common_crawler/schemas.py deleted file mode 100644 index 608f9632..00000000 --- a/source_collectors/common_crawler/schemas.py +++ /dev/null @@ -1,22 +0,0 @@ -from marshmallow import Schema, fields - - -class CommonCrawlerConfigSchema(Schema): - common_crawl_id = fields.String( - required=True, - description="The Common Crawl ID", - example="CC-MAIN-2022-10" - ) - url = fields.String(required=True, description="The URL to query", example="*.gov") - keyword = fields.String(required=True, description="The keyword to search in the url", example="police") - start_page = fields.Integer(required=False, description="The page to start from", example=1) - pages = fields.Integer(required=False, description="The number of pages to search", example=1) - -class CommonCrawlerOutputSchema(Schema): - urls = fields.List( - fields.String( - required=True - ), - required=True, - description="The list of URLs found in the search" - ) \ No newline at end of file diff --git a/source_collectors/muckrock/.gitignore b/source_collectors/muckrock/.gitignore index 3ad8c498..5047d9bc 100644 --- a/source_collectors/muckrock/.gitignore +++ b/source_collectors/muckrock/.gitignore @@ -226,4 +226,3 @@ flycheck_*.el *.json *.csv /csv -last_page_fetched.txt diff --git a/source_collectors/muckrock/MuckrockAPIInterface.py b/source_collectors/muckrock/MuckrockAPIInterface.py new file mode 100644 index 00000000..703164fc --- /dev/null +++ b/source_collectors/muckrock/MuckrockAPIInterface.py @@ -0,0 +1,51 @@ +from enum import Enum +from typing import Optional + +import requests +from aiohttp import ClientSession +from pydantic import BaseModel + + +class AgencyLookupResponseType(Enum): + FOUND = "found" + NOT_FOUND = "not_found" + ERROR = "error" + +class AgencyLookupResponse(BaseModel): + name: Optional[str] + type: AgencyLookupResponseType + error: Optional[str] = None + + + +class MuckrockAPIInterface: + + def __init__(self, session: Optional[ClientSession] = None): + self.base_url = "https://www.muckrock.com/api_v1/" + self.session = session + + def build_url(self, subpath: str): + return f"{self.base_url}{subpath}" + + + async def lookup_agency(self, muckrock_agency_id: int) -> AgencyLookupResponse: + url = self.build_url(f"agency/{muckrock_agency_id}") + try: + async with self.session.get(url) as results: + results.raise_for_status() + json = await results.json() + name = json["name"] + return AgencyLookupResponse( + name=name, type=AgencyLookupResponseType.FOUND + ) + except requests.exceptions.HTTPError as e: + return AgencyLookupResponse( + name=None, + type=AgencyLookupResponseType.ERROR, 
+ error=str(e) + ) + except KeyError: + return AgencyLookupResponse( + name=None, type=AgencyLookupResponseType.NOT_FOUND + ) + diff --git a/source_collectors/muckrock/README.md b/source_collectors/muckrock/README.md index 43bae80d..a7e75b71 100644 --- a/source_collectors/muckrock/README.md +++ b/source_collectors/muckrock/README.md @@ -4,85 +4,3 @@ This repo provides tools for searching Muckrock FOIA requests, it includes scripts for downloading data from MuckRock, generating CSV files per PDAP database requirements, and automatic labeling -## Installation - -### 1. Clone the `scrapers` repository and navigate to the `muckrock_tools` directory. - -``` -git clone git@github.com:Police-Data-Accessibility-Project/scrapers.git -cd scrapers/scrapers_library/data_portals/muckrock/muckrock_tools -``` - -### 2. Create a virtual environment. - -If you don't already have virtualenv, install the package: - -``` - -pip install virtualenv - -``` - -Then run the following command to create a virtual environment (ensure the python version is as below): - -``` - -virtualenv -p python3.12 venv - -``` - -### 3. Activate the virtual environment. - -``` - -source venv/bin/activate - -``` - -### 4. Install dependencies. - -``` - -pip install -r requirements.txt - -``` - -## Uses - -### 1. Simple Search Term - -- `muck_get.py` -- script to perform searches on MuckRock's database, by matching a search string to title of request. Search is slow due to rate limiting (cannot multi thread around it). - -### 2. Clone Muckrock database & search locally - -- scripts to clone the MuckRock foia requests collection for fast local querying (total size <2GB at present) - -- `create_foia_data_db.py` creates and populates a SQLite database (`foia_data.db`) with all MuckRock foia requests. Various errors outside the scope of this script may occur; a counter (`last_page_fetched.txt`) is created to keep track of the most recent page fetched and inserted into the database. If the program exits prematurely, simply run `create_foia_data_db.py` again to continue where you left off. A log file is created to capture errors for later reference. - -- After `foia_data.db` is created, run `search_foia_data_db.py`, which receives a search string as input and outputs a JSON file with all related FOIA requests for later processing by `generate_detailed_muckrock_csv.py`. For example, - -``` -python3 create_foia_data_db.py - -python3 search_foia_data_db.py --search_for "use of force" -``` - -produces 'use_of_force.json'. - -### 3. County Level Search - -- `get_allegheny_foias.py`, `allegheny_county_towns.txt` -- To search for any and all requests in a certain county (e.g. Allegheny in this case) you must provide a list of all municipalities contained within the county. Muckrock stores geographic info in tiers, from Federal, State, and local level. At the local level, e.g. Pittsburgh and Allegheny County are in the same tier, with no way to determine which municipalities reside within a county (without providing it yourself). - -The `get_allegheny_foias.py` script will find the jurisdiction ID for each municipality in `allegheny_county_towns.txt`, then find all completed FOIA requests for those jurisdictions. - -### 4. Generate detailed FOIA data in PDAP database format - -- `generate_detailed_muckrock_csv.py` -- Once you have a json of relevant FOIA's, run it through this script to generate a CSV that fulfills PDAP database requirements. - -### 5. ML Labeling - -- `muckrock_ml_labeler.py` -- A tool for auto labeling MuckRock sources. 
This script is using [fine-url-classifier](https://huggingface.co/PDAP/fine-url-classifier) to assign 1 of 36 record type labels. At present, script is expecting each source to have associated header tags, provided via `html-tag-collector/collector.py`. (TODO: For muckrock sources, `collector.py` insufficient, does not grab main text of the request) diff --git a/source_collectors/muckrock/classes/FOIADBSearcher.py b/source_collectors/muckrock/classes/FOIADBSearcher.py deleted file mode 100644 index 391f7a8d..00000000 --- a/source_collectors/muckrock/classes/FOIADBSearcher.py +++ /dev/null @@ -1,65 +0,0 @@ -import os -import sqlite3 - -import pandas as pd - -from source_collectors.muckrock.constants import FOIA_DATA_DB - -check_results_table_query = """ - SELECT name FROM sqlite_master - WHERE (type = 'table') - AND (name = 'results') - """ - -search_foia_query = """ - SELECT * FROM results - WHERE (title LIKE ? OR tags LIKE ?) - AND (status = 'done') - """ - - -class FOIADBSearcher: - - def __init__(self, db_path = FOIA_DATA_DB): - self.db_path = db_path - if not os.path.exists(self.db_path): - raise FileNotFoundError("foia_data.db does not exist.\nRun create_foia_data_db.py first to create and populate it.") - - - def search(self, search_string: str) -> pd.DataFrame | None: - """ - Searches the foia_data.db database for FOIA request entries matching the provided search string. - - Args: - search_string (str): The string to search for in the `title` and `tags` of the `results` table. - - Returns: - Union[pandas.DataFrame, None]: - - pandas.DataFrame: A DataFrame containing the matching entries from the database. - - None: If an error occurs during the database operation. - - Raises: - sqlite3.Error: If any database operation fails, prints error and returns None. - Exception: If any unexpected error occurs, prints error and returns None. - """ - try: - with sqlite3.connect(self.db_path) as conn: - results_table = pd.read_sql_query(check_results_table_query, conn) - if results_table.empty: - print("The `results` table does not exist in the database.") - return None - - df = pd.read_sql_query( - sql=search_foia_query, - con=conn, - params=[f"%{search_string}%", f"%{search_string}%"] - ) - - except sqlite3.Error as e: - print(f"Sqlite error: {e}") - return None - except Exception as e: - print(f"An unexpected error occurred: {e}") - return None - - return df \ No newline at end of file diff --git a/source_collectors/muckrock/classes/FOIASearcher.py b/source_collectors/muckrock/classes/FOIASearcher.py index b4d3abaa..cb3af7e8 100644 --- a/source_collectors/muckrock/classes/FOIASearcher.py +++ b/source_collectors/muckrock/classes/FOIASearcher.py @@ -17,11 +17,11 @@ def __init__(self, fetcher: FOIAFetcher, search_term: Optional[str] = None): self.fetcher = fetcher self.search_term = search_term - def fetch_page(self) -> list[dict] | None: + async def fetch_page(self) -> list[dict] | None: """ Fetches the next page of results using the fetcher. """ - data = self.fetcher.fetch_next_page() + data = await self.fetcher.fetch_next_page() if data is None or data.get("results") is None: return None return data.get("results") @@ -43,7 +43,7 @@ def update_progress(self, pbar: tqdm, results: list[dict]) -> int: pbar.update(num_results) return num_results - def search_to_count(self, max_count: int) -> list[dict]: + async def search_to_count(self, max_count: int) -> list[dict]: """ Fetches and processes results up to a maximum count. 
""" @@ -52,7 +52,7 @@ def search_to_count(self, max_count: int) -> list[dict]: with tqdm(total=max_count, desc="Fetching results", unit="result") as pbar: while count > 0: try: - results = self.get_next_page_results() + results = await self.get_next_page_results() except SearchCompleteException: break @@ -61,11 +61,11 @@ def search_to_count(self, max_count: int) -> list[dict]: return all_results - def get_next_page_results(self) -> list[dict]: + async def get_next_page_results(self) -> list[dict]: """ Fetches and processes the next page of results. """ - results = self.fetch_page() + results = await self.fetch_page() if not results: raise SearchCompleteException return self.filter_results(results) diff --git a/source_collectors/muckrock/classes/MuckrockCollector.py b/source_collectors/muckrock/classes/MuckrockCollector.py index 8924b116..0511a21d 100644 --- a/source_collectors/muckrock/classes/MuckrockCollector.py +++ b/source_collectors/muckrock/classes/MuckrockCollector.py @@ -1,6 +1,6 @@ import itertools -from collector_manager.CollectorBase import CollectorBase +from collector_manager.AsyncCollectorBase import AsyncCollectorBase from collector_manager.enums import CollectorType from core.preprocessors.MuckrockPreprocessor import MuckrockPreprocessor from source_collectors.muckrock.DTOs import MuckrockAllFOIARequestsCollectorInputDTO, \ @@ -15,7 +15,7 @@ from source_collectors.muckrock.classes.muckrock_fetchers.MuckrockFetcher import MuckrockNoMoreDataError -class MuckrockSimpleSearchCollector(CollectorBase): +class MuckrockSimpleSearchCollector(AsyncCollectorBase): """ Performs searches on MuckRock's database by matching a search string to title of request @@ -29,7 +29,7 @@ def check_for_count_break(self, count, max_count) -> None: if count >= max_count: raise SearchCompleteException - def run_implementation(self) -> None: + async def run_implementation(self) -> None: fetcher = FOIAFetcher() dto: MuckrockSimpleSearchCollectorInputDTO = self.dto searcher = FOIASearcher( @@ -41,15 +41,15 @@ def run_implementation(self) -> None: results_count = 0 for search_count in itertools.count(): try: - results = searcher.get_next_page_results() + results = await searcher.get_next_page_results() all_results.extend(results) results_count += len(results) self.check_for_count_break(results_count, max_count) except SearchCompleteException: break - self.log(f"Search {search_count}: Found {len(results)} results") + await self.log(f"Search {search_count}: Found {len(results)} results") - self.log(f"Search Complete. Total results: {results_count}") + await self.log(f"Search Complete. 
Total results: {results_count}") self.data = {"urls": self.format_results(all_results)} def format_results(self, results: list[dict]) -> list[dict]: @@ -64,19 +64,19 @@ def format_results(self, results: list[dict]) -> list[dict]: return formatted_results -class MuckrockCountyLevelSearchCollector(CollectorBase): +class MuckrockCountyLevelSearchCollector(AsyncCollectorBase): """ Searches for any and all requests in a certain county """ collector_type = CollectorType.MUCKROCK_COUNTY_SEARCH preprocessor = MuckrockPreprocessor - def run_implementation(self) -> None: - jurisdiction_ids = self.get_jurisdiction_ids() + async def run_implementation(self) -> None: + jurisdiction_ids = await self.get_jurisdiction_ids() if jurisdiction_ids is None: - self.log("No jurisdictions found") + await self.log("No jurisdictions found") return - all_data = self.get_foia_records(jurisdiction_ids) + all_data = await self.get_foia_records(jurisdiction_ids) formatted_data = self.format_data(all_data) self.data = {"urls": formatted_data} @@ -89,19 +89,17 @@ def format_data(self, all_data): }) return formatted_data - def get_foia_records(self, jurisdiction_ids): - # TODO: Mock results here and test separately + async def get_foia_records(self, jurisdiction_ids): all_data = [] for name, id_ in jurisdiction_ids.items(): - self.log(f"Fetching records for {name}...") + await self.log(f"Fetching records for {name}...") request = FOIALoopFetchRequest(jurisdiction=id_) fetcher = FOIALoopFetcher(request) - fetcher.loop_fetch() + await fetcher.loop_fetch() all_data.extend(fetcher.ffm.results) return all_data - def get_jurisdiction_ids(self): - # TODO: Mock results here and test separately + async def get_jurisdiction_ids(self): dto: MuckrockCountySearchCollectorInputDTO = self.dto parent_jurisdiction_id = dto.parent_jurisdiction_id request = JurisdictionLoopFetchRequest( @@ -110,40 +108,39 @@ def get_jurisdiction_ids(self): town_names=dto.town_names ) fetcher = JurisdictionGeneratorFetcher(initial_request=request) - for message in fetcher.generator_fetch(): - self.log(message) + async for message in fetcher.generator_fetch(): + await self.log(message) jurisdiction_ids = fetcher.jfm.jurisdictions return jurisdiction_ids -class MuckrockAllFOIARequestsCollector(CollectorBase): +class MuckrockAllFOIARequestsCollector(AsyncCollectorBase): """ Retrieves urls associated with all Muckrock FOIA requests """ collector_type = CollectorType.MUCKROCK_ALL_SEARCH preprocessor = MuckrockPreprocessor - def run_implementation(self) -> None: + async def run_implementation(self) -> None: dto: MuckrockAllFOIARequestsCollectorInputDTO = self.dto start_page = dto.start_page fetcher = FOIAFetcher( start_page=start_page, ) total_pages = dto.total_pages - all_page_data = self.get_page_data(fetcher, start_page, total_pages) + all_page_data = await self.get_page_data(fetcher, start_page, total_pages) all_transformed_data = self.transform_data(all_page_data) self.data = {"urls": all_transformed_data} - def get_page_data(self, fetcher, start_page, total_pages): - # TODO: Mock results here and test separately + async def get_page_data(self, fetcher, start_page, total_pages): all_page_data = [] for page in range(start_page, start_page + total_pages): - self.log(f"Fetching page {fetcher.current_page}") + await self.log(f"Fetching page {fetcher.current_page}") try: - page_data = fetcher.fetch_next_page() + page_data = await fetcher.fetch_next_page() except MuckrockNoMoreDataError: - self.log(f"No more data to fetch at page {fetcher.current_page}") + await 
self.log(f"No more data to fetch at page {fetcher.current_page}") break if page_data is None: continue diff --git a/source_collectors/muckrock/classes/SQLiteClient.py b/source_collectors/muckrock/classes/SQLiteClient.py deleted file mode 100644 index 96a59d82..00000000 --- a/source_collectors/muckrock/classes/SQLiteClient.py +++ /dev/null @@ -1,38 +0,0 @@ -import logging -import sqlite3 - - -class SQLClientError(Exception): - pass - - -class SQLiteClient: - - def __init__(self, db_path: str) -> None: - self.conn = sqlite3.connect(db_path) - - def execute_query(self, query: str, many=None): - - try: - if many is not None: - self.conn.executemany(query, many) - else: - self.conn.execute(query) - self.conn.commit() - except sqlite3.Error as e: - print(f"SQLite error: {e}") - error_msg = f"Failed to execute query due to SQLite error: {e}" - logging.error(error_msg) - self.conn.rollback() - raise SQLClientError(error_msg) - -class SQLiteClientContextManager: - - def __init__(self, db_path: str) -> None: - self.client = SQLiteClient(db_path) - - def __enter__(self): - return self.client - - def __exit__(self, exc_type, exc_value, traceback): - self.client.conn.close() \ No newline at end of file diff --git a/source_collectors/muckrock/classes/muckrock_fetchers/AgencyFetcher.py b/source_collectors/muckrock/classes/muckrock_fetchers/AgencyFetcher.py index d3e7364a..e73180df 100644 --- a/source_collectors/muckrock/classes/muckrock_fetchers/AgencyFetcher.py +++ b/source_collectors/muckrock/classes/muckrock_fetchers/AgencyFetcher.py @@ -11,5 +11,5 @@ class AgencyFetcher(MuckrockFetcher): def build_url(self, request: AgencyFetchRequest) -> str: return f"{BASE_MUCKROCK_URL}/agency/{request.agency_id}/" - def get_agency(self, agency_id: int): - return self.fetch(AgencyFetchRequest(agency_id=agency_id)) \ No newline at end of file + async def get_agency(self, agency_id: int): + return await self.fetch(AgencyFetchRequest(agency_id=agency_id)) \ No newline at end of file diff --git a/source_collectors/muckrock/classes/muckrock_fetchers/FOIAFetcher.py b/source_collectors/muckrock/classes/muckrock_fetchers/FOIAFetcher.py index 526698b7..3a057864 100644 --- a/source_collectors/muckrock/classes/muckrock_fetchers/FOIAFetcher.py +++ b/source_collectors/muckrock/classes/muckrock_fetchers/FOIAFetcher.py @@ -30,12 +30,12 @@ def __init__(self, start_page: int = 1, per_page: int = 100): def build_url(self, request: FOIAFetchRequest) -> str: return f"{FOIA_BASE_URL}?page={request.page}&page_size={request.page_size}&format=json" - def fetch_next_page(self) -> dict | None: + async def fetch_next_page(self) -> dict | None: """ Fetches data from a specific page of the MuckRock FOIA API. 
""" page = self.current_page self.current_page += 1 request = FOIAFetchRequest(page=page, page_size=self.per_page) - return self.fetch(request) + return await self.fetch(request) diff --git a/source_collectors/muckrock/classes/muckrock_fetchers/JurisdictionByIDFetcher.py b/source_collectors/muckrock/classes/muckrock_fetchers/JurisdictionByIDFetcher.py index c8c467a1..08db97dd 100644 --- a/source_collectors/muckrock/classes/muckrock_fetchers/JurisdictionByIDFetcher.py +++ b/source_collectors/muckrock/classes/muckrock_fetchers/JurisdictionByIDFetcher.py @@ -11,5 +11,5 @@ class JurisdictionByIDFetcher(MuckrockFetcher): def build_url(self, request: JurisdictionByIDFetchRequest) -> str: return f"{BASE_MUCKROCK_URL}/jurisdiction/{request.jurisdiction_id}/" - def get_jurisdiction(self, jurisdiction_id: int) -> dict: - return self.fetch(request=JurisdictionByIDFetchRequest(jurisdiction_id=jurisdiction_id)) + async def get_jurisdiction(self, jurisdiction_id: int) -> dict: + return await self.fetch(request=JurisdictionByIDFetchRequest(jurisdiction_id=jurisdiction_id)) diff --git a/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockFetcher.py b/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockFetcher.py index 72ce8336..c1a6eecb 100644 --- a/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockFetcher.py +++ b/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockFetcher.py @@ -1,7 +1,9 @@ import abc +import asyncio from abc import ABC import requests +import aiohttp from source_collectors.muckrock.classes.fetch_requests.FetchRequestBase import FetchRequest @@ -12,30 +14,18 @@ class MuckrockNoMoreDataError(Exception): class MuckrockServerError(Exception): pass -def fetch_muckrock_data_from_url(url: str) -> dict | None: - response = requests.get(url) - try: - response.raise_for_status() - except requests.exceptions.HTTPError as e: - print(f"Failed to get records on request `{url}`: {e}") - # If code is 404, raise NoMoreData error - if e.response.status_code == 404: - raise MuckrockNoMoreDataError - if 500 <= e.response.status_code < 600: - raise MuckrockServerError - return None - - # TODO: POINT OF MOCK - data = response.json() - return data - class MuckrockFetcher(ABC): - def fetch(self, request: FetchRequest) -> dict | None: + async def get_async_request(self, url: str) -> dict | None: + async with aiohttp.ClientSession() as session: + async with session.get(url) as response: + response.raise_for_status() + return await response.json() + + async def fetch(self, request: FetchRequest) -> dict | None: url = self.build_url(request) - response = requests.get(url) try: - response.raise_for_status() + return await self.get_async_request(url) except requests.exceptions.HTTPError as e: print(f"Failed to get records on request `{url}`: {e}") # If code is 404, raise NoMoreData error @@ -45,10 +35,6 @@ def fetch(self, request: FetchRequest) -> dict | None: raise MuckrockServerError return None - # TODO: POINT OF MOCK - data = response.json() - return data - @abc.abstractmethod def build_url(self, request: FetchRequest) -> str: pass diff --git a/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockIterFetcherBase.py b/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockIterFetcherBase.py index 30024d36..67253034 100644 --- a/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockIterFetcherBase.py +++ b/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockIterFetcherBase.py @@ -1,5 +1,7 @@ +import asyncio from abc import ABC, abstractmethod 
+import aiohttp import requests from source_collectors.muckrock.classes.exceptions.RequestFailureException import RequestFailureException @@ -11,15 +13,18 @@ class MuckrockIterFetcherBase(ABC): def __init__(self, initial_request: FetchRequest): self.initial_request = initial_request - def get_response(self, url) -> dict: - # TODO: POINT OF MOCK - response = requests.get(url) + async def get_response_async(self, url) -> dict: + async with aiohttp.ClientSession() as session: + async with session.get(url) as response: + response.raise_for_status() + return await response.json() + + async def get_response(self, url) -> dict: try: - response.raise_for_status() + return await self.get_response_async(url) except requests.exceptions.HTTPError as e: print(f"Failed to get records on request `{url}`: {e}") raise RequestFailureException - return response.json() @abstractmethod def process_results(self, results: list[dict]): diff --git a/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockLoopFetcher.py b/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockLoopFetcher.py index 3558b7cd..2e4814a5 100644 --- a/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockLoopFetcher.py +++ b/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockLoopFetcher.py @@ -7,11 +7,11 @@ class MuckrockLoopFetcher(MuckrockIterFetcherBase): - def loop_fetch(self): + async def loop_fetch(self): url = self.build_url(self.initial_request) while url is not None: try: - data = self.get_response(url) + data = await self.get_response(url) except RequestFailureException: break diff --git a/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockNextFetcher.py b/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockNextFetcher.py index 7c5fd359..889e8446 100644 --- a/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockNextFetcher.py +++ b/source_collectors/muckrock/classes/muckrock_fetchers/MuckrockNextFetcher.py @@ -8,7 +8,7 @@ class MuckrockGeneratorFetcher(MuckrockIterFetcherBase): as a generator instead of a loop """ - def generator_fetch(self) -> str: + async def generator_fetch(self) -> str: """ Fetches data and yields status messages between requests """ @@ -16,7 +16,7 @@ def generator_fetch(self) -> str: final_message = "No more records found. Exiting..." while url is not None: try: - data = self.get_response(url) + data = await self.get_response(url) except RequestFailureException: final_message = "Request unexpectedly failed. Exiting..." break diff --git a/source_collectors/muckrock/constants.py b/source_collectors/muckrock/constants.py index 07dca8f4..f152d8c4 100644 --- a/source_collectors/muckrock/constants.py +++ b/source_collectors/muckrock/constants.py @@ -1,4 +1,3 @@ BASE_MUCKROCK_URL = "https://www.muckrock.com/api_v1" -FOIA_DATA_DB = "foia_data.db" \ No newline at end of file diff --git a/source_collectors/muckrock/create_foia_data_db.py b/source_collectors/muckrock/create_foia_data_db.py deleted file mode 100644 index 9114801c..00000000 --- a/source_collectors/muckrock/create_foia_data_db.py +++ /dev/null @@ -1,260 +0,0 @@ -""" -create_foia_data_db.py - -This script fetches data from the MuckRock FOIA API and stores it in a SQLite database. -Run this prior to companion script `search_foia_data_db.py`. - -A successful run will output a SQLite database `foia_data.db` with one table `results`. -The database will contain all FOIA requests available through MuckRock. 
- -Functions: - - create_db() - - fetch_page() - - transform_page_data() - - populate_db() - - main() - -Error Handling: -Errors encountered during API requests or database operations are logged to an `errors.log` file -and/or printed to the console. -""" - -import json -import logging -import os -import time -from typing import List, Tuple, Dict, Any - -from tqdm import tqdm - -from source_collectors.muckrock.classes.SQLiteClient import SQLiteClientContextManager, SQLClientError -from source_collectors.muckrock.classes.muckrock_fetchers import FOIAFetcher -from source_collectors.muckrock.classes.muckrock_fetchers.MuckrockFetcher import MuckrockNoMoreDataError - -logging.basicConfig( - filename="errors.log", level=logging.ERROR, format="%(levelname)s: %(message)s" -) - -# TODO: Why are we pulling every single FOIA request? - -last_page_fetched = "last_page_fetched.txt" - -NO_MORE_DATA = -1 # flag for program exit -JSON = Dict[str, Any] # type alias - - -create_table_query = """ - CREATE TABLE IF NOT EXISTS results ( - id INTEGER PRIMARY KEY, - title TEXT, - slug TEXT, - status TEXT, - embargo_status TEXT, - user INTEGER, - username TEXT, - agency INTEGER, - datetime_submitted TEXT, - date_due TEXT, - days_until_due INTEGER, - date_followup TEXT, - datetime_done TEXT, - datetime_updated TEXT, - date_embargo TEXT, - tracking_id TEXT, - price TEXT, - disable_autofollowups BOOLEAN, - tags TEXT, - communications TEXT, - absolute_url TEXT - ) - """ - - -foia_insert_query = """ - INSERT INTO results (id, title, slug, status, embargo_status, user, username, agency, - datetime_submitted, date_due, days_until_due, date_followup, - datetime_done, datetime_updated, date_embargo, tracking_id, - price, disable_autofollowups, tags, communications, absolute_url) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) - """ - - -def create_db() -> bool: - """ - Creates foia_data.db SQLite database with one table named `results`. - - Returns: - bool: True, if database is successfully created; False otherwise. - - Raises: - sqlite3.Error: If the table creation operation fails, - prints error and returns False. - """ - with SQLiteClientContextManager("foia_data.db") as client: - try: - client.execute_query(create_table_query) - return True - except SQLClientError as e: - print(f"SQLite error: {e}.") - logging.error(f"Failed to create foia_data.db due to SQLite error: {e}") - return False - -def transform_page_data(data_to_transform: JSON) -> List[Tuple[Any, ...]]: - """ - Transforms the data received from the MuckRock FOIA API - into a structured format for insertion into a database with `populate_db()`. - - Transforms JSON input into a list of tuples, - as well as serializes the nested `tags` and `communications` fields - into JSON strings. - - Args: - data_to_transform: The JSON data from the API response. - Returns: - A list of tuples, where each tuple contains the fields - of a single FOIA request. 
- """ - - transformed_data = [] - - for result in data_to_transform.get("results", []): - result["tags"] = json.dumps(result.get("tags", [])) - result["communications"] = json.dumps(result.get("communications", [])) - - transformed_data.append( - ( - result["id"], - result["title"], - result["slug"], - result["status"], - result["embargo_status"], - result["user"], - result["username"], - result["agency"], - result["datetime_submitted"], - result["date_due"], - result["days_until_due"], - result["date_followup"], - result["datetime_done"], - result["datetime_updated"], - result["date_embargo"], - result["tracking_id"], - result["price"], - result["disable_autofollowups"], - result["tags"], - result["communications"], - result["absolute_url"], - ) - ) - return transformed_data - - -def populate_db(transformed_data: List[Tuple[Any, ...]], page: int) -> None: - """ - Populates foia_data.db SQLite database with the transfomed FOIA request data. - - Args: - transformed_data (List[Tuple[Any, ...]]): A list of tuples, where each tuple contains the fields of a single FOIA request. - page (int): The current page number for printing and logging errors. - - Returns: - None - - Raises: - sqlite3.Error: If the insertion operation fails, attempts to retry operation (max_retries = 2). If retries are - exhausted, logs error and exits. - """ - with SQLiteClientContextManager("foia_data.db") as client: - retries = 0 - max_retries = 2 - while retries < max_retries: - try: - client.execute_query(foia_insert_query, many=transformed_data) - print("Successfully inserted data!") - return - except SQLClientError as e: - print(f"{e}. Retrying...") - retries += 1 - time.sleep(1) - - if retries == max_retries: - report_max_retries_error(max_retries, page) - - -def report_max_retries_error(max_retries, page): - print( - f"Failed to insert data from page {page} after { - max_retries} attempts. Skipping to next page." - ) - logging.error( - f"Failed to insert data from page {page} after { - max_retries} attempts." - ) - - -def main() -> None: - """ - Main entry point for create_foia_data_db.py. - - This function orchestrates the process of fetching - FOIA requests data from the MuckRock FOIA API, transforming it, - and storing it in a SQLite database. - """ - - if not os.path.exists("foia_data.db"): - print("Creating foia_data.db...") - success = create_db() - if success == False: - print("Failed to create foia_data.db") - return - - start_page = get_start_page() - fetcher = FOIAFetcher( - start_page=start_page - ) - - with tqdm(initial=start_page, unit="page") as pbar: - while True: - - # TODO: Build collector that does similar logic - try: - pbar.update() - page_data = fetcher.fetch_next_page() - except MuckrockNoMoreDataError: - # Exit program because no more data exists - break - if page_data is None: - continue - transformed_data = transform_page_data(page_data) - populate_db(transformed_data, fetcher.current_page) - - with open(last_page_fetched, mode="w") as file: - file.write(str(fetcher.current_page)) - - print("create_foia_data_db.py run finished") - - -def get_start_page(): - """ - Returns the page number to start fetching from. - - If the file `last_page_fetched` exists, - reads the page number from the file and returns it + 1. - Otherwise, returns 1. 
- """ - if os.path.exists(last_page_fetched): - with open(last_page_fetched, mode="r") as file: - page = int(file.read()) + 1 - else: - page = 1 - return page - - -if __name__ == "__main__": - try: - main() - except Exception as e: - logging.error(f"An unexpected error occurred: {e}") - print( - "Check errors.log to review errors. Run create_foia_data_db.py again to continue" - ) diff --git a/source_collectors/muckrock/generate_detailed_muckrock_csv.py b/source_collectors/muckrock/generate_detailed_muckrock_csv.py index 3cb884c0..94e0034f 100644 --- a/source_collectors/muckrock/generate_detailed_muckrock_csv.py +++ b/source_collectors/muckrock/generate_detailed_muckrock_csv.py @@ -67,22 +67,22 @@ def keys(self) -> list[str]: return list(self.model_dump().keys()) -def main(): +async def main(): json_filename = get_json_filename() json_data = load_json_file(json_filename) output_csv = format_filename_json_to_csv(json_filename) - agency_infos = get_agency_infos(json_data) + agency_infos = await get_agency_infos(json_data) write_to_csv(agency_infos, output_csv) -def get_agency_infos(json_data): +async def get_agency_infos(json_data): a_fetcher = AgencyFetcher() j_fetcher = JurisdictionByIDFetcher() agency_infos = [] # Iterate through the JSON data for item in json_data: print(f"Writing data for {item.get('title')}") - agency_data = a_fetcher.get_agency(agency_id=item.get("agency")) + agency_data = await a_fetcher.get_agency(agency_id=item.get("agency")) time.sleep(1) jurisdiction_data = j_fetcher.get_jurisdiction( jurisdiction_id=agency_data.get("jurisdiction") diff --git a/source_collectors/muckrock/get_allegheny_foias.py b/source_collectors/muckrock/get_allegheny_foias.py deleted file mode 100644 index ddfb1d60..00000000 --- a/source_collectors/muckrock/get_allegheny_foias.py +++ /dev/null @@ -1,67 +0,0 @@ -""" -Get Allegheny County FOIA requests -and save them to a JSON file - -""" - -from source_collectors.muckrock.classes.fetch_requests.FOIALoopFetchRequest import FOIALoopFetchRequest -from source_collectors.muckrock.classes.muckrock_fetchers import JurisdictionLoopFetchRequest, \ - JurisdictionLoopFetcher -from source_collectors.muckrock.classes.muckrock_fetchers.FOIALoopFetcher import FOIALoopFetcher -from source_collectors.muckrock.utils import save_json_file - - -def fetch_jurisdiction_ids(town_file, level="l", parent=126): - """ - fetch jurisdiction IDs based on town names from a text file - """ - with open(town_file, "r") as file: - town_names = [line.strip() for line in file] - - request = JurisdictionLoopFetchRequest( - level=level, parent=parent, town_names=town_names - ) - - fetcher = JurisdictionLoopFetcher(request) - fetcher.loop_fetch() - return fetcher.jurisdictions - - - -def fetch_foia_data(jurisdiction_ids): - """ - fetch FOIA data for each jurisdiction ID and save it to a JSON file - """ - all_data = [] - for name, id_ in jurisdiction_ids.items(): - print(f"\nFetching records for {name}...") - request = FOIALoopFetchRequest(jurisdiction=id_) - fetcher = FOIALoopFetcher(request) - fetcher.loop_fetch() - all_data.extend(fetcher.ffm.results) - - # Save the combined data to a JSON file - save_json_file(file_path="foia_data_combined.json", data=all_data) - print(f"Saved {len(all_data)} records to foia_data_combined.json") - - -def main(): - """ - Execute the script - """ - town_file = "allegheny-county-towns.txt" - # Fetch jurisdiction IDs based on town names - jurisdiction_ids = fetch_jurisdiction_ids( - town_file, - level="l", - parent=126 - ) - print(f"Jurisdiction IDs 
fetched: {jurisdiction_ids}") - - # Fetch FOIA data for each jurisdiction ID - fetch_foia_data(jurisdiction_ids) - - -# Run the main function -if __name__ == "__main__": - main() diff --git a/source_collectors/muckrock/muck_get.py b/source_collectors/muckrock/muck_get.py deleted file mode 100644 index b958b61c..00000000 --- a/source_collectors/muckrock/muck_get.py +++ /dev/null @@ -1,16 +0,0 @@ -""" -A straightforward standalone script for downloading data from MuckRock -and searching for it with a specific search string. -""" -from source_collectors.muckrock.classes.FOIASearcher import FOIASearcher -from source_collectors.muckrock.classes.muckrock_fetchers import FOIAFetcher -from source_collectors.muckrock.utils import save_json_file - -if __name__ == "__main__": - search_term = "use of force" - fetcher = FOIAFetcher() - searcher = FOIASearcher(fetcher=fetcher, search_term=search_term) - results = searcher.search_to_count(20) - json_out_file = search_term.replace(" ", "_") + ".json" - save_json_file(file_path=json_out_file, data=results) - print(f"List dumped into {json_out_file}") diff --git a/source_collectors/muckrock/muckrock_ml_labeler.py b/source_collectors/muckrock/muckrock_ml_labeler.py deleted file mode 100644 index 49af4794..00000000 --- a/source_collectors/muckrock/muckrock_ml_labeler.py +++ /dev/null @@ -1,80 +0,0 @@ -""" -Utilizes a fine-tuned model to label a dataset of URLs. -""" - -import argparse - -import pandas as pd -import torch -from transformers import AutoTokenizer, AutoModelForSequenceClassification - - -def load_dataset_from_command_line() -> pd.DataFrame: - parser = argparse.ArgumentParser(description="Load CSV file into a pandas DataFrame.") - parser.add_argument("--csv_file", type=str, required=True, help="Path to the CSV file") - args = parser.parse_args() - return pd.read_csv(args.csv_file) - - -def create_combined_text_column(df: pd.DataFrame) -> None: - # Combine multiple columns (e.g., 'url', 'html_title', 'h1') into a single text field for each row - columns_to_combine = [ - "url_path", - "html_title", - "h1", - ] # Add other columns here as needed - df["combined_text"] = df[columns_to_combine].apply( - lambda row: " ".join(row.values.astype(str)), axis=1 - ) - - -def get_list_of_combined_texts(df: pd.DataFrame) -> list[str]: - # Convert the combined text into a list - return df["combined_text"].tolist() - - -def save_labeled_muckrock_dataset_to_csv(): - df.to_csv("labeled_muckrock_dataset.csv", index=False) - - -def create_predicted_labels_column(df: pd.DataFrame, predicted_labels: list[str]) -> None: - df["predicted_label"] = predicted_labels - - -def map_predictions_to_labels(model, predictions) -> list[str]: - labels = model.config.id2label - return [labels[int(pred)] for pred in predictions] - - -def get_predicted_labels(texts: list[str]) -> list[str]: - # Load the tokenizer and model - model_name = "PDAP/fine-url-classifier" - tokenizer = AutoTokenizer.from_pretrained(model_name) - - model = AutoModelForSequenceClassification.from_pretrained(model_name) - model.eval() - # Tokenize the inputs - inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt") - # Perform inference - with torch.no_grad(): - outputs = model(**inputs) - # Get the predicted labels - predictions = torch.argmax(outputs.logits, dim=-1) - # Map predictions to labels - predicted_labels = map_predictions_to_labels(model=model, predictions=predictions) - - return predicted_labels - - -if __name__ == "__main__": - df = load_dataset_from_command_line() - # TODO: 
Check for existence of required columns prior to further processing - create_combined_text_column(df=df) - - texts = get_list_of_combined_texts(df=df) - - predicted_labels = get_predicted_labels(texts=texts) - # Add the predicted labels to the dataframe and save - create_predicted_labels_column(df=df, predicted_labels=predicted_labels) - - save_labeled_muckrock_dataset_to_csv() \ No newline at end of file diff --git a/source_collectors/muckrock/requirements.txt b/source_collectors/muckrock/requirements.txt deleted file mode 100644 index babb4f3e..00000000 --- a/source_collectors/muckrock/requirements.txt +++ /dev/null @@ -1,30 +0,0 @@ -certifi==2024.8.30 -charset-normalizer==3.4.0 -filelock==3.16.1 -fsspec==2024.10.0 -huggingface-hub==0.26.1 -idna==3.10 -Jinja2==3.1.4 -logging==0.4.9.6 -MarkupSafe==3.0.2 -mpmath==1.3.0 -networkx==3.4.2 -numpy==2.1.2 -packaging==24.1 -pandas==2.2.3 -python-dateutil==2.9.0.post0 -pytz==2024.2 -PyYAML==6.0.2 -regex==2024.9.11 -requests==2.32.3 -safetensors==0.4.5 -setuptools==75.2.0 -six==1.16.0 -sympy==1.13.1 -tokenizers==0.20.1 -torch==2.5.0 -tqdm==4.66.5 -transformers==4.46.0 -typing_extensions==4.12.2 -tzdata==2024.2 -urllib3==2.2.3 diff --git a/source_collectors/muckrock/search_foia_data_db.py b/source_collectors/muckrock/search_foia_data_db.py deleted file mode 100644 index ede7d1de..00000000 --- a/source_collectors/muckrock/search_foia_data_db.py +++ /dev/null @@ -1,141 +0,0 @@ -""" -search_foia_data_db.py - -This script provides search functionality for the `foia_data.db` SQLite database. The search looks in `title`s and -`tags` of FOIA requests that match an input string provided by the user. -Run this after companion script `create_foia_data_db.py`. - -A successful run will output a JSON file containing entries matching the search string. - -Functions: - - parser_init() - - search_foia_db() - - parse_communications_column() - - generate_json() - - main() - -Error Handling: -Errors encountered during database operations, JSON parsing, or file writing are printed to the console. -""" - -import argparse -import json -from typing import Union, List, Dict - -import pandas as pd - -from source_collectors.muckrock.classes.FOIADBSearcher import FOIADBSearcher - - -def parser_init() -> argparse.ArgumentParser: - """ - Initializes the argument parser for search_foia_data_db.py. - - Returns: - argparse.ArgumentParser: The configured argument parser. - """ - - parser = argparse.ArgumentParser( - description="Search foia_data.db and generate a JSON file of resulting matches" - ) - parser.add_argument( - "--search_for", - type=str, - required=True, - metavar="", - help="Provide a string to search foia_data.db", - ) - - return parser - - -def search_foia_db(search_string: str) -> Union[pd.DataFrame, None]: - searcher = FOIADBSearcher() - return searcher.search(search_string) - - -def parse_communications_column(communications) -> List[Dict]: - """ - Parses a communications column value, decoding it from JSON format. - - Args: - communications : The input value to be parsed, which can be a JSON string or NaN. - - Returns: - list (List[Dict]): A list containing the parsed JSON data. If the input is NaN (missing values) or - there is a JSON decoding error, an empty list is returned. - - Raises: - json.JSONDecodeError: If deserialization fails, prints error and returns empty list. 
- """ - - if pd.isna(communications): - return [] - try: - return json.loads(communications) - except json.JSONDecodeError as e: - print(f"Error decoding JSON: {e}") - return [] - - -def generate_json(df: pd.DataFrame, search_string: str) -> None: - """ - Generates a JSON file from a pandas DataFrame. - - Args: - df (pandas.DataFrame): The DataFrame containing the data to be written to the JSON file. - - search_string (str): The string used to name the output JSON file. Spaces in the string - are replaced with underscores. - - Returns: - None - - Raises: - Exception: If writing to JSON file operation fails, prints error and returns. - """ - - output_json = f"{search_string.replace(' ', '_')}.json" - - try: - df.to_json(output_json, orient="records", indent=4) - print(f'Matching entries written to "{output_json}"') - except Exception as e: - print(f"An error occurred while writing JSON: {e}") - - -def main() -> None: - """ - Function to search the foia_data.db database for entries matching a specified search string. - - Command Line Args: - --search_for (str): A string to search for in the `title` and `tags` fields of FOIA requests. - """ - - parser = parser_init() - args = parser.parse_args() - search_string = args.search_for - - df = search_foia_db(search_string) - if df is None: - return - update_communications_column(df) - - announce_matching_entries(df, search_string) - - generate_json(df, search_string) - - -def announce_matching_entries(df, search_string): - print( - f'Found {df.shape[0]} matching entries containing "{search_string}" in the title or tags' - ) - - -def update_communications_column(df): - if not df["communications"].empty: - df["communications"] = df["communications"].apply(parse_communications_column) - - -if __name__ == "__main__": - main() diff --git a/start_mirrored_local_app.py b/start_mirrored_local_app.py new file mode 100644 index 00000000..7bcd573f --- /dev/null +++ b/start_mirrored_local_app.py @@ -0,0 +1,67 @@ +""" +Starts a local instance of the application utilizing a database +mirrored from production. 
+""" +import uvicorn + +from apply_migrations import apply_migrations +from local_database.DockerInfos import get_database_docker_info, get_source_collector_data_dumper_info +from local_database.classes.DockerManager import DockerManager +from local_database.classes.TimestampChecker import TimestampChecker +from local_database.constants import RESTORE_SH_DOCKER_PATH, DUMP_SH_DOCKER_PATH + + +def main(): + docker_manager = DockerManager() + + # Ensure Dockerfile for database is running, and if not, start it + database_docker_info = get_database_docker_info() + db_container = docker_manager.run_container(database_docker_info) + db_container.wait_for_pg_to_be_ready() + + + # Start dockerfile for Datadumper + data_dumper_docker_info = get_source_collector_data_dumper_info() + + # If not last run within 24 hours, run dump operation in Datadumper + # Check cache if exists and + checker = TimestampChecker() + data_dump_container = docker_manager.run_container(data_dumper_docker_info) + if checker.last_run_within_24_hours(): + print("Last run within 24 hours, skipping dump...") + else: + data_dump_container.run_command( + DUMP_SH_DOCKER_PATH, + ) + data_dump_container.run_command( + RESTORE_SH_DOCKER_PATH, + ) + print("Stopping datadumper container") + data_dump_container.stop() + checker.set_last_run_time() + + # Upgrade using alembic + apply_migrations() + + # Run `fastapi dev main.py` + try: + uvicorn.run( + "api.main:app", + host="0.0.0.0", + port=8000 + ) + finally: + # Add feature to stop all running containers + print("Stopping containers...") + for container in docker_manager.get_containers(): + container.stop() + + print("Containers stopped.") + + + + + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index 6181dd50..c8f4bd64 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,16 +1,48 @@ import pytest -from alembic import command from alembic.config import Config -from sqlalchemy import create_engine +from sqlalchemy import create_engine, inspect, MetaData +from sqlalchemy.orm import scoped_session, sessionmaker +from collector_db.AsyncDatabaseClient import AsyncDatabaseClient from collector_db.DatabaseClient import DatabaseClient from collector_db.helper_functions import get_postgres_connection_string from collector_db.models import Base +from core.EnvVarManager import EnvVarManager +from tests.helpers.AlembicRunner import AlembicRunner from tests.helpers.DBDataCreator import DBDataCreator +from util.helper_functions import load_from_environment @pytest.fixture(autouse=True, scope="session") def setup_and_teardown(): + # Set up environment variables that must be defined + # outside of tests + required_env_vars: dict = load_from_environment( + keys=[ + "POSTGRES_USER", + "POSTGRES_PASSWORD", + "POSTGRES_HOST", + "POSTGRES_PORT", + "POSTGRES_DB", + ] + ) + # Add test environment variables + test_env_vars = [ + "GOOGLE_API_KEY", + "GOOGLE_CSE_ID", + "PDAP_EMAIL", + "PDAP_PASSWORD", + "PDAP_API_KEY", + "PDAP_API_URL", + "DISCORD_WEBHOOK_URL", + "OPENAI_API_KEY", + ] + all_env_vars = required_env_vars.copy() + for env_var in test_env_vars: + all_env_vars[env_var] = "TEST" + + EnvVarManager.override(all_env_vars) + conn = get_postgres_connection_string() engine = create_engine(conn) alembic_cfg = Config("alembic.ini") @@ -19,9 +51,35 @@ def setup_and_teardown(): "sqlalchemy.url", get_postgres_connection_string() ) - command.upgrade(alembic_cfg, "head") - engine.dispose() + live_connection = engine.connect() + runner 
= AlembicRunner( + alembic_config=alembic_cfg, + inspector=inspect(live_connection), + metadata=MetaData(), + connection=live_connection, + session=scoped_session(sessionmaker(bind=live_connection)), + ) + try: + runner.upgrade("head") + except Exception as e: + print("Exception while upgrading: ", e) + print("Resetting schema") + runner.reset_schema() + runner.stamp("base") + runner.upgrade("head") + + yield + try: + runner.downgrade("base") + except Exception as e: + print("Exception while downgrading: ", e) + print("Resetting schema") + runner.reset_schema() + runner.stamp("base") + finally: + live_connection.close() + engine.dispose() @pytest.fixture def wipe_database(): @@ -46,6 +104,13 @@ def db_client_test(wipe_database) -> DatabaseClient: yield db_client db_client.engine.dispose() +@pytest.fixture +def adb_client_test(wipe_database) -> AsyncDatabaseClient: + conn = get_postgres_connection_string(is_async=True) + adb_client = AsyncDatabaseClient(db_url=conn) + yield adb_client + adb_client.engine.dispose() + @pytest.fixture def db_data_creator(db_client_test): db_data_creator = DBDataCreator(db_client=db_client_test) diff --git a/tests/test_alembic/AlembicRunner.py b/tests/helpers/AlembicRunner.py similarity index 62% rename from tests/test_alembic/AlembicRunner.py rename to tests/helpers/AlembicRunner.py index 51347d55..cb435d5a 100644 --- a/tests/test_alembic/AlembicRunner.py +++ b/tests/helpers/AlembicRunner.py @@ -2,7 +2,7 @@ from alembic import command from alembic.config import Config -from sqlalchemy import Connection, Inspector, MetaData, inspect +from sqlalchemy import Connection, Inspector, MetaData, inspect, text from sqlalchemy.orm import scoped_session @@ -21,6 +21,7 @@ def reflect(self): def upgrade(self, revision: str): command.upgrade(self.alembic_config, revision) + self.reflect() def downgrade(self, revision: str): print("Downgrading...") @@ -33,3 +34,16 @@ def reset_schema(self): self.connection.exec_driver_sql("DROP SCHEMA public CASCADE;") self.connection.exec_driver_sql("CREATE SCHEMA public;") self.connection.commit() + + def table_exists(self, table_name: str) -> bool: + return table_name in self.inspector.get_table_names() + + def tables_exist(self, table_names: list[str]) -> bool: + return all(table_name in self.inspector.get_table_names() for table_name in table_names) + + def execute(self, sql: str): + result = self.connection.execute(text(sql)) + if result.cursor is not None: + results = result.fetchall() + self.connection.commit() + return results diff --git a/tests/helpers/AwaitableBarrier.py b/tests/helpers/AwaitableBarrier.py new file mode 100644 index 00000000..8bf65a11 --- /dev/null +++ b/tests/helpers/AwaitableBarrier.py @@ -0,0 +1,13 @@ +import asyncio + + +class AwaitableBarrier: + def __init__(self): + self._event = asyncio.Event() + + async def __call__(self, *args, **kwargs): + await self._event.wait() + + def release(self): + self._event.set() + diff --git a/tests/helpers/DBDataCreator.py b/tests/helpers/DBDataCreator.py index c7fce247..38d70cfe 100644 --- a/tests/helpers/DBDataCreator.py +++ b/tests/helpers/DBDataCreator.py @@ -1,4 +1,9 @@ -from typing import List +import asyncio +from datetime import datetime +from random import randint +from typing import List, Optional + +from pydantic import BaseModel, model_validator from collector_db.AsyncDatabaseClient import AsyncDatabaseClient from collector_db.DTOs.BatchInfo import BatchInfo @@ -7,44 +12,354 @@ from collector_db.DTOs.URLErrorInfos import URLErrorPydanticInfo from 
collector_db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo, HTMLContentType from collector_db.DTOs.URLInfo import URLInfo -from collector_db.DTOs.URLMetadataInfo import URLMetadataInfo +from collector_db.DTOs.URLMapping import URLMapping from collector_db.DatabaseClient import DatabaseClient -from collector_db.enums import URLMetadataAttributeType, ValidationStatus, ValidationSource -from collector_manager.enums import CollectorType -from core.enums import BatchStatus +from collector_db.enums import TaskType +from collector_manager.enums import CollectorType, URLStatus +from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo +from core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo +from core.DTOs.task_data_objects.SubmitApprovedURLTDO import SubmittedURLInfo +from core.DTOs.task_data_objects.URLMiscellaneousMetadataTDO import URLMiscellaneousMetadataTDO +from core.enums import BatchStatus, SuggestionType, RecordType +from tests.helpers.test_batch_creation_parameters import TestBatchCreationParameters, AnnotationInfo from tests.helpers.simple_test_data_functions import generate_test_urls +class URLCreationInfo(BaseModel): + url_mappings: list[URLMapping] + outcome: URLStatus + annotation_info: Optional[AnnotationInfo] = None + +class BatchURLCreationInfoV2(BaseModel): + batch_id: int + url_creation_infos: dict[URLStatus, URLCreationInfo] + +class BatchURLCreationInfo(BaseModel): + batch_id: int + url_ids: list[int] + urls: list[str] + class DBDataCreator: """ Assists in the creation of test data """ - def __init__(self, db_client: DatabaseClient = DatabaseClient()): - self.db_client = db_client - self.adb_client = AsyncDatabaseClient() + def __init__(self, db_client: Optional[DatabaseClient] = None): + if db_client is not None: + self.db_client = db_client + else: + self.db_client = DatabaseClient() + self.adb_client: AsyncDatabaseClient = AsyncDatabaseClient() - def batch(self): + def batch( + self, + strategy: CollectorType = CollectorType.EXAMPLE, + batch_status: BatchStatus = BatchStatus.IN_PROCESS, + created_at: Optional[datetime] = None + ) -> int: return self.db_client.insert_batch( BatchInfo( - strategy=CollectorType.EXAMPLE.value, - status=BatchStatus.IN_PROCESS, + strategy=strategy.value, + status=batch_status, total_url_count=1, parameters={"test_key": "test_value"}, - user_id=1 + user_id=1, + date_generated=created_at + ) + ) + + async def task(self, url_ids: Optional[list[int]] = None) -> int: + task_id = await self.adb_client.initiate_task(task_type=TaskType.HTML) + if url_ids is not None: + await self.adb_client.link_urls_to_task(task_id=task_id, url_ids=url_ids) + return task_id + + async def batch_v2( + self, + parameters: TestBatchCreationParameters + ) -> BatchURLCreationInfoV2: + batch_id = self.batch( + strategy=parameters.strategy, + batch_status=parameters.outcome, + created_at=parameters.created_at + ) + if parameters.outcome in (BatchStatus.ERROR, BatchStatus.ABORTED): + return BatchURLCreationInfoV2( + batch_id=batch_id, + url_creation_infos={} + ) + + d: dict[URLStatus, URLCreationInfo] = {} + for url_parameters in parameters.urls: + iui: InsertURLsInfo = self.urls( + batch_id=batch_id, + url_count=url_parameters.count, + outcome=url_parameters.status, + created_at=parameters.created_at + ) + url_ids = [iui.url_id for iui in iui.url_mappings] + if url_parameters.with_html_content: + await self.html_data(url_ids) + if url_parameters.annotation_info.has_annotations(): + for url_id in url_ids: + await self.annotate( + url_id=url_id, + 
annotation_info=url_parameters.annotation_info + ) + + d[url_parameters.status] = URLCreationInfo( + url_mappings=iui.url_mappings, + outcome=url_parameters.status, + annotation_info=url_parameters.annotation_info if url_parameters.annotation_info.has_annotations() else None + ) + return BatchURLCreationInfoV2( + batch_id=batch_id, + url_creation_infos=d + ) + + async def batch_and_urls( + self, + strategy: CollectorType = CollectorType.EXAMPLE, + url_count: int = 3, + with_html_content: bool = False, + batch_status: BatchStatus = BatchStatus.READY_TO_LABEL, + url_status: URLStatus = URLStatus.PENDING + ) -> BatchURLCreationInfo: + batch_id = self.batch( + strategy=strategy, + batch_status=batch_status + ) + if batch_status in (BatchStatus.ERROR, BatchStatus.ABORTED): + return BatchURLCreationInfo( + batch_id=batch_id, + url_ids=[], + urls=[] + ) + iuis: InsertURLsInfo = self.urls( + batch_id=batch_id, + url_count=url_count, + outcome=url_status + ) + url_ids = [iui.url_id for iui in iuis.url_mappings] + if with_html_content: + await self.html_data(url_ids) + + return BatchURLCreationInfo( + batch_id=batch_id, + url_ids=url_ids, + urls=[iui.url for iui in iuis.url_mappings] + ) + + async def agency(self) -> int: + agency_id = randint(1, 99999999) + await self.adb_client.upsert_new_agencies( + suggestions=[ + URLAgencySuggestionInfo( + url_id=-1, + suggestion_type=SuggestionType.UNKNOWN, + pdap_agency_id=agency_id, + agency_name=f"Test Agency {agency_id}", + state=f"Test State {agency_id}", + county=f"Test County {agency_id}", + locality=f"Test Locality {agency_id}" + ) + ] + ) + return agency_id + + async def auto_relevant_suggestions(self, url_id: int, relevant: bool = True): + await self.adb_client.add_auto_relevant_suggestion( + url_id=url_id, + relevant=relevant + ) + + async def annotate(self, url_id: int, annotation_info: AnnotationInfo): + info = annotation_info + if info.user_relevant is not None: + await self.user_relevant_suggestion(url_id=url_id, relevant=info.user_relevant) + if info.auto_relevant is not None: + await self.auto_relevant_suggestions(url_id=url_id, relevant=info.auto_relevant) + if info.user_record_type is not None: + await self.user_record_type_suggestion(url_id=url_id, record_type=info.user_record_type) + if info.auto_record_type is not None: + await self.auto_record_type_suggestions(url_id=url_id, record_type=info.auto_record_type) + if info.user_agency is not None: + await self.agency_user_suggestions(url_id=url_id, agency_id=info.user_agency) + if info.auto_agency is not None: + await self.agency_auto_suggestions(url_id=url_id, count=1, suggestion_type=SuggestionType.AUTO_SUGGESTION) + if info.confirmed_agency is not None: + await self.agency_auto_suggestions(url_id=url_id, count=1, suggestion_type=SuggestionType.CONFIRMED) + if info.final_review_approved is not None: + if info.final_review_approved: + final_review_approval_info = FinalReviewApprovalInfo( + url_id=url_id, + record_type=annotation_info.user_record_type, + agency_ids=[annotation_info.user_agency] if annotation_info.user_agency is not None else None, + description="Test Description", + ) + await self.adb_client.approve_url( + approval_info=final_review_approval_info, + user_id=1 + ) + else: + await self.adb_client.reject_url( + url_id=url_id, + user_id=1 + ) + + + async def user_relevant_suggestion( + self, + url_id: int, + user_id: Optional[int] = None, + relevant: bool = True + ): + if user_id is None: + user_id = randint(1, 99999999) + await self.adb_client.add_user_relevant_suggestion( + 
url_id=url_id, + user_id=user_id, + relevant=relevant + ) + + async def user_record_type_suggestion( + self, + url_id: int, + record_type: RecordType, + user_id: Optional[int] = None, + ): + if user_id is None: + user_id = randint(1, 99999999) + await self.adb_client.add_user_record_type_suggestion( + url_id=url_id, + user_id=user_id, + record_type=record_type + ) + + async def auto_record_type_suggestions(self, url_id: int, record_type: RecordType): + await self.adb_client.add_auto_record_type_suggestion( + url_id=url_id, + record_type=record_type + ) + + + async def auto_suggestions( + self, + url_ids: list[int], + num_suggestions: int, + suggestion_type: SuggestionType.AUTO_SUGGESTION or SuggestionType.UNKNOWN + ): + allowed_suggestion_types = [SuggestionType.AUTO_SUGGESTION, SuggestionType.UNKNOWN] + if suggestion_type not in allowed_suggestion_types: + raise ValueError(f"suggestion_type must be one of {allowed_suggestion_types}") + if suggestion_type == SuggestionType.UNKNOWN and num_suggestions > 1: + raise ValueError("num_suggestions must be 1 when suggestion_type is unknown") + + for url_id in url_ids: + suggestions = [] + for i in range(num_suggestions): + if suggestion_type == SuggestionType.UNKNOWN: + agency_id = None + else: + agency_id = await self.agency() + suggestion = URLAgencySuggestionInfo( + url_id=url_id, + suggestion_type=suggestion_type, + pdap_agency_id=agency_id + ) + suggestions.append(suggestion) + + await self.adb_client.add_agency_auto_suggestions( + suggestions=suggestions ) + + async def confirmed_suggestions(self, url_ids: list[int]): + for url_id in url_ids: + await self.adb_client.add_confirmed_agency_url_links( + suggestions=[ + URLAgencySuggestionInfo( + url_id=url_id, + suggestion_type=SuggestionType.CONFIRMED, + pdap_agency_id=await self.agency() + ) + ] + ) + + async def manual_suggestion(self, user_id: int, url_id: int, is_new: bool = False): + await self.adb_client.add_agency_manual_suggestion( + agency_id=await self.agency(), + url_id=url_id, + user_id=user_id, + is_new=is_new ) - def urls(self, batch_id: int, url_count: int) -> InsertURLsInfo: + + def urls( + self, + batch_id: int, + url_count: int, + collector_metadata: Optional[dict] = None, + outcome: URLStatus = URLStatus.PENDING, + created_at: Optional[datetime] = None + ) -> InsertURLsInfo: raw_urls = generate_test_urls(url_count) url_infos: List[URLInfo] = [] for url in raw_urls: url_infos.append( URLInfo( url=url, + outcome=outcome, + name="Test Name" if outcome == URLStatus.VALIDATED else None, + collector_metadata=collector_metadata, + created_at=created_at ) ) - return self.db_client.insert_urls(url_infos=url_infos, batch_id=batch_id) + url_insert_info = self.db_client.insert_urls( + url_infos=url_infos, + batch_id=batch_id, + ) + + # If outcome is submitted, also add entry to DataSourceURL + if outcome == URLStatus.SUBMITTED: + submitted_url_infos = [] + for url_id in url_insert_info.url_ids: + submitted_url_info = SubmittedURLInfo( + url_id=url_id, + data_source_id=url_id, # Use same ID for convenience, + request_error=None, + submitted_at=created_at + ) + submitted_url_infos.append(submitted_url_info) + self.db_client.mark_urls_as_submitted(submitted_url_infos) + + + return url_insert_info + + async def url_miscellaneous_metadata( + self, + url_id: int, + name: str = "Test Name", + description: str = "Test Description", + record_formats: Optional[list[str]] = None, + data_portal_type: Optional[str] = "Test Data Portal Type", + supplying_entity: Optional[str] = "Test Supplying Entity" 
+ ): + if record_formats is None: + record_formats = ["Test Record Format", "Test Record Format 2"] + + tdo = URLMiscellaneousMetadataTDO( + url_id=url_id, + collector_metadata={}, + collector_type=CollectorType.EXAMPLE, + record_formats=record_formats, + name=name, + description=description, + data_portal_type=data_portal_type, + supplying_entity=supplying_entity + ) + + await self.adb_client.add_miscellaneous_metadata([tdo]) + def duplicate_urls(self, duplicate_batch_id: int, url_ids: list[int]): """ @@ -80,32 +395,87 @@ async def html_data(self, url_ids: list[int]): ) await self.adb_client.add_html_content_infos(html_content_infos) - async def metadata( + async def error_info( self, url_ids: list[int], - attribute: URLMetadataAttributeType = URLMetadataAttributeType.RELEVANT, - value: str = "False", - validation_status: ValidationStatus = ValidationStatus.PENDING_VALIDATION, - validation_source: ValidationSource = ValidationSource.MACHINE_LEARNING + task_id: Optional[int] = None ): - for url_id in url_ids: - await self.adb_client.add_url_metadata( - URLMetadataInfo( - url_id=url_id, - attribute=attribute, - value=value, - validation_status=validation_status, - validation_source=validation_source, - ) - ) - - async def error_info(self, url_ids: list[int]): + if task_id is None: + task_id = await self.task() error_infos = [] for url_id in url_ids: url_error_info = URLErrorPydanticInfo( url_id=url_id, error="test error", + task_id=task_id ) error_infos.append(url_error_info) await self.adb_client.add_url_error_infos(error_infos) + + async def agency_auto_suggestions( + self, + url_id: int, + count: int, + suggestion_type: SuggestionType = SuggestionType.AUTO_SUGGESTION + ): + if suggestion_type == SuggestionType.UNKNOWN: + count = 1 # Can only be one auto suggestion if unknown + + suggestions = [] + for _ in range(count): + if suggestion_type == SuggestionType.UNKNOWN: + pdap_agency_id = None + else: + pdap_agency_id = await self.agency() + suggestion = URLAgencySuggestionInfo( + url_id=url_id, + suggestion_type=suggestion_type, + pdap_agency_id=pdap_agency_id, + state="Test State", + county="Test County", + locality="Test Locality" + ) + suggestions.append(suggestion) + + await self.adb_client.add_agency_auto_suggestions( + suggestions=suggestions + ) + + async def agency_confirmed_suggestion( + self, + url_id: int + ) -> int: + """ + Creates a confirmed agency suggestion + and returns the auto-generated pdap_agency_id + """ + agency_id = await self.agency() + await self.adb_client.add_confirmed_agency_url_links( + suggestions=[ + URLAgencySuggestionInfo( + url_id=url_id, + suggestion_type=SuggestionType.CONFIRMED, + pdap_agency_id=agency_id + ) + ] + ) + return agency_id + + async def agency_user_suggestions( + self, + url_id: int, + user_id: Optional[int] = None, + agency_id: Optional[int] = None + ): + if user_id is None: + user_id = randint(1, 99999999) + + if agency_id is None: + agency_id = await self.agency() + await self.adb_client.add_agency_manual_suggestion( + agency_id=agency_id, + url_id=url_id, + user_id=user_id, + is_new=False + ) diff --git a/tests/helpers/assert_functions.py b/tests/helpers/assert_functions.py new file mode 100644 index 00000000..ef379d3e --- /dev/null +++ b/tests/helpers/assert_functions.py @@ -0,0 +1,7 @@ +from collector_db.AsyncDatabaseClient import AsyncDatabaseClient +from collector_db.models import Task + + +async def assert_database_has_no_tasks(adb_client: AsyncDatabaseClient): + tasks = await adb_client.get_all(Task) + assert len(tasks) == 0 \ No 
newline at end of file diff --git a/tests/helpers/complex_test_data_functions.py b/tests/helpers/complex_test_data_functions.py new file mode 100644 index 00000000..bc03020f --- /dev/null +++ b/tests/helpers/complex_test_data_functions.py @@ -0,0 +1,124 @@ +from typing import Optional + +from pydantic import BaseModel + +from collector_db.DTOs.InsertURLsInfo import InsertURLsInfo +from collector_db.DTOs.URLMapping import URLMapping +from collector_manager.enums import URLStatus +from core.enums import RecordType, SuggestionType +from tests.helpers.DBDataCreator import BatchURLCreationInfo +from tests.helpers.DBDataCreator import DBDataCreator + +class AnnotationSetupInfo(BaseModel): + batch_id: int + insert_urls_info: InsertURLsInfo + +async def setup_for_get_next_url_for_annotation( + db_data_creator: DBDataCreator, + url_count: int, + outcome: URLStatus = URLStatus.PENDING +) -> AnnotationSetupInfo: + batch_id = db_data_creator.batch() + insert_urls_info = db_data_creator.urls( + batch_id=batch_id, + url_count=url_count, + outcome=outcome + ) + await db_data_creator.html_data( + [ + url.url_id for url in insert_urls_info.url_mappings + ] + ) + return AnnotationSetupInfo(batch_id=batch_id, insert_urls_info=insert_urls_info) + +class AnnotateAgencySetupInfo(BaseModel): + batch_id: int + url_ids: list[int] + +async def setup_for_annotate_agency( + db_data_creator: DBDataCreator, + url_count: int, + suggestion_type: SuggestionType = SuggestionType.UNKNOWN, + with_html_content: bool = True +): + buci: BatchURLCreationInfo = await db_data_creator.batch_and_urls( + url_count=url_count, + with_html_content=with_html_content + ) + await db_data_creator.auto_suggestions( + url_ids=buci.url_ids, + num_suggestions=1, + suggestion_type=suggestion_type + ) + + return AnnotateAgencySetupInfo(batch_id=buci.batch_id, url_ids=buci.url_ids) + +class FinalReviewSetupInfo(BaseModel): + batch_id: int + url_mapping: URLMapping + user_agency_id: Optional[int] + +async def setup_for_get_next_url_for_final_review( + db_data_creator: DBDataCreator, + annotation_count: Optional[int] = None, + include_user_annotations: bool = True, + include_miscellaneous_metadata: bool = True +) -> FinalReviewSetupInfo: + """ + Sets up the database to test the final_review functions + Auto-labels the URL with 'relevant=True' and 'record_type=ARREST_RECORDS' + And applies auto-generated user annotations + """ + + batch_id = db_data_creator.batch() + url_mapping = db_data_creator.urls( + batch_id=batch_id, + url_count=1 + ).url_mappings[0] + if include_miscellaneous_metadata: + await db_data_creator.url_miscellaneous_metadata(url_id=url_mapping.url_id) + await db_data_creator.html_data([url_mapping.url_id]) + + async def add_agency_suggestion() -> int: + agency_id = await db_data_creator.agency() + await db_data_creator.agency_user_suggestions( + url_id=url_mapping.url_id, + agency_id=agency_id + ) + return agency_id + + async def add_record_type_suggestion(record_type: RecordType): + await db_data_creator.user_record_type_suggestion( + url_id=url_mapping.url_id, + record_type=record_type + ) + + async def add_relevant_suggestion(relevant: bool): + await db_data_creator.user_relevant_suggestion( + url_id=url_mapping.url_id, + relevant=relevant + ) + + await db_data_creator.auto_relevant_suggestions( + url_id=url_mapping.url_id, + relevant=True + ) + + await db_data_creator.auto_record_type_suggestions( + url_id=url_mapping.url_id, + record_type=RecordType.ARREST_RECORDS + ) + + if include_user_annotations: + await 
add_relevant_suggestion(False) + await add_record_type_suggestion(RecordType.ACCIDENT_REPORTS) + user_agency_id = await add_agency_suggestion() + else: + user_agency_id = None + + return FinalReviewSetupInfo( + batch_id=batch_id, + url_mapping=url_mapping, + user_agency_id=user_agency_id + ) + diff --git a/tests/helpers/patch_functions.py b/tests/helpers/patch_functions.py new file mode 100644 index 00000000..bb805d29 --- /dev/null +++ b/tests/helpers/patch_functions.py @@ -0,0 +1,10 @@ +from tests.helpers.AwaitableBarrier import AwaitableBarrier + + +async def block_sleep(monkeypatch) -> AwaitableBarrier: + barrier = AwaitableBarrier() + monkeypatch.setattr( + "collector_manager.ExampleCollector.ExampleCollector.sleep", + barrier + ) + return barrier diff --git a/tests/helpers/test_batch_creation_parameters.py b/tests/helpers/test_batch_creation_parameters.py new file mode 100644 index 00000000..cfb4805e --- /dev/null +++ b/tests/helpers/test_batch_creation_parameters.py @@ -0,0 +1,71 @@ +import datetime +from typing import Optional + +from pydantic import BaseModel, model_validator + +from collector_manager.enums import URLStatus, CollectorType +from core.enums import BatchStatus, AnnotationType, RecordType + + +class AnnotationInfo(BaseModel): + user_relevant: Optional[bool] = None + auto_relevant: Optional[bool] = None + user_record_type: Optional[RecordType] = None + auto_record_type: Optional[RecordType] = None + user_agency: Optional[int] = None + auto_agency: Optional[int] = None + confirmed_agency: Optional[int] = None + final_review_approved: Optional[bool] = None + + def has_annotations(self): + return any(value is not None for value in [ + self.user_relevant, + self.auto_relevant, + self.user_record_type, + self.auto_record_type, + self.user_agency, + self.auto_agency, + self.confirmed_agency, + self.final_review_approved + ]) + +class TestURLCreationParameters(BaseModel): + count: int + status: URLStatus = URLStatus.PENDING + with_html_content: bool = False + annotation_info: AnnotationInfo = AnnotationInfo() + + @model_validator(mode='after') + def validate_annotation_info(self): + if self.status == URLStatus.REJECTED: + self.annotation_info.final_review_approved = False + return self + if self.status != URLStatus.VALIDATED: + return self + + # Assume is validated + self.annotation_info.final_review_approved = True + if self.annotation_info.user_record_type is None: + self.annotation_info.user_record_type = RecordType.ARREST_RECORDS + if self.annotation_info.user_agency is None: + self.annotation_info.user_agency = 1 + + + return self + +class TestBatchCreationParameters(BaseModel): + created_at: Optional[datetime.datetime] = None + outcome: BatchStatus = BatchStatus.READY_TO_LABEL + strategy: CollectorType = CollectorType.EXAMPLE + urls: Optional[list[TestURLCreationParameters]] = None + + @model_validator(mode='after') + def validate_urls(self): + if self.outcome != BatchStatus.READY_TO_LABEL: + if self.urls is not None: + raise ValueError('URLs cannot be provided if outcome is not READY_TO_LABEL') + return self + + if self.urls is None: + self.urls = [TestURLCreationParameters(count=1)] + return self \ No newline at end of file diff --git a/tests/manual/agency_identifier/__init__.py b/tests/manual/agency_identifier/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/manual/agency_identifier/test_muckrock_api_interface.py b/tests/manual/agency_identifier/test_muckrock_api_interface.py new file mode 100644 index 00000000..e3a86ed9 --- /dev/null +++ 
b/tests/manual/agency_identifier/test_muckrock_api_interface.py @@ -0,0 +1,16 @@ +import pytest +from aiohttp import ClientSession + +from source_collectors.muckrock.MuckrockAPIInterface import MuckrockAPIInterface + + +@pytest.mark.asyncio +async def test_muckrock_api_interface(): + + async with ClientSession() as session: + muckrock_api_interface = MuckrockAPIInterface(session=session) + + response = await muckrock_api_interface.lookup_agency( + muckrock_agency_id=1 + ) + print(response) diff --git a/tests/manual/core/lifecycle/test_auto_googler_lifecycle.py b/tests/manual/core/lifecycle/test_auto_googler_lifecycle.py index 2489d17f..9e5c0e49 100644 --- a/tests/manual/core/lifecycle/test_auto_googler_lifecycle.py +++ b/tests/manual/core/lifecycle/test_auto_googler_lifecycle.py @@ -1,15 +1,16 @@ import os import dotenv -from tests.automated.core.helpers.common_test_procedures import run_collector_and_wait_for_completion import api.dependencies from collector_db.DTOs.BatchInfo import BatchInfo from collector_manager.enums import CollectorType from core.enums import BatchStatus +from test_automated.integration.core.helpers.common_test_procedures import run_collector_and_wait_for_completion def test_auto_googler_collector_lifecycle(test_core): + # TODO: Rework for Async ci = test_core db_client = api.dependencies.db_client @@ -31,7 +32,7 @@ def test_auto_googler_collector_lifecycle(test_core): batch_info: BatchInfo = api.dependencies.db_client.get_batch_by_id(1) assert batch_info.strategy == "auto_googler" - assert batch_info.status == BatchStatus.COMPLETE + assert batch_info.status == BatchStatus.READY_TO_LABEL assert batch_info.total_url_count == 20 url_infos = db_client.get_urls_by_batch(1) diff --git a/tests/manual/core/lifecycle/test_ckan_lifecycle.py b/tests/manual/core/lifecycle/test_ckan_lifecycle.py index 10802c77..4e87bbbd 100644 --- a/tests/manual/core/lifecycle/test_ckan_lifecycle.py +++ b/tests/manual/core/lifecycle/test_ckan_lifecycle.py @@ -1,10 +1,10 @@ -from tests.automated.core.helpers.common_test_procedures import run_collector_and_wait_for_completion import api.dependencies from collector_db.DTOs.BatchInfo import BatchInfo from collector_manager.enums import CollectorType from core.enums import BatchStatus from source_collectors.ckan.search_terms import group_search, package_search, organization_search +from test_automated.integration.core.helpers.common_test_procedures import run_collector_and_wait_for_completion def test_ckan_lifecycle(test_core): @@ -24,7 +24,7 @@ def test_ckan_lifecycle(test_core): batch_info: BatchInfo = db_client.get_batch_by_id(1) assert batch_info.strategy == "ckan" - assert batch_info.status == BatchStatus.COMPLETE + assert batch_info.status == BatchStatus.READY_TO_LABEL assert batch_info.total_url_count >= 3000 url_infos = db_client.get_urls_by_batch(1) diff --git a/tests/manual/core/lifecycle/test_common_crawler_lifecycle.py b/tests/manual/core/lifecycle/test_common_crawler_lifecycle.py index d2ee4495..03fe5855 100644 --- a/tests/manual/core/lifecycle/test_common_crawler_lifecycle.py +++ b/tests/manual/core/lifecycle/test_common_crawler_lifecycle.py @@ -34,7 +34,7 @@ def test_common_crawler_lifecycle(test_core: SourceCollectorCore): batch_info = db_client.get_batch_by_id(1) assert batch_info.strategy == "common_crawler" - assert batch_info.status == BatchStatus.COMPLETE + assert batch_info.status == BatchStatus.READY_TO_LABEL assert batch_info.parameters == config url_infos = db_client.get_urls_by_batch(1) diff --git 
a/tests/manual/core/lifecycle/test_muckrock_lifecycles.py b/tests/manual/core/lifecycle/test_muckrock_lifecycles.py index d92fa0be..72d2d9fc 100644 --- a/tests/manual/core/lifecycle/test_muckrock_lifecycles.py +++ b/tests/manual/core/lifecycle/test_muckrock_lifecycles.py @@ -1,10 +1,10 @@ -from tests.automated.core.helpers.common_test_procedures import run_collector_and_wait_for_completion -from tests.automated.core.helpers.constants import ALLEGHENY_COUNTY_TOWN_NAMES, ALLEGHENY_COUNTY_MUCKROCK_ID import api.dependencies from collector_db.DTOs.BatchInfo import BatchInfo from collector_manager.enums import CollectorType from core.enums import BatchStatus +from test_automated.integration.core.helpers.common_test_procedures import run_collector_and_wait_for_completion +from test_automated.integration.core.helpers.constants import ALLEGHENY_COUNTY_MUCKROCK_ID, ALLEGHENY_COUNTY_TOWN_NAMES def test_muckrock_simple_search_collector_lifecycle(test_core): @@ -23,7 +23,7 @@ def test_muckrock_simple_search_collector_lifecycle(test_core): batch_info: BatchInfo = db_client.get_batch_by_id(1) assert batch_info.strategy == "muckrock_simple_search" - assert batch_info.status == BatchStatus.COMPLETE + assert batch_info.status == BatchStatus.READY_TO_LABEL assert batch_info.total_url_count >= 10 url_infos = db_client.get_urls_by_batch(1) @@ -45,7 +45,7 @@ def test_muckrock_county_level_search_collector_lifecycle(test_core): batch_info: BatchInfo = db_client.get_batch_by_id(1) assert batch_info.strategy == "muckrock_county_search" - assert batch_info.status == BatchStatus.COMPLETE + assert batch_info.status == BatchStatus.READY_TO_LABEL assert batch_info.total_url_count >= 10 url_infos = db_client.get_urls_by_batch(1) @@ -67,7 +67,7 @@ def test_muckrock_full_search_collector_lifecycle(test_core): batch_info: BatchInfo = db_client.get_batch_by_id(1) assert batch_info.strategy == CollectorType.MUCKROCK_ALL_SEARCH.value - assert batch_info.status == BatchStatus.COMPLETE + assert batch_info.status == BatchStatus.READY_TO_LABEL assert batch_info.total_url_count >= 1 url_infos = db_client.get_urls_by_batch(1) diff --git a/tests/manual/html_collector/test_html_tag_collector_integration.py b/tests/manual/html_collector/test_html_tag_collector_integration.py index cb803e96..3ffef203 100644 --- a/tests/manual/html_collector/test_html_tag_collector_integration.py +++ b/tests/manual/html_collector/test_html_tag_collector_integration.py @@ -1,9 +1,8 @@ -import polars as pl import pytest from collector_db.AsyncDatabaseClient import AsyncDatabaseClient from collector_db.DTOs.URLInfo import URLInfo -from core.classes.URLHTMLCycler import URLHTMLCycler +from core.classes.task_operators.URLHTMLTaskOperator import URLHTMLTaskOperator from helpers.DBDataCreator import DBDataCreator from html_tag_collector.ResponseParser import HTMLResponseParser from html_tag_collector.RootURLCache import RootURLCache @@ -43,32 +42,32 @@ async def test_url_html_cycle_live_data( """ Tests the cycle on whatever exists in the DB """ - cycler = URLHTMLCycler( + operator = URLHTMLTaskOperator( adb_client=AsyncDatabaseClient(), url_request_interface=URLRequestInterface(), html_parser=HTMLResponseParser( root_url_cache=RootURLCache() ) ) - await cycler.cycle() + await operator.run_task() @pytest.mark.asyncio async def test_url_html_cycle( db_data_creator: DBDataCreator ): batch_id = db_data_creator.batch() - db_client = db_data_creator.db_client + adb_client: AsyncDatabaseClient = db_data_creator.adb_client url_infos = [] for url in URLS: 
url_infos.append(URLInfo(url=url)) - db_client.insert_urls(url_infos=url_infos, batch_id=batch_id) + await adb_client.insert_urls(url_infos=url_infos, batch_id=batch_id) - cycler = URLHTMLCycler( - adb_client=AsyncDatabaseClient(), + operator = URLHTMLTaskOperator( + adb_client=adb_client, url_request_interface=URLRequestInterface(), html_parser=HTMLResponseParser( root_url_cache=RootURLCache() ) ) - await cycler.cycle() \ No newline at end of file + await operator.run_task() \ No newline at end of file diff --git a/tests/manual/huggingface/test_hugging_face_interface.py b/tests/manual/huggingface/test_hugging_face_interface.py index b1b86350..08ce8ccd 100644 --- a/tests/manual/huggingface/test_hugging_face_interface.py +++ b/tests/manual/huggingface/test_hugging_face_interface.py @@ -1,13 +1,15 @@ +import pytest + from collector_db.DTOs.URLWithHTML import URLWithHTML from hugging_face.HuggingFaceInterface import HuggingFaceInterface - -def test_get_url_relevancy(): +@pytest.mark.asyncio +async def test_get_url_relevancy(): hfi = HuggingFaceInterface() def package_url(url: str) -> URLWithHTML: return URLWithHTML(url=url, url_id=1, html_infos=[]) - result = hfi.get_url_relevancy([ + result = await hfi.get_url_relevancy_async([ package_url("https://coloradosprings.gov/police-department/article/news/i-25-traffic-safety-deployment-after-stop"), package_url("https://example.com"), package_url("https://police.com") diff --git a/tests/manual/label_studio_interface/test_label_studio_interface_integration.py b/tests/manual/label_studio_interface/test_label_studio_interface_integration.py deleted file mode 100644 index d8e6fdb4..00000000 --- a/tests/manual/label_studio_interface/test_label_studio_interface_integration.py +++ /dev/null @@ -1,73 +0,0 @@ -import pytest - -from label_studio_interface.DTOs.LabelStudioTaskExportInfo import LabelStudioTaskExportInfo -from label_studio_interface.LabelStudioAPIManager import LabelStudioAPIManager, generate_random_word -from label_studio_interface.LabelStudioConfig import LabelStudioConfig - - -# Setup method -@pytest.fixture -def api_manager() -> LabelStudioAPIManager: - config = LabelStudioConfig() - return LabelStudioAPIManager(config) - -# Helper methods -def get_member_role_and_user_id(user_id: str, org_id: str, data: dict) -> tuple[str, int]: - for result in data['results']: - if result['organization'] == int(org_id) and result['user']['username'] == user_id: - return result['role'], result['user']['id'] - -def test_import_tasks_from_project(api_manager): - response = api_manager.import_tasks_from_project() - print(response.json()) - -def test_export_tasks_into_project(api_manager): - data = [] - for _ in range(10): - data.append( - LabelStudioTaskExportInfo(url=f"https://example.com/{generate_random_word(10)}") - ) - import_id = api_manager.export_tasks_into_project(data) - print("Import ID:", import_id) - - -def test_ping_project(api_manager): - project_accessible = api_manager.ping_project() - assert project_accessible - print("Project is accessible") - - -def test_get_members_in_organization(api_manager): - response = api_manager.get_members_in_organization() - assert response.status_code == 200 - print(response.json()) - -def test_update_member_role(api_manager): - # Note that for this test to work, you need to ensure there is seat available for the user in the organization - # A seat can be made available by deactivating a seat from another user - # (Remember to reassign the seat to the user after the test) - from 
label_studio_interface.LabelStudioAPIManager import Role - username = 'resibe6343' - response = api_manager.get_members_in_organization() - org_id = api_manager.config.organization_id - role, user_id = get_member_role_and_user_id(username, org_id, response.json()) - print(role) - - # Update role to Annotator - response = api_manager.update_member_role(user_id, Role.ANNOTATOR) - assert response.status_code == 200 - response = api_manager.get_members_in_organization() - role, _ = get_member_role_and_user_id(username, org_id, response.json()) - assert role == Role.ANNOTATOR.value - - # Update role to Deactivated - response = api_manager.update_member_role(user_id, Role.DEACTIVATED) - assert response.status_code == 200 - response = api_manager.get_members_in_organization() - role, _ = get_member_role_and_user_id(username, org_id, response.json()) - assert role == Role.DEACTIVATED.value - - - # response = api_manager.update_member_role("user_id", "role") - # assert response.status_code == 200 - # print(response.json()) \ No newline at end of file diff --git a/tests/manual/llm_api_logic/__init__.py b/tests/manual/llm_api_logic/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/manual/llm_api_logic/test_deepseek_record_classifier.py b/tests/manual/llm_api_logic/test_deepseek_record_classifier.py new file mode 100644 index 00000000..b0a6c1fb --- /dev/null +++ b/tests/manual/llm_api_logic/test_deepseek_record_classifier.py @@ -0,0 +1,25 @@ +import pytest + +from collector_db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo +from llm_api_logic.DeepSeekRecordClassifier import DeepSeekRecordClassifier + + +@pytest.mark.asyncio +async def test_deepseek_record_classifier(): + from collector_db.DTOs.URLHTMLContentInfo import HTMLContentType as hct + + d = { + hct.TITLE: "Oath of Office for Newly Promoted Corporal Lumpkin with Acworth Police – City of Acworth, GA", + hct.DESCRIPTION: "At the Thursday, November 2 regular city council meeting, Chief Evans administered the oath of office and swearing in of Corporal Cody Lumpkin. Corporal Lumpkin was surrounded by his family and members of the Acworth Police Department for the occasion. 
Corporal Lumpkin began employment with the Acworth Police Department on June 8,", + hct.H3: ["Oath of Office for Newly Promoted Corporal Lumpkin with Acworth Police"], + hct.H4: ["Share this on Social Media"], + hct.DIV: "PHONE DIRECTORY RESOURCES Search for: Search Button NEWS DEPARTMENTS GOVERNANCE & DEVELOPMENT Administration Development Clerks Office Court Services DDA, Tourism, and Historic Preservation OPERATIONS Parks, Recreation, and Community Resources Power, Public Works, and Stormwater SUPPORT SERVICES Customer Service Human Resources Finance Information Technology PUBLIC SAFETY Acworth Police RESIDENTS Public Art Master Plan Application for Boards & Commissions Board of Aldermen Customer Service Parks, Recreation, and Community Resources Historic Acworth Master Fee Schedule E-News Sign Up Online Payments BUSINESS Bids & Projects E-Verify Permits, Applications, & Ordinances City Code of Ordinances Master Fee Schedule Start a Business EVENTS VISIT ACWORTH NEWS DEPARTMENTS GOVERNANCE & DEVELOPMENT Administration Development Clerks Office Court Services DDA, Tourism, and Historic Preservation OPERATIONS Parks, Recreation, and Community Resources Power, Public Works, and Stormwater SUPPORT SERVICES Customer Service Human Resources Finance Information Technology PUBLIC SAFETY Acworth Police RESIDENTS Public Art Master Plan Application for Boards & Commissions Board of Aldermen Customer Service Parks, Recreation, and Community Resources Historic Acworth Master Fee Schedule E-News Sign Up Online Payments BUSINESS Bids & Projects E-Verify Permits, Applications, & Ordinances City Code of Ordinances Master Fee Schedule Start a Business EVENTS VISIT ACWORTH Oath of Office for Newly Promoted Corporal Lumpkin with Acworth Police Published On: November 3, 2023 At the Thursday, November 2 regular city council meeting, Chief Evans administered the oath of office and swearing in of Corporal Cody Lumpkin.  Corporal Lumpkin was surrounded by his family and members of the Acworth Police Department for the occasion.  Corporal Lumpkin began employment with the Acworth Police Department on June 8 , 2015, and has served as a patrol officer in addition to time spent time in Special Operations prior to his recent promotion. Share this on Social Media 4415 Center Street, Acworth GA 30101 Phone Directory Contact Us © 2025 City of Acworth Acworth is located in the foothills of the North Georgia mountains and is nestled along the banks of Lake Acworth and Lake Allatoona, hence its nickname “The Lake City.” The city boasts a rich history, a charming downtown, abundant outdoor recreational activities, a vibrant restaurant scene, and an active festival and events calendar. Acworth is one of the best, family-friendly destinations in the Atlanta region. Come discover why You’re Welcome in Acworth! 
ESS | Webmail | Handbook | Peak | Laserfiche | Login ", + } + content_infos = [] + for content_type, value in d.items(): + content_info = URLHTMLContentInfo(content_type=content_type, content=value) + content_infos.append(content_info) + + classifier = DeepSeekRecordClassifier() + result = await classifier.classify_url(content_infos) + print(result) \ No newline at end of file diff --git a/tests/manual/llm_api_logic/test_openai_record_classifier.py b/tests/manual/llm_api_logic/test_openai_record_classifier.py new file mode 100644 index 00000000..72d474d2 --- /dev/null +++ b/tests/manual/llm_api_logic/test_openai_record_classifier.py @@ -0,0 +1,26 @@ +import pytest + +from collector_db.DTOs.URLHTMLContentInfo import URLHTMLContentInfo +from llm_api_logic.OpenAIRecordClassifier import OpenAIRecordClassifier + + +@pytest.mark.asyncio +async def test_openai_record_classifier(): + from collector_db.DTOs.URLHTMLContentInfo import HTMLContentType as hct + + d = { + hct.TITLE: "Oath of Office for Newly Promoted Corporal Lumpkin with Acworth Police – City of Acworth, GA", + hct.DESCRIPTION: "At the Thursday, November 2 regular city council meeting, Chief Evans administered the oath of office and swearing in of Corporal Cody Lumpkin. Corporal Lumpkin was surrounded by his family and members of the Acworth Police Department for the occasion. Corporal Lumpkin began employment with the Acworth Police Department on June 8,", + hct.H3: ["Oath of Office for Newly Promoted Corporal Lumpkin with Acworth Police"], + hct.H4: ["Share this on Social Media"], + hct.DIV: "PHONE DIRECTORY RESOURCES Search for: Search Button NEWS DEPARTMENTS GOVERNANCE & DEVELOPMENT Administration Development Clerks Office Court Services DDA, Tourism, and Historic Preservation OPERATIONS Parks, Recreation, and Community Resources Power, Public Works, and Stormwater SUPPORT SERVICES Customer Service Human Resources Finance Information Technology PUBLIC SAFETY Acworth Police RESIDENTS Public Art Master Plan Application for Boards & Commissions Board of Aldermen Customer Service Parks, Recreation, and Community Resources Historic Acworth Master Fee Schedule E-News Sign Up Online Payments BUSINESS Bids & Projects E-Verify Permits, Applications, & Ordinances City Code of Ordinances Master Fee Schedule Start a Business EVENTS VISIT ACWORTH NEWS DEPARTMENTS GOVERNANCE & DEVELOPMENT Administration Development Clerks Office Court Services DDA, Tourism, and Historic Preservation OPERATIONS Parks, Recreation, and Community Resources Power, Public Works, and Stormwater SUPPORT SERVICES Customer Service Human Resources Finance Information Technology PUBLIC SAFETY Acworth Police RESIDENTS Public Art Master Plan Application for Boards & Commissions Board of Aldermen Customer Service Parks, Recreation, and Community Resources Historic Acworth Master Fee Schedule E-News Sign Up Online Payments BUSINESS Bids & Projects E-Verify Permits, Applications, & Ordinances City Code of Ordinances Master Fee Schedule Start a Business EVENTS VISIT ACWORTH Oath of Office for Newly Promoted Corporal Lumpkin with Acworth Police Published On: November 3, 2023 At the Thursday, November 2 regular city council meeting, Chief Evans administered the oath of office and swearing in of Corporal Cody Lumpkin.  Corporal Lumpkin was surrounded by his family and members of the Acworth Police Department for the occasion.  
Corporal Lumpkin began employment with the Acworth Police Department on June 8 , 2015, and has served as a patrol officer in addition to time spent time in Special Operations prior to his recent promotion. Share this on Social Media 4415 Center Street, Acworth GA 30101 Phone Directory Contact Us © 2025 City of Acworth Acworth is located in the foothills of the North Georgia mountains and is nestled along the banks of Lake Acworth and Lake Allatoona, hence its nickname “The Lake City.” The city boasts a rich history, a charming downtown, abundant outdoor recreational activities, a vibrant restaurant scene, and an active festival and events calendar. Acworth is one of the best, family-friendly destinations in the Atlanta region. Come discover why You’re Welcome in Acworth! ESS | Webmail | Handbook | Peak | Laserfiche | Login ", + } + content_infos = [] + for content_type, value in d.items(): + content_info = URLHTMLContentInfo(content_type=content_type, content=value) + content_infos.append(content_info) + + classifier = OpenAIRecordClassifier() + result = await classifier.classify_url(content_infos) + print(type(result)) + print(result) \ No newline at end of file diff --git a/tests/manual/pdap_client/__init__.py b/tests/manual/pdap_client/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/manual/pdap_client/test_access_manager.py b/tests/manual/pdap_client/test_access_manager.py new file mode 100644 index 00000000..b1245eca --- /dev/null +++ b/tests/manual/pdap_client/test_access_manager.py @@ -0,0 +1,23 @@ +import pytest +from aiohttp import ClientSession + +from pdap_access_manager import AccessManager +from util.helper_functions import get_from_env + + +@pytest.mark.asyncio +async def test_refresh_session(): + async with ClientSession() as session: + access_manager = AccessManager( + email=get_from_env("PDAP_PROD_EMAIL"), + password=get_from_env("PDAP_PROD_PASSWORD"), + api_key=get_from_env("PDAP_API_KEY", allow_none=True), + session=session + ) + old_access_token = await access_manager.access_token + old_refresh_token = await access_manager.refresh_token + await access_manager.refresh_access_token() + new_access_token = await access_manager.access_token + new_refresh_token = await access_manager.refresh_token + assert old_access_token != new_access_token + assert old_refresh_token != new_refresh_token diff --git a/tests/manual/pdap_client/test_pdap_client.py b/tests/manual/pdap_client/test_pdap_client.py new file mode 100644 index 00000000..5d10037c --- /dev/null +++ b/tests/manual/pdap_client/test_pdap_client.py @@ -0,0 +1,39 @@ +import pytest +from aiohttp import ClientSession + +from pdap_access_manager import AccessManager +from pdap_api_client.PDAPClient import PDAPClient +from util.helper_functions import get_from_env + + +@pytest.mark.asyncio +async def test_match_agency(): + + async with ClientSession() as session: + access_manager = AccessManager( + email=get_from_env("PDAP_PROD_EMAIL"), + password=get_from_env("PDAP_PROD_PASSWORD"), + api_key=get_from_env("PDAP_API_KEY", allow_none=True), + session=session + ) + pdap_client = PDAPClient(access_manager=access_manager) + + response = await pdap_client.match_agency(name="police") + + print(response) + +@pytest.mark.asyncio +async def test_check_for_duplicate(): + + async with ClientSession() as session: + access_manager = AccessManager( + email=get_from_env("PDAP_PROD_EMAIL"), + password=get_from_env("PDAP_PROD_PASSWORD"), + api_key=get_from_env("PDAP_API_KEY", allow_none=True), + session=session + ) + 
pdap_client = PDAPClient(access_manager=access_manager) + + response = await pdap_client.is_url_duplicate(url_to_check="https://example.com") + + print(response) \ No newline at end of file diff --git a/tests/manual/source_collectors/test_autogoogler_collector.py b/tests/manual/source_collectors/test_autogoogler_collector.py index e2c2b8e1..c9942106 100644 --- a/tests/manual/source_collectors/test_autogoogler_collector.py +++ b/tests/manual/source_collectors/test_autogoogler_collector.py @@ -1,21 +1,23 @@ -from unittest.mock import MagicMock +from unittest.mock import MagicMock, AsyncMock -from collector_db.DatabaseClient import DatabaseClient -from core.CoreLogger import CoreLogger +import pytest + +from collector_db.AsyncDatabaseClient import AsyncDatabaseClient +from core.AsyncCoreLogger import AsyncCoreLogger from source_collectors.auto_googler.AutoGooglerCollector import AutoGooglerCollector from source_collectors.auto_googler.DTOs import AutoGooglerInputDTO - -def test_autogoogler_collector(): +@pytest.mark.asyncio +async def test_autogoogler_collector(): collector = AutoGooglerCollector( batch_id=1, dto=AutoGooglerInputDTO( urls_per_result=5, queries=["police"], ), - logger = MagicMock(spec=CoreLogger), - db_client=MagicMock(spec=DatabaseClient), + logger = AsyncMock(spec=AsyncCoreLogger), + adb_client=AsyncMock(spec=AsyncDatabaseClient), raise_error=True ) - collector.run() + await collector.run() print(collector.data) \ No newline at end of file diff --git a/tests/manual/source_collectors/test_ckan_collector.py b/tests/manual/source_collectors/test_ckan_collector.py index 0fbebfa4..f9deaf02 100644 --- a/tests/manual/source_collectors/test_ckan_collector.py +++ b/tests/manual/source_collectors/test_ckan_collector.py @@ -1,9 +1,10 @@ -from unittest.mock import MagicMock +from unittest.mock import MagicMock, AsyncMock +import pytest from marshmallow import Schema, fields -from collector_db.DatabaseClient import DatabaseClient -from core.CoreLogger import CoreLogger +from collector_db.AsyncDatabaseClient import AsyncDatabaseClient +from core.AsyncCoreLogger import AsyncCoreLogger from source_collectors.ckan.CKANCollector import CKANCollector from source_collectors.ckan.DTOs import CKANInputDTO from source_collectors.ckan.search_terms import package_search, group_search, organization_search @@ -18,8 +19,8 @@ class CKANSchema(Schema): data_portal_type = fields.String() source_last_updated = fields.String() - -def test_ckan_collector_default(): +@pytest.mark.asyncio +async def test_ckan_collector_default(): collector = CKANCollector( batch_id=1, dto=CKANInputDTO( @@ -29,16 +30,22 @@ def test_ckan_collector_default(): "organization_search": organization_search } ), - logger=MagicMock(spec=CoreLogger), - db_client=MagicMock(spec=DatabaseClient), + logger=AsyncMock(spec=AsyncCoreLogger), + adb_client=AsyncMock(spec=AsyncDatabaseClient), raise_error=True - ) - collector.run() + await collector.run() schema = CKANSchema(many=True) schema.load(collector.data["results"]) + print("Printing results") + print(collector.data["results"]) -def test_ckan_collector_custom(): +@pytest.mark.asyncio +async def test_ckan_collector_custom(): + """ + Use this to test how CKAN behaves when using + something other than the default options provided + """ collector = CKANCollector( batch_id=1, dto=CKANInputDTO( @@ -72,10 +79,10 @@ def test_ckan_collector_custom(): ] } ), - logger=MagicMock(spec=CoreLogger), - db_client=MagicMock(spec=DatabaseClient), + logger=AsyncMock(spec=AsyncCoreLogger), + 
adb_client=AsyncMock(spec=AsyncDatabaseClient), raise_error=True ) - collector.run() + await collector.run() schema = CKANSchema(many=True) schema.load(collector.data["results"]) \ No newline at end of file diff --git a/tests/manual/source_collectors/test_common_crawler_collector.py b/tests/manual/source_collectors/test_common_crawler_collector.py index 9a7bc5d4..cb1c4f78 100644 --- a/tests/manual/source_collectors/test_common_crawler_collector.py +++ b/tests/manual/source_collectors/test_common_crawler_collector.py @@ -1,9 +1,10 @@ -from unittest.mock import MagicMock +from unittest.mock import MagicMock, AsyncMock +import pytest from marshmallow import Schema, fields -from collector_db.DatabaseClient import DatabaseClient -from core.CoreLogger import CoreLogger +from collector_db.AsyncDatabaseClient import AsyncDatabaseClient +from core.AsyncCoreLogger import AsyncCoreLogger from source_collectors.common_crawler.CommonCrawlerCollector import CommonCrawlerCollector from source_collectors.common_crawler.DTOs import CommonCrawlerInputDTO @@ -11,12 +12,15 @@ class CommonCrawlerSchema(Schema): urls = fields.List(fields.String()) -def test_common_crawler_collector(): +@pytest.mark.asyncio +async def test_common_crawler_collector(): collector = CommonCrawlerCollector( batch_id=1, dto=CommonCrawlerInputDTO(), - logger=MagicMock(spec=CoreLogger), - db_client=MagicMock(spec=DatabaseClient) + logger=AsyncMock(spec=AsyncCoreLogger), + adb_client=AsyncMock(spec=AsyncDatabaseClient), + raise_error=True ) - collector.run() + await collector.run() + print(collector.data) CommonCrawlerSchema().load(collector.data) diff --git a/tests/manual/source_collectors/test_muckrock_collectors.py b/tests/manual/source_collectors/test_muckrock_collectors.py index 00e1d57e..49bfa5fb 100644 --- a/tests/manual/source_collectors/test_muckrock_collectors.py +++ b/tests/manual/source_collectors/test_muckrock_collectors.py @@ -1,16 +1,20 @@ -from unittest.mock import MagicMock +from unittest.mock import MagicMock, AsyncMock -from collector_db.DatabaseClient import DatabaseClient -from core.CoreLogger import CoreLogger +import pytest + +from collector_db.AsyncDatabaseClient import AsyncDatabaseClient +from core.AsyncCoreLogger import AsyncCoreLogger from source_collectors.muckrock.DTOs import MuckrockSimpleSearchCollectorInputDTO, \ MuckrockCountySearchCollectorInputDTO, MuckrockAllFOIARequestsCollectorInputDTO from source_collectors.muckrock.classes.MuckrockCollector import MuckrockSimpleSearchCollector, \ MuckrockCountyLevelSearchCollector, MuckrockAllFOIARequestsCollector from source_collectors.muckrock.schemas import MuckrockURLInfoSchema -from test_automated.integration.core.helpers import ALLEGHENY_COUNTY_MUCKROCK_ID, ALLEGHENY_COUNTY_TOWN_NAMES +from tests.test_automated.integration.core.helpers.constants import ALLEGHENY_COUNTY_MUCKROCK_ID, \ + ALLEGHENY_COUNTY_TOWN_NAMES -def test_muckrock_simple_search_collector(): +@pytest.mark.asyncio +async def test_muckrock_simple_search_collector(): collector = MuckrockSimpleSearchCollector( batch_id=1, @@ -18,44 +22,51 @@ def test_muckrock_simple_search_collector(): search_string="police", max_results=10 ), - logger=MagicMock(spec=CoreLogger), - db_client=MagicMock(spec=DatabaseClient), + logger=AsyncMock(spec=AsyncCoreLogger), + adb_client=AsyncMock(spec=AsyncDatabaseClient), raise_error=True ) - collector.run() + await collector.run() schema = MuckrockURLInfoSchema(many=True) schema.load(collector.data["urls"]) assert len(collector.data["urls"]) >= 10 + 
print(collector.data) -def test_muckrock_county_level_search_collector(): +@pytest.mark.asyncio +async def test_muckrock_county_level_search_collector(): collector = MuckrockCountyLevelSearchCollector( batch_id=1, dto=MuckrockCountySearchCollectorInputDTO( parent_jurisdiction_id=ALLEGHENY_COUNTY_MUCKROCK_ID, town_names=ALLEGHENY_COUNTY_TOWN_NAMES ), - logger=MagicMock(spec=CoreLogger), - db_client=MagicMock(spec=DatabaseClient) + logger=AsyncMock(spec=AsyncCoreLogger), + adb_client=AsyncMock(spec=AsyncDatabaseClient), + raise_error=True ) - collector.run() + await collector.run() schema = MuckrockURLInfoSchema(many=True) schema.load(collector.data["urls"]) assert len(collector.data["urls"]) >= 10 + print(collector.data) -def test_muckrock_full_search_collector(): +@pytest.mark.asyncio +async def test_muckrock_full_search_collector(): collector = MuckrockAllFOIARequestsCollector( batch_id=1, dto=MuckrockAllFOIARequestsCollectorInputDTO( start_page=1, total_pages=2 ), - logger=MagicMock(spec=CoreLogger), - db_client=MagicMock(spec=DatabaseClient) + logger=AsyncMock(spec=AsyncCoreLogger), + adb_client=AsyncMock(spec=AsyncDatabaseClient), + raise_error=True ) - collector.run() + await collector.run() assert len(collector.data["urls"]) >= 1 schema = MuckrockURLInfoSchema(many=True) - schema.load(collector.data["urls"]) \ No newline at end of file + schema.load(collector.data["urls"]) + print(collector.data) \ No newline at end of file diff --git a/tests/manual/unsorted/test_identifier_unit.py b/tests/manual/unsorted/test_identifier_unit.py deleted file mode 100644 index a6dcc1fb..00000000 --- a/tests/manual/unsorted/test_identifier_unit.py +++ /dev/null @@ -1,275 +0,0 @@ -import tempfile -from unittest.mock import patch - -import pytest -import requests_mock - -from agency_identifier.identifier import * - - -@pytest.fixture -def mock_env(monkeypatch): - monkeypatch.setenv("VUE_APP_PDAP_API_KEY", "test_api_key") - - -def test_get_page_data_success(mock_env): - with requests_mock.Mocker() as m: - m.get("https://data-sources.pdap.io/api/agencies/1", json={"data": "test_data"}, status_code=200) - data = get_page_data(1) - assert data == "test_data" - - -def test_get_page_data_failure(mock_env): - with requests_mock.Mocker() as m: - m.get("https://data-sources.pdap.io/api/agencies/1", status_code=404) - with pytest.raises(Exception): - get_page_data(1) - - -@pytest.mark.parametrize("url,expected", [ - ("http://www.example.com", "example.com"), - ("https://example.com", "example.com"), - ("example.com", "example.com"), - ("www.example.com", "example.com"), -]) -def test_parse_hostname(url, expected): - assert parse_hostname(url) == expected - - -@pytest.mark.parametrize("url", [ - "http:///www.example.com", # Invalid URL - "://example.com", # Missing scheme -]) -def test_parse_hostname_failure(url): - with pytest.raises(Exception): - parse_hostname(url) - - -@pytest.mark.parametrize("url,expected", [ - ("http://www.example.com", "example.com/"), - ("https://example.com", "example.com/"), - ("http://example.com/path/to/page", "example.com/path/to/page/"), - ("www.example.com", "example.com/"), - ("example.com/", "example.com/"), -]) -def test_remove_http(url, expected): - assert remove_http(url) == expected - - -@pytest.fixture -def agencies_and_hostnames(): - return ( - [{"name": "Agency 1", "homepage_url": "https://agency1.com"}], - ["agency1.com"] - ) - - -def test_match_agencies_found(agencies_and_hostnames): - agencies, agency_hostnames = agencies_and_hostnames - match = match_agencies(agencies, 
agency_hostnames, "http://www.agency1.com/page") - assert match["status"] == "Match found" - assert match["agency"]["name"] == "Agency 1" - - -def test_match_agencies_no_match(agencies_and_hostnames): - agencies, agency_hostnames = agencies_and_hostnames - match = match_agencies(agencies, agency_hostnames, "http://www.nonexistentagency.com") - assert match["status"] == "No match found" - assert match["agency"] == [] - -@pytest.fixture -def agencies_with_same_hostname(): - return ( - [ - {"name": "Agency 1", "homepage_url": "http://agency.com/path1"}, - {"name": "Agency 2", "homepage_url": "http://agency.com/path2"} - ], - ["agency.com", "agency.com"] - ) - -def test_match_agencies_multiple_found(agencies_with_same_hostname): - agencies, agency_hostnames = agencies_with_same_hostname - # A URL that matches the first agency more closely - match = match_agencies(agencies, agency_hostnames, "http://agency.com/path1/page") - assert match["status"] == "Match found" - assert match["agency"]["name"] == "Agency 1" - - # A URL that doesn't closely match either agency's homepage URL path - contested_match = match_agencies(agencies, agency_hostnames, "http://agency.com/otherpath/page") - assert contested_match["status"] == "Contested match" - assert contested_match["agency"] == [] - - # A URL that matches the second agency more closely - match_second = match_agencies(agencies, agency_hostnames, "http://agency.com/path2/anotherpage") - assert match_second["status"] == "Match found" - assert match_second["agency"]["name"] == "Agency 2" - -@patch('agency_identifier.identifier.get_page_data') -def test_get_agencies_data(mock_get_page_data, mock_env): - # Mock get_page_data to return a dictionary on the first call and an empty dictionary on the second call - mock_get_page_data.side_effect = [ - [{"name": "Agency 1", "homepage_url": "https://agency1.com", "id": "1"}], # First page data - [] # Indicates no more pages - ] - - df = get_agencies_data() - assert not df.is_empty() - assert len(df) == 1 - assert df["name"][0] == "Agency 1" - assert df["homepage_url"][0] == "https://agency1.com" - - -# Sample data to simulate what `match_urls_to_agencies_and_clean_data` might return -sample_agencies_data = polars.DataFrame({ - "url": ["http://agency1.com", "http://agency2.com", "http://nonexistentagency.com"], - "homepage_url": ["http://agency1.com", "http://agency2.com", None], - "hostname": ["agency1.com", "agency2.com", None], -}) - -# Sample input URLs DataFrame -sample_urls_df = polars.DataFrame({ - "url": ["http://agency1.com/page1", "http://agency2.com/page2", "http://nonexistentagency.com/page"] -}) - - -@pytest.fixture -def mock_match_urls_to_agencies_and_clean_data(): - with patch('agency_identifier.identifier.match_urls_to_agencies_and_clean_data') as mock: - mock.return_value = sample_agencies_data - yield mock - - -def test_process_data(mock_match_urls_to_agencies_and_clean_data): - processed_df = process_data(sample_urls_df) - - # Verify that the mock was called once with the sample_urls_df - mock_match_urls_to_agencies_and_clean_data.assert_called_once_with(sample_urls_df) - - # Check that the processed DataFrame has filtered out the unmatched URLs - assert len(processed_df) == 2 # Expecting only matched URLs to be present - - # Check if the 'hostname' column exists and has no null values in the result - assert "hostname" in processed_df.columns - assert processed_df.filter(polars.col("hostname").is_null()).height == 0 - - # You might also want to check specific values if necessary - assert 
processed_df["url"].to_list() == ["http://agency1.com", "http://agency2.com"] - - -# Sample data to simulate what `get_agencies_data` might return -sample_get_agencies_data = polars.DataFrame({ - "homepage_url": ["http://agency1.com", "http://agency2.com"], - "name": ["Agency 1", "Agency 2"], - "count_data_sources": [10, 15], - "hostname": ["agency1.com", "agency2.com"], # Assume this is added by the function -}) - - -@pytest.fixture -def mock_get_agencies_data(): - with patch('agency_identifier.identifier.get_agencies_data') as mock: - mock.return_value = sample_get_agencies_data - yield mock - - -def test_match_urls_to_agencies_and_clean_data(mock_get_agencies_data): - matched_df = match_urls_to_agencies_and_clean_data(sample_urls_df) - - # Verify that `get_agencies_data` was called - mock_get_agencies_data.assert_called_once() - - # Verify the structure and content of the matched DataFrame - # Expect that each URL is matched with the correct agency based on the hostname - # Additionally, check for the addition of any new columns or transformations you apply - assert "homepage_url" in matched_df.columns - assert len(matched_df) == len(sample_urls_df) # Ensure all URLs are processed - - # Verify that URLs are correctly matched or not matched to agencies - # This assumes that the function annotates the DataFrame with match results - assert matched_df.filter(polars.col("url") == "http://agency1.com/page1").select("name")["name"][0] == "Agency 1" - assert matched_df.filter(polars.col("url") == "http://nonexistentagency.com/page").select("name")["name"][0] == "" - - -def test_read_data_success(): - # Create a temporary file with some CSV content - with tempfile.NamedTemporaryFile(mode='w', delete=False) as tmp: - tmp.write("column1,column2\nvalue1,value2") - tmp_path = tmp.name - - # Attempt to read the file with read_data - try: - df = read_data(tmp_path) - assert not df.is_empty() - assert "column1" in df.columns - assert df.shape == (1, 2) - finally: - # Clean up the temporary file - os.remove(tmp_path) - -def test_read_data_failure(): - # Test reading a non-existent file should raise an exception - with pytest.raises(Exception): - read_data("non_existent_file.csv") - - -def test_write_data_success(): - # Create a DataFrame to write - df = polars.DataFrame({"column1": ["value1"], "column2": ["value2"]}) - - # Use a temporary file to write the DataFrame - with tempfile.NamedTemporaryFile(mode='w', delete=False) as tmp: - tmp_path = tmp.name - - # Write the DataFrame and verify the file contents - try: - write_data(df, tmp_path) - - # Read back the file to verify contents - with open(tmp_path, 'r') as f: - content = f.read() - assert "column1,column2" in content - assert "value1,value2" in content - finally: - # Clean up the temporary file - os.remove(tmp_path) - - -def test_write_data_failure(monkeypatch): - # Simulate an error by patching the `write_csv` method to raise an exception - with monkeypatch.context() as m: - m.setattr(polars.DataFrame, "write_csv", - lambda self, file_path: (_ for _ in ()).throw(Exception("Mock write failure"))) - with pytest.raises(Exception) as exc_info: - df = polars.DataFrame({"column1": ["value1"], "column2": ["value2"]}) - write_data(df, "path/to/non_writable_directory/file.csv") - assert "Mock write failure" in str(exc_info.value) - -@patch('agency_identifier.identifier.write_data') -@patch('agency_identifier.identifier.process_data') -@patch('agency_identifier.identifier.read_data') -def test_process_and_write_data_success(mock_read_data, 
mock_process_data, mock_write_data): - # Setup mock return values - mock_read_data.return_value = polars.DataFrame({"url": ["http://example.com"]}) - processed_df = polars.DataFrame({"url": ["http://example.com"], "processed": [True]}) - mock_process_data.return_value = processed_df - - # Call the function with mocked input and output file paths - process_and_write_data("input_file.csv", "output_file.csv") - - # Verify that read_data and write_data were called correctly - mock_read_data.assert_called_once_with("input_file.csv") - mock_process_data.assert_called_once_with(mock_read_data.return_value) - mock_write_data.assert_called_once_with(processed_df, "output_file.csv") - -@pytest.mark.parametrize("side_effect,expected_exception", [ - (FileNotFoundError, FileNotFoundError), - (PermissionError, PermissionError), -]) -@patch('agency_identifier.identifier.write_data') -@patch('agency_identifier.identifier.process_data') -@patch('agency_identifier.identifier.read_data') -def test_process_and_write_data_failure(mock_read_data, mock_process_data, mock_write_data, side_effect, expected_exception): - mock_read_data.side_effect = side_effect - - with pytest.raises(expected_exception): - process_and_write_data("input_file.csv", "output_file.csv") \ No newline at end of file diff --git a/tests/test_alembic/conftest.py b/tests/test_alembic/conftest.py index 11b75b92..8cd1d0ab 100644 --- a/tests/test_alembic/conftest.py +++ b/tests/test_alembic/conftest.py @@ -4,7 +4,7 @@ from sqlalchemy.orm import scoped_session, sessionmaker from collector_db.helper_functions import get_postgres_connection_string -from tests.test_alembic.AlembicRunner import AlembicRunner +from tests.helpers.AlembicRunner import AlembicRunner @pytest.fixture() diff --git a/tests/test_alembic/helpers.py b/tests/test_alembic/helpers.py index d66854f2..dfebce07 100644 --- a/tests/test_alembic/helpers.py +++ b/tests/test_alembic/helpers.py @@ -1,7 +1,9 @@ +from typing import Optional + from sqlalchemy import text from sqlalchemy.orm import Session -from tests.test_alembic.AlembicRunner import AlembicRunner +from tests.helpers.AlembicRunner import AlembicRunner def get_enum_values(enum_name: str, session: Session) -> list[str]: @@ -9,12 +11,24 @@ def get_enum_values(enum_name: str, session: Session) -> list[str]: def table_creation_check( alembic_runner: AlembicRunner, - table_name: str, - start_revision: str, - end_revision: str + tables: list[str], + end_revision: str, + start_revision: Optional[str] = None, + ): - alembic_runner.upgrade(start_revision) - assert table_name not in alembic_runner.inspector.get_table_names() + if start_revision is not None: + alembic_runner.upgrade(start_revision) + for table_name in tables: + assert table_name not in alembic_runner.inspector.get_table_names() alembic_runner.upgrade(end_revision) alembic_runner.reflect() - assert table_name in alembic_runner.inspector.get_table_names() \ No newline at end of file + for table_name in tables: + assert table_name in alembic_runner.inspector.get_table_names() + +def columns_in_table( + alembic_runner: AlembicRunner, + table_name: str, + columns_to_check: list[str], +) -> bool: + current_columns = [col["name"] for col in alembic_runner.inspector.get_columns(table_name)] + return all(column in current_columns for column in columns_to_check) diff --git a/tests/test_alembic/test_revisions.py b/tests/test_alembic/test_revisions.py index 75df5f0c..9bc287d1 100644 --- a/tests/test_alembic/test_revisions.py +++ b/tests/test_alembic/test_revisions.py @@ -15,6 +15,7 
@@ from sqlalchemy import text +from tests.test_alembic.helpers import columns_in_table from tests.test_alembic.helpers import get_enum_values, table_creation_check @@ -163,39 +164,38 @@ def test_convert_batch_strategy_status_to_enum(alembic_runner): "aborted" ] d = {} - with alembic_runner.session() as session: - for strategy, status in product(existing_strategy_strings, existing_status_strings): - # Execute inserts and store each ID - id_ = session.execute(text( - f""" - INSERT INTO BATCHES - (strategy, user_id, status, total_url_count, original_url_count, duplicate_url_count) - VALUES( - '{strategy}', - 1, - '{status}', - 0, - 0, - 0 - ) - RETURNING ID; - """ - )).scalar() - d[id_] = [strategy, status] - session.commit() + for strategy, status in product(existing_strategy_strings, existing_status_strings): + # Execute inserts and store each ID + query = f""" + INSERT INTO BATCHES + (strategy, user_id, status, total_url_count, original_url_count, duplicate_url_count) + VALUES( + '{strategy}', + 1, + '{status}', + 0, + 0, + 0 + ) + RETURNING ID; + """ + + id_ = alembic_runner.execute(query)[0][0] + d[id_] = [strategy, status] alembic_runner.upgrade('db6d60feda7d') - with alembic_runner.session() as session: - # Assert all strategies and statuses remain the same - for id_ in d.keys(): - strategy, status = d[id_] - result = session.execute(text( - f""" - SELECT strategy, status FROM BATCHES WHERE id = {id_}; - """ - )).fetchone() - assert result[0] == strategy - assert result [1] == status + + # Assert all strategies and statuses remain the same + for id_ in d.keys(): + strategy, status = d[id_] + + result = alembic_runner.execute( + f""" + SELECT strategy, status FROM BATCHES WHERE id = {id_}; + """ + )[0] + assert result[0] == strategy + assert result [1] == status def test_convert_url_outcome_to_enum(alembic_runner): @@ -208,50 +208,49 @@ def test_convert_url_outcome_to_enum(alembic_runner): 'duplicate', ] d = {} - with alembic_runner.session() as session: - batch_id = session.execute(text( - """INSERT INTO BATCHES - (strategy, user_id, status, total_url_count, original_url_count, duplicate_url_count) - VALUES( - 'ckan', - 1, - 'in-process', - 0, - 0, - 0 - ) - RETURNING ID; + # with alembic_runner.session() as session: + batch_id = alembic_runner.execute( + """INSERT INTO BATCHES + (strategy, user_id, status, total_url_count, original_url_count, duplicate_url_count) + VALUES( + 'ckan', + 1, + 'in-process', + 0, + 0, + 0 + ) + RETURNING ID; + """ + )[0][0] + + + for outcome in existing_outcome_strings: + id_ = alembic_runner.execute( + f""" + INSERT INTO URLS + (batch_id, url, collector_metadata, outcome) + VALUES ( + '{batch_id}', + 'https://example.com/{outcome}', + '{{}}', + '{outcome}' + ) + RETURNING ID; """ - )).scalar() - - for outcome in existing_outcome_strings: - id_ = session.execute(text( - f""" - INSERT INTO URLS - (batch_id, url, collector_metadata, outcome) - VALUES ( - '{batch_id}', - 'https://example.com/{outcome}', - '{{}}', - '{outcome}' - ) - RETURNING ID; - """ - )).scalar() - d[id_] = outcome - session.commit() + )[0][0] + d[id_] = outcome alembic_runner.upgrade('e27c5f8409a3') - with alembic_runner.session() as session: - for id_ in d.keys(): - outcome = d[id_] + for id_ in d.keys(): + outcome = d[id_] - result = session.execute(text( - f"""SELECT OUTCOME FROM URLS WHERE ID = {id_};""" - )).scalar() + result = alembic_runner.execute( + f"""SELECT OUTCOME FROM URLS WHERE ID = {id_};""" + )[0][0] - assert result == outcome + assert result == outcome def 
test_create_htmlcontent_and_rooturl_tables(alembic_runner): alembic_runner.upgrade('e27c5f8409a3') @@ -298,7 +297,84 @@ def test_add_in_label_studio_metadata_status(alembic_runner): def test_create_metadata_annotation_table(alembic_runner): table_creation_check( alembic_runner, - "metadata_annotations", + ["metadata_annotations"], start_revision="108dac321086", end_revision="dcd158092de0" - ) \ No newline at end of file + ) + +def test_add_task_tables_and_linking_logic(alembic_runner): + alembic_runner.upgrade("dcd158092de0") + assert not columns_in_table( + alembic_runner, + table_name="url_error_info", + columns_to_check=["task_id"], + ) + assert not columns_in_table( + alembic_runner, + table_name="url_metadata", + columns_to_check=["notes"], + ) + table_creation_check( + alembic_runner, + tables=[ + "tasks", + "task_errors", + "link_task_urls" + ], + end_revision="072b32a45b1c" + ) + assert columns_in_table( + alembic_runner, + table_name="url_error_info", + columns_to_check=["task_id"], + ) + assert columns_in_table( + alembic_runner, + table_name="url_metadata", + columns_to_check=["notes"], + ) + +def test_add_url_agency_suggestions(alembic_runner): + table_creation_check( + alembic_runner, + tables=[ + "url_agency_suggestions" + ], + start_revision="072b32a45b1c", + end_revision="19bf57df581a" + ) + +def test_add_user_url_agency_suggestions(alembic_runner): + def column_check() -> bool: + return columns_in_table( + alembic_runner, + table_name="url_agency_suggestions", + columns_to_check=["user_id"] + ) + + alembic_runner.upgrade("19bf57df581a") + assert not column_check() + alembic_runner.reflect() + alembic_runner.upgrade("8c44e02733ae") + assert column_check() + +def test_revise_agency_suggestions(alembic_runner): + + tables_to_check = [ + "user_url_agency_suggestions", + "automated_url_agency_suggestions", + "agencies", + "confirmed_url_agency" + ] + + alembic_runner.upgrade("8c44e02733ae") + assert alembic_runner.table_exists("url_agency_suggestions") + assert not alembic_runner.tables_exist(tables_to_check) + alembic_runner.upgrade("d7eb670edaf0") + assert not alembic_runner.table_exists("url_agency_suggestions") + assert alembic_runner.tables_exist(tables_to_check) + +def test_full_upgrade_downgrade(alembic_runner): + # Both should run without error + alembic_runner.upgrade("head") + alembic_runner.downgrade("base") \ No newline at end of file diff --git a/tests/test_automated/integration/api/conftest.py b/tests/test_automated/integration/api/conftest.py index 8f80716f..73f0c8ab 100644 --- a/tests/test_automated/integration/api/conftest.py +++ b/tests/test_automated/integration/api/conftest.py @@ -1,13 +1,19 @@ +import asyncio from dataclasses import dataclass from typing import Generator -from unittest.mock import MagicMock +from unittest.mock import MagicMock, AsyncMock import pytest +import pytest_asyncio from starlette.testclient import TestClient from api.main import app +from core.AsyncCore import AsyncCore +from api.routes.review import requires_final_review_permission +from core.DTOs.GetBatchStatusResponse import GetBatchStatusResponse from core.SourceCollectorCore import SourceCollectorCore -from security_manager.SecurityManager import get_access_info, AccessInfo, Permissions +from core.enums import BatchStatus +from security_manager.SecurityManager import get_access_info, AccessInfo, Permissions, require_permission from tests.helpers.DBDataCreator import DBDataCreator from tests.test_automated.integration.api.helpers.RequestValidator import RequestValidator @@ 
-16,32 +22,75 @@ class APITestHelper: request_validator: RequestValidator core: SourceCollectorCore + async_core: AsyncCore db_data_creator: DBDataCreator mock_huggingface_interface: MagicMock - mock_label_studio_interface: MagicMock + + def adb_client(self): + return self.db_data_creator.adb_client + + async def wait_for_all_batches_to_complete(self): + for i in range(20): + data: GetBatchStatusResponse = self.request_validator.get_batch_statuses( + status=BatchStatus.IN_PROCESS + ) + if len(data.results) == 0: + return + print("Waiting...") + await asyncio.sleep(0.1) + raise ValueError("Batches did not complete in expected time") MOCK_USER_ID = 1 +def disable_task_trigger(ath: APITestHelper) -> None: + ath.async_core.collector_manager.post_collection_function_trigger = AsyncMock() + + + +async def fail_task_trigger() -> None: + raise Exception( + "Task Trigger is set to fail in tests by default, to catch unintentional calls." + "If this is not intended, either replace with a Mock or the expected task function." + ) def override_access_info() -> AccessInfo: - return AccessInfo(user_id=MOCK_USER_ID, permissions=[Permissions.SOURCE_COLLECTOR]) + return AccessInfo( + user_id=MOCK_USER_ID, + permissions=[ + Permissions.SOURCE_COLLECTOR, + Permissions.SOURCE_COLLECTOR_FINAL_REVIEW + ] + ) -@pytest.fixture -def client(db_client_test) -> Generator[TestClient, None, None]: +@pytest.fixture(scope="session") +def client() -> Generator[TestClient, None, None]: + # Mock environment with TestClient(app) as c: app.dependency_overrides[get_access_info] = override_access_info - core: SourceCollectorCore = c.app.state.core - # core.shutdown() + app.dependency_overrides[requires_final_review_permission] = override_access_info + async_core: AsyncCore = c.app.state.async_core + + # Interfaces to the web should be mocked + task_manager = async_core.task_manager + task_manager.huggingface_interface = AsyncMock() + task_manager.url_request_interface = AsyncMock() + task_manager.discord_poster = AsyncMock() + # Disable Logger + task_manager.logger.disabled = True + # Set trigger to fail immediately if called, to force it to be manually specified in tests + task_manager.task_trigger._func = fail_task_trigger yield c - core.shutdown() -@pytest.fixture -def api_test_helper(client: TestClient, db_data_creator, monkeypatch) -> APITestHelper: + # Reset environment variables back to original state + - return APITestHelper( +@pytest_asyncio.fixture +async def api_test_helper(client: TestClient, db_data_creator, monkeypatch) -> APITestHelper: + yield APITestHelper( request_validator=RequestValidator(client=client), core=client.app.state.core, + async_core=client.app.state.async_core, db_data_creator=db_data_creator, mock_huggingface_interface=MagicMock(), - mock_label_studio_interface=MagicMock() - ) \ No newline at end of file + ) + await client.app.state.async_core.collector_manager.logger.clear_log_queue() diff --git a/tests/test_automated/integration/api/helpers/RequestValidator.py b/tests/test_automated/integration/api/helpers/RequestValidator.py index 7a0e9a6a..9207305a 100644 --- a/tests/test_automated/integration/api/helpers/RequestValidator.py +++ b/tests/test_automated/integration/api/helpers/RequestValidator.py @@ -1,28 +1,52 @@ from http import HTTPStatus from typing import Optional, Annotated +from fastapi import HTTPException from pydantic import BaseModel from starlette.testclient import TestClient from collector_db.DTOs.BatchInfo import BatchInfo +from collector_db.DTOs.GetTaskStatusResponseInfo import 
GetTaskStatusResponseInfo +from collector_db.DTOs.TaskInfo import TaskInfo +from collector_db.enums import TaskType from collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO from collector_manager.enums import CollectorType +from core.DTOs.AllAnnotationPostInfo import AllAnnotationPostInfo +from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo, FinalReviewBaseInfo from core.DTOs.GetBatchLogsResponse import GetBatchLogsResponse from core.DTOs.GetBatchStatusResponse import GetBatchStatusResponse from core.DTOs.GetDuplicatesByBatchResponse import GetDuplicatesByBatchResponse -from core.DTOs.GetNextURLForRelevanceAnnotationResponse import GetNextURLForRelevanceAnnotationResponse +from core.DTOs.GetMetricsBacklogResponse import GetMetricsBacklogResponseDTO +from core.DTOs.GetMetricsBatchesAggregatedResponseDTO import GetMetricsBatchesAggregatedResponseDTO +from core.DTOs.GetMetricsBatchesBreakdownResponseDTO import GetMetricsBatchesBreakdownResponseDTO +from core.DTOs.GetMetricsURLsAggregatedResponseDTO import GetMetricsURLsAggregatedResponseDTO +from core.DTOs.GetMetricsURLsBreakdownPendingResponseDTO import GetMetricsURLsBreakdownPendingResponseDTO +from core.DTOs.GetMetricsURLsBreakdownSubmittedResponseDTO import GetMetricsURLsBreakdownSubmittedResponseDTO +from core.DTOs.GetNextRecordTypeAnnotationResponseInfo import GetNextRecordTypeAnnotationResponseOuterInfo +from core.DTOs.GetNextRelevanceAnnotationResponseInfo import GetNextRelevanceAnnotationResponseOuterInfo +from core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAnnotationResponse, \ + URLAgencyAnnotationPostInfo +from core.DTOs.GetNextURLForAllAnnotationResponse import GetNextURLForAllAnnotationResponse +from core.DTOs.GetNextURLForFinalReviewResponse import GetNextURLForFinalReviewOuterResponse +from core.DTOs.GetTasksResponse import GetTasksResponse from core.DTOs.GetURLsByBatchResponse import GetURLsByBatchResponse from core.DTOs.GetURLsResponseInfo import GetURLsResponseInfo -from core.DTOs.LabelStudioExportResponseInfo import LabelStudioExportResponseInfo -from core.DTOs.MessageCountResponse import MessageCountResponse +from core.DTOs.ManualBatchInputDTO import ManualBatchInputDTO +from core.DTOs.ManualBatchResponseDTO import ManualBatchResponseDTO from core.DTOs.MessageResponse import MessageResponse -from core.DTOs.RelevanceAnnotationInfo import RelevanceAnnotationPostInfo +from core.DTOs.RecordTypeAnnotationPostInfo import RecordTypeAnnotationPostInfo +from core.DTOs.RelevanceAnnotationPostInfo import RelevanceAnnotationPostInfo +from core.DTOs.SearchURLResponse import SearchURLResponse from core.enums import BatchStatus from util.helper_functions import update_if_not_none class ExpectedResponseInfo(BaseModel): - status_code: Annotated[HTTPStatus, "The expected status code"] = HTTPStatus.OK + status_code: Annotated[ + HTTPStatus, + "The expected status code" + ] = HTTPStatus.OK + message: Optional[str] = None class RequestValidator: """ @@ -54,6 +78,31 @@ def open( assert response.status_code == expected_response.status_code, response.text return response.json() + def open_v2( + self, + method: str, + url: str, + params: Optional[dict] = None, + **kwargs + ) -> dict: + """ + Variation on open that raises an exception rather than check the status code + """ + if params: + kwargs["params"] = params + response = self.client.request( + method=method, + url=url, + headers={"Authorization": "Bearer token"}, # Fake authentication that is overridden during testing + **kwargs + ) + if 
response.status_code != HTTPStatus.OK: + raise HTTPException( + status_code=response.status_code, + detail=response.json() + ) + return response.json() + def get( self, url: str, @@ -84,6 +133,33 @@ def post( **kwargs ) + def post_v2( + self, + url: str, + params: Optional[dict] = None, + **kwargs + ) -> dict: + return self.open_v2( + method="POST", + url=url, + params=params, + **kwargs + ) + + def get_v2( + self, + url: str, + params: Optional[dict] = None, + **kwargs + ) -> dict: + return self.open_v2( + method="GET", + url=url, + params=params, + **kwargs + ) + + def put( self, url: str, @@ -112,13 +188,19 @@ def delete( expected_response=expected_response, **kwargs) - def get_batch_statuses(self, collector_type: Optional[CollectorType] = None, status: Optional[BatchStatus] = None) -> GetBatchStatusResponse: + def get_batch_statuses( + self, + collector_type: Optional[CollectorType] = None, + status: Optional[BatchStatus] = None, + has_pending_urls: Optional[bool] = None + ) -> GetBatchStatusResponse: params = {} update_if_not_none( target=params, source={ "collector_type": collector_type.value if collector_type else None, - "status": status.value if status else None + "status": status.value if status else None, + "has_pending_urls": has_pending_urls } ) data = self.get( @@ -160,45 +242,211 @@ def get_batch_logs(self, batch_id: int) -> GetBatchLogsResponse: ) return GetBatchLogsResponse(**data) - def export_batch_to_label_studio(self, batch_id: int) -> LabelStudioExportResponseInfo: - data = self.post( - url=f"/label-studio/export-batch/{batch_id}" - ) - return LabelStudioExportResponseInfo(**data) - def abort_batch(self, batch_id: int) -> MessageResponse: data = self.post( url=f"/batch/{batch_id}/abort" ) return MessageResponse(**data) - def process_relevancy(self) -> MessageCountResponse: - # TODO: Delete - data = self.post( - url=f"process/relevancy" + def get_next_relevance_annotation(self) -> GetNextRelevanceAnnotationResponseOuterInfo: + data = self.get( + url=f"/annotate/relevance" ) - return MessageCountResponse(**data) + return GetNextRelevanceAnnotationResponseOuterInfo(**data) - def get_next_relevance_annotation(self) -> GetNextURLForRelevanceAnnotationResponse: + def get_next_record_type_annotation(self) -> GetNextRecordTypeAnnotationResponseOuterInfo: data = self.get( - url=f"/annotate/relevance" + url=f"/annotate/record-type" + ) + return GetNextRecordTypeAnnotationResponseOuterInfo(**data) + + def post_record_type_annotation_and_get_next( + self, + url_id: int, + record_type_annotation_post_info: RecordTypeAnnotationPostInfo + ) -> GetNextRecordTypeAnnotationResponseOuterInfo: + data = self.post_v2( + url=f"/annotate/record-type/{url_id}", + json=record_type_annotation_post_info.model_dump(mode='json') ) - return GetNextURLForRelevanceAnnotationResponse(**data) + return GetNextRecordTypeAnnotationResponseOuterInfo(**data) def post_relevance_annotation_and_get_next( self, - metadata_id: int, + url_id: int, relevance_annotation_post_info: RelevanceAnnotationPostInfo - ) -> GetNextURLForRelevanceAnnotationResponse: + ) -> GetNextRelevanceAnnotationResponseOuterInfo: + data = self.post_v2( + url=f"/annotate/relevance/{url_id}", + json=relevance_annotation_post_info.model_dump(mode='json') + ) + return GetNextRelevanceAnnotationResponseOuterInfo(**data) + + async def get_next_agency_annotation(self) -> GetNextURLForAgencyAnnotationResponse: + data = self.get( + url=f"/annotate/agency" + ) + return GetNextURLForAgencyAnnotationResponse(**data) + + async def 
post_agency_annotation_and_get_next( + self, + url_id: int, + agency_annotation_post_info: URLAgencyAnnotationPostInfo + ) -> GetNextURLForAgencyAnnotationResponse: data = self.post( - url=f"/annotate/relevance/{metadata_id}", - json=relevance_annotation_post_info.model_dump() + url=f"/annotate/agency/{url_id}", + json=agency_annotation_post_info.model_dump(mode='json') ) - return GetNextURLForRelevanceAnnotationResponse(**data) + return GetNextURLForAgencyAnnotationResponse(**data) def get_urls(self, page: int = 1, errors: bool = False) -> GetURLsResponseInfo: data = self.get( url=f"/url", params={"page": page, "errors": errors} ) - return GetURLsResponseInfo(**data) \ No newline at end of file + return GetURLsResponseInfo(**data) + + def get_task_info(self, task_id: int) -> TaskInfo: + data = self.get( + url=f"/task/{task_id}" + ) + return TaskInfo(**data) + + def get_tasks( + self, + page: int = 1, + task_type: Optional[TaskType] = None, + task_status: Optional[BatchStatus] = None + ) -> GetTasksResponse: + params = {"page": page} + update_if_not_none( + target=params, + source={ + "task_type": task_type.value if task_type else None, + "task_status": task_status.value if task_status else None + } + ) + data = self.get( + url=f"/task", + params=params + ) + return GetTasksResponse(**data) + + async def review_next_source(self) -> GetNextURLForFinalReviewOuterResponse: + data = self.get( + url=f"/review/next-source" + ) + return GetNextURLForFinalReviewOuterResponse(**data) + + async def approve_and_get_next_source_for_review( + self, + approval_info: FinalReviewApprovalInfo + ) -> GetNextURLForFinalReviewOuterResponse: + data = self.post( + url=f"/review/approve-source", + json=approval_info.model_dump(mode='json') + ) + return GetNextURLForFinalReviewOuterResponse(**data) + + async def reject_and_get_next_source_for_review( + self, + review_info: FinalReviewBaseInfo + ) -> GetNextURLForFinalReviewOuterResponse: + data = self.post( + url=f"/review/reject-source", + json=review_info.model_dump(mode='json') + ) + return GetNextURLForFinalReviewOuterResponse(**data) + + async def get_current_task_status(self) -> GetTaskStatusResponseInfo: + data = self.get( + url=f"/task/status" + ) + return GetTaskStatusResponseInfo(**data) + + async def get_next_url_for_all_annotations( + self, + batch_id: Optional[int] = None + ) -> GetNextURLForAllAnnotationResponse: + params = {} + update_if_not_none( + target=params, + source={"batch_id": batch_id} + ) + data = self.get( + url=f"/annotate/all", + params=params + ) + return GetNextURLForAllAnnotationResponse(**data) + + async def post_all_annotations_and_get_next( + self, + url_id: int, + all_annotations_post_info: AllAnnotationPostInfo, + batch_id: Optional[int] = None, + ) -> GetNextURLForAllAnnotationResponse: + params = {} + update_if_not_none( + target=params, + source={"batch_id": batch_id} + ) + data = self.post( + url=f"/annotate/all/{url_id}", + params=params, + json=all_annotations_post_info.model_dump(mode='json') + ) + return GetNextURLForAllAnnotationResponse(**data) + + async def submit_manual_batch( + self, + dto: ManualBatchInputDTO, + ) -> ManualBatchResponseDTO: + data = self.post_v2( + url="/collector/manual", + json=dto.model_dump(mode='json'), + ) + return ManualBatchResponseDTO(**data) + + async def search_url(self, url: str) -> SearchURLResponse: + data = self.get( + url=f"/search/url", + params={"url": url} + ) + return SearchURLResponse(**data) + + async def get_batches_aggregated_metrics(self) -> 
GetMetricsBatchesAggregatedResponseDTO: + data = self.get_v2( + url="/metrics/batches/aggregated" + ) + return GetMetricsBatchesAggregatedResponseDTO(**data) + + async def get_batches_breakdown_metrics(self, page: int) -> GetMetricsBatchesBreakdownResponseDTO: + data = self.get_v2( + url="/metrics/batches/breakdown", + params={"page": page} + ) + return GetMetricsBatchesBreakdownResponseDTO(**data) + + async def get_urls_breakdown_submitted_metrics(self) -> GetMetricsURLsBreakdownSubmittedResponseDTO: + data = self.get_v2( + url="/metrics/urls/breakdown/submitted" + ) + return GetMetricsURLsBreakdownSubmittedResponseDTO(**data) + + async def get_urls_breakdown_pending_metrics(self) -> GetMetricsURLsBreakdownPendingResponseDTO: + data = self.get_v2( + url="/metrics/urls/breakdown/pending" + ) + return GetMetricsURLsBreakdownPendingResponseDTO(**data) + + async def get_backlog_metrics(self) -> GetMetricsBacklogResponseDTO: + data = self.get_v2( + url="/metrics/backlog" + ) + return GetMetricsBacklogResponseDTO(**data) + + async def get_urls_aggregated_metrics(self) -> GetMetricsURLsAggregatedResponseDTO: + data = self.get_v2( + url="/metrics/urls/aggregate", + ) + return GetMetricsURLsAggregatedResponseDTO(**data) \ No newline at end of file diff --git a/tests/test_automated/integration/api/test_annotate.py b/tests/test_automated/integration/api/test_annotate.py index 5b8730cf..03088cd7 100644 --- a/tests/test_automated/integration/api/test_annotate.py +++ b/tests/test_automated/integration/api/test_annotate.py @@ -1,79 +1,714 @@ +from http import HTTPStatus + import pytest +from fastapi import HTTPException from collector_db.DTOs.InsertURLsInfo import InsertURLsInfo -from collector_db.enums import URLMetadataAttributeType, ValidationStatus, ValidationSource -from core.DTOs.GetNextURLForRelevanceAnnotationResponse import GetNextURLForRelevanceAnnotationResponse -from core.DTOs.RelevanceAnnotationInfo import RelevanceAnnotationPostInfo +from collector_db.DTOs.URLMapping import URLMapping +from collector_db.models import UserUrlAgencySuggestion, UserRelevantSuggestion, UserRecordTypeSuggestion +from core.DTOs.AllAnnotationPostInfo import AllAnnotationPostInfo +from core.DTOs.GetNextRecordTypeAnnotationResponseInfo import GetNextRecordTypeAnnotationResponseOuterInfo +from core.DTOs.GetNextRelevanceAnnotationResponseInfo import GetNextRelevanceAnnotationResponseOuterInfo +from core.DTOs.GetNextURLForAgencyAnnotationResponse import URLAgencyAnnotationPostInfo +from core.DTOs.RecordTypeAnnotationPostInfo import RecordTypeAnnotationPostInfo +from core.DTOs.RelevanceAnnotationPostInfo import RelevanceAnnotationPostInfo +from core.classes.ErrorManager import ErrorTypes +from core.enums import RecordType, SuggestionType +from core.exceptions import FailedValidationException +from tests.helpers.complex_test_data_functions import AnnotateAgencySetupInfo, setup_for_annotate_agency, \ + setup_for_get_next_url_for_final_review +from html_tag_collector.DataClassTags import ResponseHTMLInfo +from tests.helpers.DBDataCreator import BatchURLCreationInfo from tests.test_automated.integration.api.conftest import MOCK_USER_ID +def check_url_mappings_match( + map_1: URLMapping, + map_2: URLMapping +): + assert map_1.url_id == map_2.url_id + assert map_1.url == map_2.url + +def check_html_info_not_empty( + html_info: ResponseHTMLInfo +): + assert not html_info_empty(html_info) + +def html_info_empty( + html_info: ResponseHTMLInfo +) -> bool: + return html_info.description == "" and html_info.title == ""
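The conflict tests further down rely on the `_v2` client wrappers raising `HTTPException` (carrying the response status code and the API's JSON error body in `detail`) instead of returning a parsed response. A minimal sketch of that assertion pattern follows; the `assert_annotation_conflict` helper and its wiring are illustrative only and not part of this diff:

from http import HTTPStatus

from fastapi import HTTPException

from core.classes.ErrorManager import ErrorTypes


def assert_annotation_conflict(exc: HTTPException, url_id: int) -> None:
    # The v2 wrappers re-raise the API's error response, so the JSON body
    # sits nested under `detail`, exactly as the tests below unpack it.
    assert exc.status_code == HTTPStatus.CONFLICT
    assert exc.detail["detail"]["code"] == ErrorTypes.ANNOTATION_EXISTS.value
    assert str(url_id) in exc.detail["detail"]["message"]

Used with `with pytest.raises(HTTPException) as exc_info:` around the annotation call, followed by `assert_annotation_conflict(exc_info.value, url_id)`, this avoids the silent pass a bare try/except allows when no exception is raised.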
@pytest.mark.asyncio -async def test_annotate(api_test_helper): +async def test_annotate_relevancy(api_test_helper): ath = api_test_helper - # Create batch with status `in-process` and strategy `example` batch_id = ath.db_data_creator.batch() + # Create 2 URLs with outcome `pending` iui: InsertURLsInfo = ath.db_data_creator.urls(batch_id=batch_id, url_count=2) url_1 = iui.url_mappings[0] url_2 = iui.url_mappings[1] - kwargs = { - "attribute": URLMetadataAttributeType.RELEVANT, - "validation_status": ValidationStatus.PENDING_VALIDATION, - "validation_source": ValidationSource.MACHINE_LEARNING - } - # Add `Relevancy` attribute with value `True` to 1st URL - await ath.db_data_creator.metadata( - url_ids=[url_1.url_id], - **kwargs + await ath.db_data_creator.auto_relevant_suggestions( + url_id=url_1.url_id, + relevant=True ) - # and `Relevancy` attribute with value `False` to 2nd other URL - await ath.db_data_creator.metadata( - url_ids=[url_2.url_id], - **kwargs + + # Add 'Relevancy' attribute with value `False` to 2nd URL + await ath.db_data_creator.auto_relevant_suggestions( + url_id=url_2.url_id, + relevant=False ) # Add HTML data to both await ath.db_data_creator.html_data([url_1.url_id, url_2.url_id]) - # Call `GET` `/annotate/url` and receive next URL - request_info_1: GetNextURLForRelevanceAnnotationResponse = ath.request_validator.get_next_relevance_annotation() + # Call `GET` `/annotate/relevance` and receive next URL + request_info_1: GetNextRelevanceAnnotationResponseOuterInfo = api_test_helper.request_validator.get_next_relevance_annotation() inner_info_1 = request_info_1.next_annotation - # Validate presence of HTML data in `html` field - assert inner_info_1.html_info.description != "" - assert inner_info_1.html_info.title != "" + check_url_mappings_match(inner_info_1.url_info, url_1) + check_html_info_not_empty(inner_info_1.html_info) + + # Validate that the correct relevant value is returned + assert inner_info_1.suggested_relevant is True - post_info = RelevanceAnnotationPostInfo( - is_relevant=True + # A second user should see the same URL + + + # Annotate with value 'False' and get next URL + request_info_2: GetNextRelevanceAnnotationResponseOuterInfo = api_test_helper.request_validator.post_relevance_annotation_and_get_next( + url_id=inner_info_1.url_info.url_id, + relevance_annotation_post_info=RelevanceAnnotationPostInfo( + is_relevant=False + ) ) - # Call `POST` `/annotate/url` with finished annotation, and receive next URL - request_info_2 = ath.request_validator.post_relevance_annotation_and_get_next( - metadata_id=inner_info_1.metadata_id, - relevance_annotation_post_info=post_info + + inner_info_2 = request_info_2.next_annotation + + check_url_mappings_match( + inner_info_2.url_info, + url_2 + ) + check_html_info_not_empty(inner_info_2.html_info) + + request_info_3: GetNextRelevanceAnnotationResponseOuterInfo = api_test_helper.request_validator.post_relevance_annotation_and_get_next( + url_id=inner_info_2.url_info.url_id, + relevance_annotation_post_info=RelevanceAnnotationPostInfo( + is_relevant=True + ) ) + + assert request_info_3.next_annotation is None + + # Get all URL annotations. 
Confirm they exist for user + adb_client = ath.adb_client() + results: list[UserRelevantSuggestion] = await adb_client.get_all(UserRelevantSuggestion) + result_1 = results[0] + result_2 = results[1] + + assert result_1.url_id == inner_info_1.url_info.url_id + assert result_1.relevant is False + + assert result_2.url_id == inner_info_2.url_info.url_id + assert result_2.relevant is True + + # If user submits an annotation for the same URL, the previous annotation should be overwritten + request_info_4: GetNextRelevanceAnnotationResponseOuterInfo = api_test_helper.request_validator.post_relevance_annotation_and_get_next( + url_id=inner_info_1.url_info.url_id, + relevance_annotation_post_info=RelevanceAnnotationPostInfo( + is_relevant=True + ) + ) + + assert request_info_4.next_annotation is None + + results: list[UserRelevantSuggestion] = await adb_client.get_all(UserRelevantSuggestion) + assert len(results) == 2 + + for result in results: + if result.url_id == inner_info_1.url_info.url_id: + assert result.relevant is True + + +@pytest.mark.asyncio +async def test_annotate_relevancy_already_annotated_by_different_user( + api_test_helper +): + ath = api_test_helper + + creation_info: BatchURLCreationInfo = await ath.db_data_creator.batch_and_urls( + url_count=1 + ) + + await ath.db_data_creator.user_relevant_suggestion( + url_id=creation_info.url_ids[0], + user_id=2, + relevant=True + ) + + # Annotate with different user (default is 1) and get conflict error + try: + response = await ath.request_validator.post_relevance_annotation_and_get_next( + url_id=creation_info.url_ids[0], + relevance_annotation_post_info=RelevanceAnnotationPostInfo( + is_relevant=False + ) + ) + except HTTPException as e: + assert e.status_code == HTTPStatus.CONFLICT + assert e.detail["detail"]["code"] == ErrorTypes.ANNOTATION_EXISTS.value + assert e.detail["detail"]["message"] == f"Annotation of type RELEVANCE already exists for url {creation_info.url_ids[0]}" + + +@pytest.mark.asyncio +async def test_annotate_relevancy_no_html(api_test_helper): + ath = api_test_helper + + batch_id = ath.db_data_creator.batch() + + # Create 2 URLs with outcome `pending` + iui: InsertURLsInfo = ath.db_data_creator.urls(batch_id=batch_id, url_count=2) + + url_1 = iui.url_mappings[0] + url_2 = iui.url_mappings[1] + + # Add `Relevancy` attribute with value `True` to 1st URL + await ath.db_data_creator.auto_relevant_suggestions( + url_id=url_1.url_id, + relevant=True + ) + + # Add 'Relevancy' attribute with value `False` to 2nd URL + await ath.db_data_creator.auto_relevant_suggestions( + url_id=url_2.url_id, + relevant=False + ) + + # Call `GET` `/annotate/relevance` and receive next URL + request_info_1: GetNextRelevanceAnnotationResponseOuterInfo = api_test_helper.request_validator.get_next_relevance_annotation() + inner_info_1 = request_info_1.next_annotation + + check_url_mappings_match(inner_info_1.url_info, url_1) + assert html_info_empty(inner_info_1.html_info) + +@pytest.mark.asyncio +async def test_annotate_record_type(api_test_helper): + ath = api_test_helper + + batch_id = ath.db_data_creator.batch() + + # Create 2 URLs with outcome `pending` + iui: InsertURLsInfo = ath.db_data_creator.urls(batch_id=batch_id, url_count=2) + + url_1 = iui.url_mappings[0] + url_2 = iui.url_mappings[1] + + # Add record type attribute with value `Accident Reports` to 1st URL + await ath.db_data_creator.auto_record_type_suggestions( + url_id=url_1.url_id, + record_type=RecordType.ACCIDENT_REPORTS + ) + + # Add 'Record Type' attribute with value `Dispatch Recordings` to 
2nd URL + await ath.db_data_creator.auto_record_type_suggestions( + url_id=url_2.url_id, + record_type=RecordType.DISPATCH_RECORDINGS + ) + + # Add HTML data to both + await ath.db_data_creator.html_data([url_1.url_id, url_2.url_id]) + + # Call `GET` `/annotate/record-type` and receive next URL + request_info_1: GetNextRecordTypeAnnotationResponseOuterInfo = api_test_helper.request_validator.get_next_record_type_annotation() + inner_info_1 = request_info_1.next_annotation + + check_url_mappings_match(inner_info_1.url_info, url_1) + check_html_info_not_empty(inner_info_1.html_info) + + # Validate that the correct record type is returned + assert inner_info_1.suggested_record_type == RecordType.ACCIDENT_REPORTS + + # Annotate with value 'Personnel Records' and get next URL + request_info_2: GetNextRecordTypeAnnotationResponseOuterInfo = api_test_helper.request_validator.post_record_type_annotation_and_get_next( + url_id=inner_info_1.url_info.url_id, + record_type_annotation_post_info=RecordTypeAnnotationPostInfo( + record_type=RecordType.PERSONNEL_RECORDS + ) + ) + inner_info_2 = request_info_2.next_annotation - # Confirm 2nd URL is distinct from 1st - assert inner_info_1.url != inner_info_2.url - # Validate presence of appropriate HTML data in `html` field - assert inner_info_2.html_info.description != "" - assert inner_info_2.html_info.title != "" + check_url_mappings_match(inner_info_2.url_info, url_2) + check_html_info_not_empty(inner_info_2.html_info) + + request_info_3: GetNextRecordTypeAnnotationResponseOuterInfo = api_test_helper.request_validator.post_record_type_annotation_and_get_next( + url_id=inner_info_2.url_info.url_id, + record_type_annotation_post_info=RecordTypeAnnotationPostInfo( + record_type=RecordType.ANNUAL_AND_MONTHLY_REPORTS + ) + ) + + assert request_info_3.next_annotation is None + + # Get all URL annotations. 
Confirm they exist for user + adb_client = ath.adb_client() + results: list[UserRecordTypeSuggestion] = await adb_client.get_all(UserRecordTypeSuggestion) + result_1 = results[0] + result_2 = results[1] + + assert result_1.url_id == inner_info_1.url_info.url_id + assert result_1.record_type == RecordType.PERSONNEL_RECORDS.value + + assert result_2.url_id == inner_info_2.url_info.url_id + assert result_2.record_type == RecordType.ANNUAL_AND_MONTHLY_REPORTS.value + + # If user submits annotation for same URL, the URL should be overwritten + + request_info_4: GetNextRecordTypeAnnotationResponseOuterInfo = api_test_helper.request_validator.post_record_type_annotation_and_get_next( + url_id=inner_info_1.url_info.url_id, + record_type_annotation_post_info=RecordTypeAnnotationPostInfo( + record_type=RecordType.BOOKING_REPORTS + ) + ) + + assert request_info_4.next_annotation is None + + results: list[UserRecordTypeSuggestion] = await adb_client.get_all(UserRecordTypeSuggestion) + assert len(results) == 2 + + for result in results: + if result.url_id == inner_info_1.url_info.url_id: + assert result.record_type == RecordType.BOOKING_REPORTS.value + +@pytest.mark.asyncio +async def test_annotate_record_type_already_annotated_by_different_user( + api_test_helper +): + ath = api_test_helper + + creation_info: BatchURLCreationInfo = await ath.db_data_creator.batch_and_urls( + url_count=1 + ) - # Validation annotation is present in database - results = await api_test_helper.db_data_creator.adb_client.get_annotations_for_metadata_id( - metadata_id=inner_info_1.metadata_id + await ath.db_data_creator.user_record_type_suggestion( + url_id=creation_info.url_ids[0], + user_id=2, + record_type=RecordType.ACCIDENT_REPORTS ) - assert len(results) == 1 - assert results[0].user_id == MOCK_USER_ID - assert results[0].value == "True" - # Submit this one in turn, and no subsequent annotation info should be returned - request_info_3 = ath.request_validator.post_relevance_annotation_and_get_next( - metadata_id=inner_info_2.metadata_id, - relevance_annotation_post_info=post_info + # Annotate with different user (default is 1) and get conflict error + try: + response = await ath.request_validator.post_record_type_annotation_and_get_next( + url_id=creation_info.url_ids[0], + record_type_annotation_post_info=RecordTypeAnnotationPostInfo( + record_type=RecordType.ANNUAL_AND_MONTHLY_REPORTS + ) + ) + except HTTPException as e: + assert e.status_code == HTTPStatus.CONFLICT + assert e.detail["detail"]["code"] == ErrorTypes.ANNOTATION_EXISTS.value + assert e.detail["detail"]["message"] == f"Annotation of type RECORD_TYPE already exists for url {creation_info.url_ids[0]}" + + +@pytest.mark.asyncio +async def test_annotate_record_type_no_html_info(api_test_helper): + ath = api_test_helper + + batch_id = ath.db_data_creator.batch() + + # Create 2 URLs with outcome `pending` + iui: InsertURLsInfo = ath.db_data_creator.urls(batch_id=batch_id, url_count=2) + + url_1 = iui.url_mappings[0] + url_2 = iui.url_mappings[1] + + # Add record type attribute with value `Accident Reports` to 1st URL + await ath.db_data_creator.auto_record_type_suggestions( + url_id=url_1.url_id, + record_type=RecordType.ACCIDENT_REPORTS + ) + + # Add 'Record Type' attribute with value `Dispatch Recordings` to 2nd URL + await ath.db_data_creator.auto_record_type_suggestions( + url_id=url_2.url_id, + record_type=RecordType.DISPATCH_RECORDINGS + ) + + # Call `GET` `/annotate/record-type` and receive next URL + request_info_1: 
GetNextRecordTypeAnnotationResponseOuterInfo = api_test_helper.request_validator.get_next_record_type_annotation() + inner_info_1 = request_info_1.next_annotation + + check_url_mappings_match(inner_info_1.url_info, url_1) + assert html_info_empty(inner_info_1.html_info) + +@pytest.mark.asyncio +async def test_annotate_agency_multiple_auto_suggestions(api_test_helper): + """ + Test Scenario: Multiple Auto Suggestions + A URL has multiple Agency Auto Suggestion and has not been annotated by the User + The user should receive all of the auto suggestions with full detail + """ + ath = api_test_helper + buci: BatchURLCreationInfo = await ath.db_data_creator.batch_and_urls( + url_count=1, + with_html_content=True + ) + await ath.db_data_creator.auto_suggestions( + url_ids=buci.url_ids, + num_suggestions=2, + suggestion_type=SuggestionType.AUTO_SUGGESTION + ) + + # User requests next annotation + response = await ath.request_validator.get_next_agency_annotation() + + assert response.next_annotation + next_annotation = response.next_annotation + # Check that url_id matches the one we inserted + assert next_annotation.url_id == buci.url_ids[0] + + # Check that html data is present + assert next_annotation.html_info.description != "" + assert next_annotation.html_info.title != "" + + # Check that two agency_suggestions exist + assert len(next_annotation.agency_suggestions) == 2 + + for agency_suggestion in next_annotation.agency_suggestions: + assert agency_suggestion.suggestion_type == SuggestionType.AUTO_SUGGESTION + assert agency_suggestion.pdap_agency_id is not None + assert agency_suggestion.agency_name is not None + assert agency_suggestion.state is not None + assert agency_suggestion.county is not None + assert agency_suggestion.locality is not None + + +@pytest.mark.asyncio +async def test_annotate_agency_multiple_auto_suggestions_no_html(api_test_helper): + """ + Test Scenario: Multiple Auto Suggestions + A URL has multiple Agency Auto Suggestion and has not been annotated by the User + The user should receive all of the auto suggestions with full detail + """ + ath = api_test_helper + buci: BatchURLCreationInfo = await ath.db_data_creator.batch_and_urls( + url_count=1, + with_html_content=False + ) + await ath.db_data_creator.auto_suggestions( + url_ids=buci.url_ids, + num_suggestions=2, + suggestion_type=SuggestionType.AUTO_SUGGESTION + ) + + # User requests next annotation + response = await ath.request_validator.get_next_agency_annotation() + + assert response.next_annotation + next_annotation = response.next_annotation + # Check that url_id matches the one we inserted + assert next_annotation.url_id == buci.url_ids[0] + + # Check that html data is not present + assert next_annotation.html_info.description == "" + assert next_annotation.html_info.title == "" + +@pytest.mark.asyncio +async def test_annotate_agency_single_unknown_auto_suggestion(api_test_helper): + """ + Test Scenario: Single Unknown Auto Suggestion + A URL has a single Unknown Agency Auto Suggestion and has not been annotated by the User + The user should receive a single Unknown Auto Suggestion lacking other detail + """ + ath = api_test_helper + buci: BatchURLCreationInfo = await ath.db_data_creator.batch_and_urls( + url_count=1, + with_html_content=True + ) + await ath.db_data_creator.auto_suggestions( + url_ids=buci.url_ids, + num_suggestions=1, + suggestion_type=SuggestionType.UNKNOWN + ) + response = await ath.request_validator.get_next_agency_annotation() + + assert response.next_annotation + next_annotation = 
response.next_annotation + # Check that url_id matches the one we inserted + assert next_annotation.url_id == buci.url_ids[0] + + # Check that html data is present + assert next_annotation.html_info.description != "" + assert next_annotation.html_info.title != "" + + # Check that one agency_suggestion exists + assert len(next_annotation.agency_suggestions) == 1 + + agency_suggestion = next_annotation.agency_suggestions[0] + + assert agency_suggestion.suggestion_type == SuggestionType.UNKNOWN + assert agency_suggestion.pdap_agency_id is None + assert agency_suggestion.agency_name is None + assert agency_suggestion.state is None + assert agency_suggestion.county is None + assert agency_suggestion.locality is None + + +@pytest.mark.asyncio +async def test_annotate_agency_single_confirmed_agency(api_test_helper): + """ + Test Scenario: Single Confirmed Agency + A URL has a single Confirmed Agency and has not been annotated by the User + The user should not receive this URL to annotate + """ + ath = api_test_helper + buci: BatchURLCreationInfo = await ath.db_data_creator.batch_and_urls( + url_count=1, + with_html_content=True + ) + await ath.db_data_creator.confirmed_suggestions( + url_ids=buci.url_ids, + ) + response = await ath.request_validator.get_next_agency_annotation() + assert response.next_annotation is None + +@pytest.mark.asyncio +async def test_annotate_agency_other_user_annotation(api_test_helper): + """ + Test Scenario: Other User Annotation + A URL has been annotated by another User + Our user should still receive this URL to annotate + """ + ath = api_test_helper + setup_info: AnnotateAgencySetupInfo = await setup_for_annotate_agency( + db_data_creator=ath.db_data_creator, + url_count=1 + ) + url_ids = setup_info.url_ids + + response = await ath.request_validator.get_next_agency_annotation() + + assert response.next_annotation + next_annotation = response.next_annotation + # Check that url_id matches the one we inserted + assert next_annotation.url_id == url_ids[0] + + # Check that html data is present + assert next_annotation.html_info.description != "" + assert next_annotation.html_info.title != "" + + # Check that one agency_suggestion exists + assert len(next_annotation.agency_suggestions) == 1 + + # Test that another user can insert a suggestion + await ath.db_data_creator.manual_suggestion( + user_id=MOCK_USER_ID + 1, + url_id=url_ids[0], + ) + + # After this, text that our user does not receive this URL + response = await ath.request_validator.get_next_agency_annotation() + assert response.next_annotation is None + +@pytest.mark.asyncio +async def test_annotate_agency_submit_and_get_next(api_test_helper): + """ + Test Scenario: Submit and Get Next (no other URL available) + A URL has been annotated by our User, and no other valid URLs have not been annotated + Our user should not receive another URL to annotate + Until another relevant URL is added + """ + ath = api_test_helper + setup_info: AnnotateAgencySetupInfo = await setup_for_annotate_agency( + db_data_creator=ath.db_data_creator, + url_count=2 + ) + url_ids = setup_info.url_ids + + # User should submit an annotation and receive the next + response = await ath.request_validator.post_agency_annotation_and_get_next( + url_id=url_ids[0], + agency_annotation_post_info=URLAgencyAnnotationPostInfo( + suggested_agency=await ath.db_data_creator.agency(), + is_new=False + ) + + ) + assert response.next_annotation is not None + + # User should submit this annotation and receive none for the next + response = await 
ath.request_validator.post_agency_annotation_and_get_next( + url_id=url_ids[1], + agency_annotation_post_info=URLAgencyAnnotationPostInfo( + suggested_agency=await ath.db_data_creator.agency(), + is_new=False + ) + ) + assert response.next_annotation is None + + +@pytest.mark.asyncio +async def test_annotate_agency_submit_new(api_test_helper): + """ + Test Scenario: Submit New + Our user receives an annotation and marks it as `NEW` + This should complete successfully + And within the database the annotation should be marked as `NEW` + """ + ath = api_test_helper + adb_client = ath.adb_client() + setup_info: AnnotateAgencySetupInfo = await setup_for_annotate_agency( + db_data_creator=ath.db_data_creator, + url_count=1 + ) + url_ids = setup_info.url_ids + + # User should submit an annotation and mark it as New + response = await ath.request_validator.post_agency_annotation_and_get_next( + url_id=url_ids[0], + agency_annotation_post_info=URLAgencyAnnotationPostInfo( + suggested_agency=await ath.db_data_creator.agency(), + is_new=True + ) + ) + assert response.next_annotation is None + + # Within database, the annotation should be marked as `NEW` + all_manual_suggestions = await adb_client.get_all(UserUrlAgencySuggestion) + assert len(all_manual_suggestions) == 1 + assert all_manual_suggestions[0].is_new + +@pytest.mark.asyncio +async def test_annotate_all(api_test_helper): + """ + Test the happy path workflow for the all-annotations endpoint + The user should be able to get a valid URL (filtering on batch id if needed), + submit a full annotation, and receive another URL + """ + ath = api_test_helper + adb_client = ath.adb_client() + setup_info_1 = await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, include_user_annotations=False + ) + url_mapping_1 = setup_info_1.url_mapping + setup_info_2 = await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, include_user_annotations=False + ) + url_mapping_2 = setup_info_2.url_mapping + + # First, get a valid URL to annotate + get_response_1 = await ath.request_validator.get_next_url_for_all_annotations() + + # Apply the second batch id as a filter and see that a different URL is returned + get_response_2 = await ath.request_validator.get_next_url_for_all_annotations( + batch_id=setup_info_2.batch_id + ) + + assert get_response_1.next_annotation.url_id != get_response_2.next_annotation.url_id + + # Annotate the first and submit + agency_id = await ath.db_data_creator.agency() + post_response_1 = await ath.request_validator.post_all_annotations_and_get_next( + url_id=url_mapping_1.url_id, + all_annotations_post_info=AllAnnotationPostInfo( + is_relevant=True, + record_type=RecordType.ACCIDENT_REPORTS, + agency=URLAgencyAnnotationPostInfo( + is_new=False, + suggested_agency=agency_id + ) + ) + ) + assert post_response_1.next_annotation is not None + + # Confirm the second is received + assert post_response_1.next_annotation.url_id == url_mapping_2.url_id + + # Upon submitting the second, confirm that no more URLs are returned through either POST or GET + post_response_2 = await ath.request_validator.post_all_annotations_and_get_next( + url_id=url_mapping_2.url_id, + all_annotations_post_info=AllAnnotationPostInfo( + is_relevant=False, + ) + ) + assert post_response_2.next_annotation is None + + get_response_3 = await ath.request_validator.get_next_url_for_all_annotations() + assert get_response_3.next_annotation is None + + + # Check that all annotations are present in the database + + # 
Should be two relevance annotations, one True and one False + all_relevance_suggestions = await adb_client.get_all(UserRelevantSuggestion) + assert len(all_relevance_suggestions) == 2 + assert all_relevance_suggestions[0].relevant == True + assert all_relevance_suggestions[1].relevant == False + + # Should be one agency + all_agency_suggestions = await adb_client.get_all(UserUrlAgencySuggestion) + assert len(all_agency_suggestions) == 1 + assert all_agency_suggestions[0].is_new == False + assert all_agency_suggestions[0].agency_id == agency_id + + # Should be one record type + all_record_type_suggestions = await adb_client.get_all(UserRecordTypeSuggestion) + assert len(all_record_type_suggestions) == 1 + assert all_record_type_suggestions[0].record_type == RecordType.ACCIDENT_REPORTS.value + +@pytest.mark.asyncio +async def test_annotate_all_post_batch_filtering(api_test_helper): + """ + Batch filtering should also work when posting annotations + """ + ath = api_test_helper + adb_client = ath.adb_client() + setup_info_1 = await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, include_user_annotations=False + ) + url_mapping_1 = setup_info_1.url_mapping + setup_info_2 = await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, include_user_annotations=False + ) + setup_info_3 = await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, include_user_annotations=False + ) + url_mapping_3 = setup_info_3.url_mapping + + # Submit the first annotation, using the third batch id, and receive the third URL + post_response_1 = await ath.request_validator.post_all_annotations_and_get_next( + url_id=url_mapping_1.url_id, + batch_id=setup_info_3.batch_id, + all_annotations_post_info=AllAnnotationPostInfo( + is_relevant=True, + record_type=RecordType.ACCIDENT_REPORTS, + agency=URLAgencyAnnotationPostInfo( + is_new=True + ) + ) + ) + + assert post_response_1.next_annotation.url_id == url_mapping_3.url_id + + +@pytest.mark.asyncio +async def test_annotate_all_validation_error(api_test_helper): + """ + Validation errors in the PostInfo DTO should result in a 400 BAD REQUEST response + """ + ath = api_test_helper + setup_info_1 = await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, include_user_annotations=False ) + url_mapping_1 = setup_info_1.url_mapping - assert request_info_3.next_annotation is None \ No newline at end of file + with pytest.raises(FailedValidationException) as e: + response = await ath.request_validator.post_all_annotations_and_get_next( + url_id=url_mapping_1.url_id, + all_annotations_post_info=AllAnnotationPostInfo( + is_relevant=False, + record_type=RecordType.ACCIDENT_REPORTS + ) + ) diff --git a/tests/test_automated/integration/api/test_batch.py b/tests/test_automated/integration/api/test_batch.py index 61c2a8b2..961b1a30 100644 --- a/tests/test_automated/integration/api/test_batch.py +++ b/tests/test_automated/integration/api/test_batch.py @@ -1,10 +1,88 @@ +import asyncio import time +import pytest + from collector_db.DTOs.BatchInfo import BatchInfo from collector_db.DTOs.InsertURLsInfo import InsertURLsInfo from collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO +from collector_manager.enums import CollectorType, URLStatus from core.enums import BatchStatus +@pytest.mark.asyncio +async def test_get_batch_status_pending_url_filter(api_test_helper): + ath = api_test_helper + + # Add an errored out batch + batch_error = await 
ath.db_data_creator.batch_and_urls( + strategy=CollectorType.EXAMPLE, + url_count=2, + batch_status=BatchStatus.ERROR + ) + + # Add a batch with pending urls + batch_pending = await ath.db_data_creator.batch_and_urls( + strategy=CollectorType.EXAMPLE, + url_count=2, + batch_status=BatchStatus.READY_TO_LABEL, + with_html_content=True, + url_status=URLStatus.PENDING + ) + + # Add a batch with submitted URLs + batch_submitted = await ath.db_data_creator.batch_and_urls( + strategy=CollectorType.EXAMPLE, + url_count=2, + batch_status=BatchStatus.READY_TO_LABEL, + with_html_content=True, + url_status=URLStatus.SUBMITTED + ) + + # Add an aborted batch + batch_aborted = await ath.db_data_creator.batch_and_urls( + strategy=CollectorType.EXAMPLE, + url_count=2, + batch_status=BatchStatus.ABORTED + ) + + # Add a batch with validated URLs + batch_validated = await ath.db_data_creator.batch_and_urls( + strategy=CollectorType.EXAMPLE, + url_count=2, + batch_status=BatchStatus.READY_TO_LABEL, + with_html_content=True, + url_status=URLStatus.VALIDATED + ) + + # Test filter for pending URLs and only retrieve the second batch + pending_urls_results = ath.request_validator.get_batch_statuses( + has_pending_urls=True + ) + + assert len(pending_urls_results.results) == 1 + assert pending_urls_results.results[0].id == batch_pending.batch_id + + # Test filter without pending URLs and retrieve the other four batches + no_pending_urls_results = ath.request_validator.get_batch_statuses( + has_pending_urls=False + ) + + assert len(no_pending_urls_results.results) == 4 + for result in no_pending_urls_results.results: + assert result.id in [ + batch_error.batch_id, + batch_submitted.batch_id, + batch_validated.batch_id, + batch_aborted.batch_id + ] + + # Test no filter for pending URLs and retrieve all batches + no_filter_results = ath.request_validator.get_batch_statuses() + + assert len(no_filter_results.results) == 5 + + + def test_abort_batch(api_test_helper): ath = api_test_helper @@ -19,8 +97,6 @@ def test_abort_batch(api_test_helper): assert response.message == "Batch aborted." 
- time.sleep(3) - bi: BatchInfo = ath.request_validator.get_batch_info(batch_id=batch_id) assert bi.status == BatchStatus.ABORTED diff --git a/tests/test_automated/integration/api/test_duplicates.py b/tests/test_automated/integration/api/test_duplicates.py index 292df507..6c6c42ce 100644 --- a/tests/test_automated/integration/api/test_duplicates.py +++ b/tests/test_automated/integration/api/test_duplicates.py @@ -1,14 +1,22 @@ +import asyncio import time +import pytest + from collector_db.DTOs.BatchInfo import BatchInfo from collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO +from tests.test_automated.integration.api.conftest import disable_task_trigger -def test_duplicates(api_test_helper): +@pytest.mark.asyncio +async def test_duplicates(api_test_helper): ath = api_test_helper + # Temporarily disable task trigger + disable_task_trigger(ath) + dto = ExampleInputDTO( - sleep_time=1 + sleep_time=0 ) batch_id_1 = ath.request_validator.example_collector( @@ -17,15 +25,14 @@ def test_duplicates(api_test_helper): assert batch_id_1 is not None - time.sleep(1) - batch_id_2 = ath.request_validator.example_collector( dto=dto )["batch_id"] assert batch_id_2 is not None - time.sleep(2) + await ath.wait_for_all_batches_to_complete() + bi_1: BatchInfo = ath.request_validator.get_batch_info(batch_id_1) bi_2: BatchInfo = ath.request_validator.get_batch_info(batch_id_2) diff --git a/tests/test_automated/integration/api/test_example_collector.py b/tests/test_automated/integration/api/test_example_collector.py index 2e7895d8..0b3cf30f 100644 --- a/tests/test_automated/integration/api/test_example_collector.py +++ b/tests/test_automated/integration/api/test_example_collector.py @@ -1,23 +1,41 @@ -import time -from unittest.mock import MagicMock +import asyncio +from unittest.mock import AsyncMock +import pytest + +from collector_db.AsyncDatabaseClient import AsyncDatabaseClient from collector_db.DTOs.BatchInfo import BatchInfo from collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO from collector_manager.ExampleCollector import ExampleCollector from collector_manager.enums import CollectorType +from core.AsyncCoreLogger import AsyncCoreLogger from core.DTOs.BatchStatusInfo import BatchStatusInfo from core.DTOs.GetBatchLogsResponse import GetBatchLogsResponse from core.DTOs.GetBatchStatusResponse import GetBatchStatusResponse from core.enums import BatchStatus +from tests.helpers.patch_functions import block_sleep +from tests.test_automated.integration.api.conftest import disable_task_trigger -def test_example_collector(api_test_helper): +@pytest.mark.asyncio +async def test_example_collector(api_test_helper, monkeypatch): ath = api_test_helper + barrier = await block_sleep(monkeypatch) + + # Temporarily disable task trigger + disable_task_trigger(ath) + + + logger = AsyncCoreLogger(adb_client=AsyncDatabaseClient(), flush_interval=1) + await logger.__aenter__() + ath.async_core.collector_manager.logger = logger + dto = ExampleInputDTO( - sleep_time=1 - ) + sleep_time=1 + ) + # Request Example Collector data = ath.request_validator.example_collector( dto=dto ) @@ -25,8 +43,14 @@ def test_example_collector(api_test_helper): assert batch_id is not None assert data["message"] == "Started example collector." 
- bsr: GetBatchStatusResponse = ath.request_validator.get_batch_statuses(status=BatchStatus.IN_PROCESS) + # Yield control so coroutine runs up to the barrier + await asyncio.sleep(0) + + # Check that batch currently shows as In Process + bsr: GetBatchStatusResponse = ath.request_validator.get_batch_statuses( + status=BatchStatus.IN_PROCESS + ) assert len(bsr.results) == 1 bsi: BatchStatusInfo = bsr.results[0] @@ -34,40 +58,57 @@ def test_example_collector(api_test_helper): assert bsi.strategy == CollectorType.EXAMPLE.value assert bsi.status == BatchStatus.IN_PROCESS - time.sleep(2) + # Release the barrier to resume execution + barrier.release() + + await ath.wait_for_all_batches_to_complete() - csr: GetBatchStatusResponse = ath.request_validator.get_batch_statuses(collector_type=CollectorType.EXAMPLE, status=BatchStatus.COMPLETE) + csr: GetBatchStatusResponse = ath.request_validator.get_batch_statuses( + collector_type=CollectorType.EXAMPLE, + status=BatchStatus.READY_TO_LABEL + ) assert len(csr.results) == 1 bsi: BatchStatusInfo = csr.results[0] assert bsi.id == batch_id assert bsi.strategy == CollectorType.EXAMPLE.value - assert bsi.status == BatchStatus.COMPLETE + assert bsi.status == BatchStatus.READY_TO_LABEL bi: BatchInfo = ath.request_validator.get_batch_info(batch_id=batch_id) - assert bi.status == BatchStatus.COMPLETE + assert bi.status == BatchStatus.READY_TO_LABEL assert bi.total_url_count == 2 assert bi.parameters == dto.model_dump() assert bi.strategy == CollectorType.EXAMPLE.value assert bi.user_id is not None # Flush early to ensure logs are written - ath.core.collector_manager.logger.flush_all() + await logger.flush_all() lr: GetBatchLogsResponse = ath.request_validator.get_batch_logs(batch_id=batch_id) - assert len(lr.logs) > 0 -def test_example_collector_error(api_test_helper, monkeypatch): + # Check that task was triggered + ath.async_core.collector_manager.\ + post_collection_function_trigger.\ + trigger_or_rerun.assert_called_once() + + await logger.__aexit__(None, None, None) + +@pytest.mark.asyncio +async def test_example_collector_error(api_test_helper, monkeypatch): """ Test that when an error occurs in a collector, the batch is properly update """ ath = api_test_helper + logger = AsyncCoreLogger(adb_client=AsyncDatabaseClient(), flush_interval=1) + await logger.__aenter__() + ath.async_core.collector_manager.logger = logger + # Patch the collector to raise an exception during run_implementation - mock = MagicMock() + mock = AsyncMock() mock.side_effect = Exception("Collector failed!") monkeypatch.setattr(ExampleCollector, 'run_implementation', mock) @@ -82,17 +123,20 @@ def test_example_collector_error(api_test_helper, monkeypatch): assert batch_id is not None assert data["message"] == "Started example collector." - time.sleep(1) + await ath.wait_for_all_batches_to_complete() bi: BatchInfo = ath.request_validator.get_batch_info(batch_id=batch_id) assert bi.status == BatchStatus.ERROR - - ath.core.core_logger.flush_all() + # Check there are logs + assert not logger.log_queue.empty() + await logger.flush_all() + assert logger.log_queue.empty() gbl: GetBatchLogsResponse = ath.request_validator.get_batch_logs(batch_id=batch_id) assert gbl.logs[-1].log == "Error: Collector failed!" 
+ await logger.__aexit__(None, None, None) diff --git a/tests/test_automated/integration/api/test_manual_batch.py b/tests/test_automated/integration/api/test_manual_batch.py new file mode 100644 index 00000000..e9a101eb --- /dev/null +++ b/tests/test_automated/integration/api/test_manual_batch.py @@ -0,0 +1,162 @@ + +import pytest + +from collector_db.models import Batch, URL, URLOptionalDataSourceMetadata +from collector_manager.enums import CollectorType +from core.DTOs.ManualBatchInputDTO import ManualBatchInnerInputDTO, ManualBatchInputDTO +from core.enums import RecordType + + +@pytest.mark.asyncio +async def test_manual_batch(api_test_helper): + ath = api_test_helper + + manual_batch_name = "test_manual_batch" + + # Create 50 entries with just URL + dtos = [] + for i in range(50): + dto = ManualBatchInnerInputDTO( + url=f"https://example.com/{i}", + ) + dtos.append(dto) + + # Create 50 entries with URL and all optional fields + for i in range(50): + dto = ManualBatchInnerInputDTO( + url=f"https://example.com/{i+50}", + name=manual_batch_name, + description=f"Description {i}", + collector_metadata={ + "name": f"Name {i}", + }, + record_type=RecordType.ARREST_RECORDS, + record_formats=[f"Record Format {i}"], + data_portal_type=f"Data Portal Type {i}", + supplying_entity=f"Supplying Entity {i}" + ) + dtos.append(dto) + + input_dto = ManualBatchInputDTO( + name=manual_batch_name, + entries=dtos + ) + + # Submit batch successfully + response = await ath.request_validator.submit_manual_batch(input_dto) + + # Check 100 URLs in url attribute + assert len(response.urls) == 100 + + # Get batch from database + adb_client = ath.adb_client() + batches = await adb_client.get_all(Batch) + + # Confirm only one batch + assert len(batches) == 1 + + batch: Batch = batches[0] + # Assert batch id matches response's batch id + assert batch.id == response.batch_id + # Assert strategy of manual + assert batch.strategy == CollectorType.MANUAL.value + # Assert parameters has name value of `test_manual_batch` + assert batch.parameters["name"] == manual_batch_name + # Assert has expected user id + assert batch.user_id == 1 + + # Get URLs from database + urls: list[URL] = await adb_client.get_all(URL) + + # Confirm 100 URLs + assert len(urls) == 100 + + def check_attributes( + object: URL or URLOptionalDataSourceMetadata, + attributes: list[str], + attributes_are_none: bool + ): + for attr in attributes: + if attributes_are_none: + if getattr(object, attr) is not None: + return False + else: + if getattr(object, attr) is None: + return False + return True + + def check_url(url: URL, url_only: bool): + assert url.batch_id == batch.id + assert url.url is not None + other_attributes = ["name", "description", "collector_metadata", "record_type"] + return check_attributes(url, other_attributes, url_only) + + + # Confirm 50 have only name value + count_only_name = 0 + for url in urls: + if check_url(url, True): + count_only_name += 1 + assert count_only_name == 50 + # Confirm 50 have all optional fields + count_all = 0 + for url in urls: + if check_url(url, False): + count_all += 1 + assert count_all == 50 + + # Get Optional URL Metadata from Database + opt_metadata: list[URLOptionalDataSourceMetadata] = await adb_client.get_all(URLOptionalDataSourceMetadata) + + # Confirm 100 + assert len(opt_metadata) == 100 + + def check_opt_metadata(metadata: URLOptionalDataSourceMetadata, no_optional: bool): + assert metadata.url_id is not None + other_attributes = ["record_formats", "data_portal_type", "supplying_entity"] + 
return check_attributes(metadata, other_attributes, no_optional) + + # Confirm 50 have nothing but URL id + count_only_url_id = 0 + for metadata in opt_metadata: + if check_opt_metadata(metadata, True): + count_only_url_id += 1 + assert count_only_url_id == 50 + + # Confirm 50 have all optional fields + count_all = 0 + for metadata in opt_metadata: + if check_opt_metadata(metadata, False): + count_all += 1 + assert count_all == 50 + + # Insert another batch including good urls and one duplicate + more_dtos = [] + for i in range(49): + dto = ManualBatchInnerInputDTO( + url=f"https://example.com/{i+100}", + ) + more_dtos.append(dto) + + for i in range(2): + dto = ManualBatchInnerInputDTO( + url=f"https://example.com/{i+1}", + ) + more_dtos.append(dto) + + + duplicate_input_dto = ManualBatchInputDTO( + name=manual_batch_name, + entries=more_dtos + ) + + # Submit batch + response = await ath.request_validator.submit_manual_batch(duplicate_input_dto) + # Check duplicate URLs + assert len(response.duplicate_urls) == 2 + assert response.duplicate_urls == ['https://example.com/1', 'https://example.com/2'] + assert len(response.urls) == 49 + + # Check 149 URLs in database + urls: list[URL] = await adb_client.get_all(URL) + assert len(urls) == 149 diff --git a/tests/test_automated/integration/api/test_metrics.py b/tests/test_automated/integration/api/test_metrics.py new file mode 100644 index 00000000..b8eb6ca6 --- /dev/null +++ b/tests/test_automated/integration/api/test_metrics.py @@ -0,0 +1,478 @@ +import pendulum +import pytest + +from collector_manager.enums import URLStatus, CollectorType +from core.enums import BatchStatus, RecordType +from tests.helpers.test_batch_creation_parameters import TestBatchCreationParameters, TestURLCreationParameters, \ + AnnotationInfo + + +@pytest.mark.asyncio +async def test_get_batches_aggregated_metrics(api_test_helper): + ath = api_test_helper + # Create successful batches with URLs of different statuses + all_params = [] + for i in range(3): + params = TestBatchCreationParameters( + strategy=CollectorType.MANUAL, + urls=[ + TestURLCreationParameters( + count=1, + status=URLStatus.PENDING + ), + TestURLCreationParameters( + count=2, + status=URLStatus.SUBMITTED + ), + TestURLCreationParameters( + count=3, + status=URLStatus.REJECTED + ), + TestURLCreationParameters( + count=4, + status=URLStatus.ERROR + ), + TestURLCreationParameters( + count=5, + status=URLStatus.VALIDATED + ) + ] + ) + all_params.append(params) + + + # Create failed batches + for i in range(2): + params = TestBatchCreationParameters( + outcome=BatchStatus.ERROR + ) + all_params.append(params) + + for params in all_params: + await ath.db_data_creator.batch_v2(params) + + dto = await ath.request_validator.get_batches_aggregated_metrics() + assert dto.total_batches == 5 + inner_dto_example = dto.by_strategy[CollectorType.EXAMPLE] + assert inner_dto_example.count_urls == 0 + assert inner_dto_example.count_successful_batches == 0 + assert inner_dto_example.count_failed_batches == 2 + assert inner_dto_example.count_urls_pending == 0 + assert inner_dto_example.count_urls_submitted == 0 + assert inner_dto_example.count_urls_rejected == 0 + assert inner_dto_example.count_urls_errors == 0 + assert inner_dto_example.count_urls_validated == 0 + + inner_dto_manual = dto.by_strategy[CollectorType.MANUAL] + assert inner_dto_manual.count_urls == 45 + assert inner_dto_manual.count_successful_batches == 3 + assert inner_dto_manual.count_failed_batches == 0 + assert inner_dto_manual.count_urls_pending == 3 + 
assert inner_dto_manual.count_urls_submitted == 6 + assert inner_dto_manual.count_urls_rejected == 9 + assert inner_dto_manual.count_urls_errors == 12 + assert inner_dto_manual.count_urls_validated == 15 + + +@pytest.mark.asyncio +async def test_get_batches_breakdown_metrics(api_test_helper): + # Create a different batch for each month, with different URLs + today = pendulum.parse('2021-01-01') + ath = api_test_helper + + batch_1_params = TestBatchCreationParameters( + strategy=CollectorType.MANUAL, + urls=[ + TestURLCreationParameters( + count=1, + status=URLStatus.PENDING + ), + TestURLCreationParameters( + count=2, + status=URLStatus.SUBMITTED + ), + ] + ) + batch_1 = await ath.db_data_creator.batch_v2(batch_1_params) + batch_2_params = TestBatchCreationParameters( + strategy=CollectorType.EXAMPLE, + outcome=BatchStatus.ERROR, + created_at=today.subtract(weeks=1), + ) + batch_2 = await ath.db_data_creator.batch_v2(batch_2_params) + batch_3_params = TestBatchCreationParameters( + strategy=CollectorType.AUTO_GOOGLER, + created_at=today.subtract(weeks=2), + urls=[ + TestURLCreationParameters( + count=3, + status=URLStatus.REJECTED + ), + TestURLCreationParameters( + count=4, + status=URLStatus.ERROR + ), + TestURLCreationParameters( + count=5, + status=URLStatus.VALIDATED + ), + ] + ) + batch_3 = await ath.db_data_creator.batch_v2(batch_3_params) + + dto_1 = await ath.request_validator.get_batches_breakdown_metrics( + page=1 + ) + assert len(dto_1.batches) == 3 + dto_batch_1 = dto_1.batches[2] + assert dto_batch_1.batch_id == batch_1.batch_id + assert dto_batch_1.strategy == CollectorType.MANUAL + assert dto_batch_1.status == BatchStatus.READY_TO_LABEL + assert pendulum.instance(dto_batch_1.created_at) > today + assert dto_batch_1.count_url_total == 3 + assert dto_batch_1.count_url_pending == 1 + assert dto_batch_1.count_url_submitted == 2 + assert dto_batch_1.count_url_rejected == 0 + assert dto_batch_1.count_url_error == 0 + assert dto_batch_1.count_url_validated == 0 + + dto_batch_2 = dto_1.batches[1] + assert dto_batch_2.batch_id == batch_2.batch_id + assert dto_batch_2.status == BatchStatus.ERROR + assert dto_batch_2.strategy == CollectorType.EXAMPLE + assert pendulum.instance(dto_batch_2.created_at) == today.subtract(weeks=1) + assert dto_batch_2.count_url_total == 0 + assert dto_batch_2.count_url_submitted == 0 + assert dto_batch_2.count_url_pending == 0 + assert dto_batch_2.count_url_rejected == 0 + assert dto_batch_2.count_url_error == 0 + assert dto_batch_2.count_url_validated == 0 + + dto_batch_3 = dto_1.batches[0] + assert dto_batch_3.batch_id == batch_3.batch_id + assert dto_batch_3.status == BatchStatus.READY_TO_LABEL + assert dto_batch_3.strategy == CollectorType.AUTO_GOOGLER + assert pendulum.instance(dto_batch_3.created_at) == today.subtract(weeks=2) + assert dto_batch_3.count_url_total == 12 + assert dto_batch_3.count_url_pending == 0 + assert dto_batch_3.count_url_submitted == 0 + assert dto_batch_3.count_url_rejected == 3 + assert dto_batch_3.count_url_error == 4 + assert dto_batch_3.count_url_validated == 5 + + dto_2 = await ath.request_validator.get_batches_breakdown_metrics( + page=2 + ) + assert len(dto_2.batches) == 0 + +@pytest.mark.asyncio +async def test_get_urls_breakdown_submitted_metrics(api_test_helper): + # Create URLs with submitted status, broken down in different amounts by different weeks + # And ensure the URLs are + today = pendulum.parse('2021-01-01') + ath = api_test_helper + + batch_1_params = TestBatchCreationParameters( + 
strategy=CollectorType.MANUAL, + urls=[ + TestURLCreationParameters( + count=1, + status=URLStatus.PENDING + ), + TestURLCreationParameters( + count=2, + status=URLStatus.SUBMITTED + ), + ] + ) + batch_1 = await ath.db_data_creator.batch_v2(batch_1_params) + batch_2_params = TestBatchCreationParameters( + strategy=CollectorType.EXAMPLE, + urls=[ + TestURLCreationParameters( + count=3, + status=URLStatus.SUBMITTED + ) + ], + created_at=today.subtract(weeks=1), + ) + batch_2 = await ath.db_data_creator.batch_v2(batch_2_params) + batch_3_params = TestBatchCreationParameters( + strategy=CollectorType.AUTO_GOOGLER, + created_at=today.subtract(weeks=1), + urls=[ + TestURLCreationParameters( + count=3, + status=URLStatus.SUBMITTED + ), + TestURLCreationParameters( + count=4, + status=URLStatus.ERROR + ), + TestURLCreationParameters( + count=5, + status=URLStatus.VALIDATED + ), + ] + ) + batch_3 = await ath.db_data_creator.batch_v2(batch_3_params) + + dto = await ath.request_validator.get_urls_breakdown_submitted_metrics() + assert len(dto.entries) == 2 + + entry_1 = dto.entries[0] + assert entry_1.count_submitted == 6 + + entry_2 = dto.entries[1] + assert entry_2.count_submitted == 2 + + +@pytest.mark.asyncio +async def test_get_urls_breakdown_pending_metrics(api_test_helper): + # Build URLs, broken down into three separate weeks, + # with each week having a different number of pending URLs + # with a different number of kinds of annotations per URLs + + + today = pendulum.parse('2021-01-01') + ath = api_test_helper + + agency_id = await ath.db_data_creator.agency() + # Additionally, add some URLs that are submitted, + # validated, errored, and ensure they are not counted + batch_1_params = TestBatchCreationParameters( + strategy=CollectorType.MANUAL, + urls=[ + TestURLCreationParameters( + count=1, + status=URLStatus.PENDING, + annotation_info=AnnotationInfo( + user_relevant=False + ) + ), + TestURLCreationParameters( + count=2, + status=URLStatus.SUBMITTED + ), + ] + ) + batch_1 = await ath.db_data_creator.batch_v2(batch_1_params) + batch_2_params = TestBatchCreationParameters( + strategy=CollectorType.EXAMPLE, + urls=[ + TestURLCreationParameters( + count=3, + status=URLStatus.PENDING, + annotation_info=AnnotationInfo( + user_relevant=True, + user_record_type=RecordType.CALLS_FOR_SERVICE + ) + ) + ], + created_at=today.subtract(weeks=1), + ) + batch_2 = await ath.db_data_creator.batch_v2(batch_2_params) + batch_3_params = TestBatchCreationParameters( + strategy=CollectorType.AUTO_GOOGLER, + created_at=today.subtract(weeks=1), + urls=[ + TestURLCreationParameters( + count=3, + status=URLStatus.SUBMITTED + ), + TestURLCreationParameters( + count=4, + status=URLStatus.ERROR + ), + TestURLCreationParameters( + count=5, + status=URLStatus.PENDING, + annotation_info=AnnotationInfo( + user_relevant=True, + user_record_type=RecordType.INCARCERATION_RECORDS, + user_agency=agency_id + ) + ), + ] + ) + batch_3 = await ath.db_data_creator.batch_v2(batch_3_params) + + dto = await ath.request_validator.get_urls_breakdown_pending_metrics() + assert len(dto.entries) == 2 + + entry_1 = dto.entries[0] + assert entry_1.count_pending_total == 8 + assert entry_1.count_pending_relevant_user == 8 + assert entry_1.count_pending_record_type_user == 8 + assert entry_1.count_pending_agency_user == 5 + + entry_2 = dto.entries[1] + assert entry_2.count_pending_total == 1 + assert entry_2.count_pending_relevant_user == 1 + assert entry_2.count_pending_record_type_user == 0 + assert entry_2.count_pending_agency_user == 0 + 
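For reference, the counts asserted at the end of test_get_urls_breakdown_pending_metrics follow directly from the batch parameters above, assuming entries are grouped by week of batch creation and returned oldest week first (batch_1 has no explicit created_at and so falls in the current week). A sketch of the derivation:

# Week of `today - 1 week` (batch_2 and batch_3):
entry_1_expected = {
    "count_pending_total": 3 + 5,          # batch_2 pending + batch_3 pending
    "count_pending_relevant_user": 3 + 5,  # both carry a user relevance annotation
    "count_pending_record_type_user": 3 + 5,
    "count_pending_agency_user": 5,        # only batch_3's pending URLs have a user agency
}

# Current week (batch_1 only):
entry_2_expected = {
    "count_pending_total": 1,
    "count_pending_relevant_user": 1,      # relevant=False is still a user annotation
    "count_pending_record_type_user": 0,
    "count_pending_agency_user": 0,
}

# Submitted and errored URLs (2 in batch_1, 3 + 4 in batch_3) are excluded from
# every pending count.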
+@pytest.mark.asyncio +async def test_get_urls_aggregate_metrics(api_test_helper): + ath = api_test_helper + today = pendulum.parse('2021-01-01') + + batch_0_params = TestBatchCreationParameters( + strategy=CollectorType.MANUAL, + created_at=today.subtract(days=1), + urls=[ + TestURLCreationParameters( + count=1, + status=URLStatus.PENDING, + ), + ] + ) + batch_0 = await ath.db_data_creator.batch_v2(batch_0_params) + oldest_url_id = batch_0.url_creation_infos[URLStatus.PENDING].url_mappings[0].url_id + + + batch_1_params = TestBatchCreationParameters( + strategy=CollectorType.MANUAL, + urls=[ + TestURLCreationParameters( + count=1, + status=URLStatus.PENDING, + ), + TestURLCreationParameters( + count=2, + status=URLStatus.SUBMITTED + ), + ] + ) + batch_1 = await ath.db_data_creator.batch_v2(batch_1_params) + + batch_2_params = TestBatchCreationParameters( + strategy=CollectorType.AUTO_GOOGLER, + urls=[ + TestURLCreationParameters( + count=4, + status=URLStatus.PENDING, + ), + TestURLCreationParameters( + count=2, + status=URLStatus.ERROR + ), + TestURLCreationParameters( + count=1, + status=URLStatus.VALIDATED + ), + TestURLCreationParameters( + count=5, + status=URLStatus.REJECTED + ), + ] + ) + batch_2 = await ath.db_data_creator.batch_v2(batch_2_params) + + dto = await ath.request_validator.get_urls_aggregated_metrics() + + assert dto.oldest_pending_url_id == oldest_url_id + assert dto.oldest_pending_url_created_at == today.subtract(days=1).in_timezone('UTC').naive() + assert dto.count_urls_pending == 6 + assert dto.count_urls_rejected == 5 + assert dto.count_urls_errors == 2 + assert dto.count_urls_validated == 1 + assert dto.count_urls_submitted == 2 + assert dto.count_urls_total == 16 + + + +@pytest.mark.asyncio +async def test_get_backlog_metrics(api_test_helper): + today = pendulum.parse('2021-01-01') + + ath = api_test_helper + adb_client = ath.adb_client() + + + # Populate the backlog table and test that backlog metrics returned on a monthly basis + # Ensure that multiple days in each month are added to the backlog table, with different values + + + batch_1_params = TestBatchCreationParameters( + strategy=CollectorType.MANUAL, + urls=[ + TestURLCreationParameters( + count=1, + status=URLStatus.PENDING, + annotation_info=AnnotationInfo( + user_relevant=False + ) + ), + TestURLCreationParameters( + count=2, + status=URLStatus.SUBMITTED + ), + ] + ) + batch_1 = await ath.db_data_creator.batch_v2(batch_1_params) + + await adb_client.populate_backlog_snapshot( + dt=today.subtract(months=3).naive() + ) + + await adb_client.populate_backlog_snapshot( + dt=today.subtract(months=2, days=3).naive() + ) + + batch_2_params = TestBatchCreationParameters( + strategy=CollectorType.AUTO_GOOGLER, + urls=[ + TestURLCreationParameters( + count=4, + status=URLStatus.PENDING, + annotation_info=AnnotationInfo( + user_relevant=False + ) + ), + TestURLCreationParameters( + count=2, + status=URLStatus.ERROR + ), + ] + ) + batch_2 = await ath.db_data_creator.batch_v2(batch_2_params) + + await adb_client.populate_backlog_snapshot( + dt=today.subtract(months=2).naive() + ) + + await adb_client.populate_backlog_snapshot( + dt=today.subtract(months=1, days=4).naive() + ) + + batch_3_params = TestBatchCreationParameters( + strategy=CollectorType.AUTO_GOOGLER, + urls=[ + TestURLCreationParameters( + count=7, + status=URLStatus.PENDING, + annotation_info=AnnotationInfo( + user_relevant=False + ) + ), + TestURLCreationParameters( + count=5, + status=URLStatus.VALIDATED + ), + ] + ) + batch_3 = await 
ath.db_data_creator.batch_v2(batch_3_params) + + await adb_client.populate_backlog_snapshot( + dt=today.subtract(months=1).naive() + ) + + dto = await ath.request_validator.get_backlog_metrics() + + assert len(dto.entries) == 3 + + # Test that the count closest to the beginning of the month is returned for each month + assert dto.entries[0].count_pending_total == 1 + assert dto.entries[1].count_pending_total == 5 + assert dto.entries[2].count_pending_total == 12 \ No newline at end of file diff --git a/tests/test_automated/integration/api/test_review.py b/tests/test_automated/integration/api/test_review.py new file mode 100644 index 00000000..1f427c61 --- /dev/null +++ b/tests/test_automated/integration/api/test_review.py @@ -0,0 +1,161 @@ +import pytest + +from collector_db.constants import PLACEHOLDER_AGENCY_NAME +from collector_db.models import URL, URLOptionalDataSourceMetadata, ConfirmedURLAgency, Agency +from collector_manager.enums import URLStatus +from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo, FinalReviewBaseInfo +from core.DTOs.GetNextURLForFinalReviewResponse import GetNextURLForFinalReviewOuterResponse +from core.enums import RecordType +from tests.helpers.complex_test_data_functions import setup_for_get_next_url_for_final_review + + +@pytest.mark.asyncio +async def test_review_next_source(api_test_helper): + ath = api_test_helper + + setup_info = await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, + include_user_annotations=True + ) + url_mapping = setup_info.url_mapping + + await ath.db_data_creator.agency_auto_suggestions( + url_id=url_mapping.url_id, + count=3 + ) + confirmed_agency_id = await ath.db_data_creator.agency_confirmed_suggestion(url_id=url_mapping.url_id) + + outer_result = await ath.request_validator.review_next_source() + + result = outer_result.next_source + + assert result.name == "Test Name" + assert result.description == "Test Description" + + optional_metadata = result.optional_metadata + + assert optional_metadata.data_portal_type == "Test Data Portal Type" + assert optional_metadata.supplying_entity == "Test Supplying Entity" + assert optional_metadata.record_formats == ["Test Record Format", "Test Record Format 2"] + + assert result.url == url_mapping.url + html_info = result.html_info + assert html_info.description == "test description" + assert html_info.title == "test html content" + + annotation_info = result.annotations + relevant_info = annotation_info.relevant + assert relevant_info.auto == True + assert relevant_info.user == False + + record_type_info = annotation_info.record_type + assert record_type_info.auto == RecordType.ARREST_RECORDS + assert record_type_info.user == RecordType.ACCIDENT_REPORTS + + agency_info = annotation_info.agency + auto_agency_suggestions = agency_info.auto + assert auto_agency_suggestions.unknown == False + assert len(auto_agency_suggestions.suggestions) == 3 + + # Check user agency suggestions exist and in descending order of count + user_agency_suggestion = agency_info.user + assert user_agency_suggestion.pdap_agency_id == setup_info.user_agency_id + + + # Check confirmed agencies exist + confirmed_agencies = agency_info.confirmed + assert len(confirmed_agencies) == 1 + confirmed_agency = confirmed_agencies[0] + assert confirmed_agency.pdap_agency_id == confirmed_agency_id + +@pytest.mark.asyncio +async def test_approve_and_get_next_source_for_review(api_test_helper): + ath = api_test_helper + db_data_creator = ath.db_data_creator + + setup_info = await 
setup_for_get_next_url_for_final_review( + db_data_creator=db_data_creator, + include_user_annotations=True + ) + url_mapping = setup_info.url_mapping + + # Add confirmed agency + await db_data_creator.confirmed_suggestions([url_mapping.url_id]) + + # Additionally, include an agency not yet included in the database + additional_agency = 999999 + + agency_ids = [await db_data_creator.agency() for _ in range(3)] + agency_ids.append(additional_agency) + + result: GetNextURLForFinalReviewOuterResponse = await ath.request_validator.approve_and_get_next_source_for_review( + approval_info=FinalReviewApprovalInfo( + url_id=url_mapping.url_id, + record_type=RecordType.ARREST_RECORDS, + agency_ids=agency_ids, + name="New Test Name", + description="New Test Description", + record_formats=["New Test Record Format", "New Test Record Format 2"], + data_portal_type="New Test Data Portal Type", + supplying_entity="New Test Supplying Entity" + ) + ) + + assert result.next_source is None + + adb_client = db_data_creator.adb_client + # Confirm same agency id is listed as confirmed + urls = await adb_client.get_all(URL) + assert len(urls) == 1 + url = urls[0] + assert url.id == url_mapping.url_id + assert url.record_type == RecordType.ARREST_RECORDS.value + assert url.outcome == URLStatus.VALIDATED.value + assert url.name == "New Test Name" + assert url.description == "New Test Description" + + optional_metadata = await adb_client.get_all(URLOptionalDataSourceMetadata) + assert len(optional_metadata) == 1 + assert optional_metadata[0].data_portal_type == "New Test Data Portal Type" + assert optional_metadata[0].supplying_entity == "New Test Supplying Entity" + assert optional_metadata[0].record_formats == ["New Test Record Format", "New Test Record Format 2"] + + # Get agencies + confirmed_agencies = await adb_client.get_all(ConfirmedURLAgency) + assert len(confirmed_agencies) == 4 + for agency in confirmed_agencies: + assert agency.agency_id in agency_ids + + # Check that created agency has placeholder + agencies = await adb_client.get_all(Agency) + for agency in agencies: + if agency.agency_id == additional_agency: + assert agency.name == PLACEHOLDER_AGENCY_NAME + +@pytest.mark.asyncio +async def test_reject_and_get_next_source_for_review(api_test_helper): + ath = api_test_helper + db_data_creator = ath.db_data_creator + + setup_info = await setup_for_get_next_url_for_final_review( + db_data_creator=db_data_creator, + annotation_count=3, + include_user_annotations=True + ) + url_mapping = setup_info.url_mapping + + result: GetNextURLForFinalReviewOuterResponse = await ath.request_validator.reject_and_get_next_source_for_review( + review_info=FinalReviewBaseInfo( + url_id=url_mapping.url_id, + ) + ) + + assert result.next_source is None + + adb_client = db_data_creator.adb_client + # Confirm same agency id is listed as rejected + urls = await adb_client.get_all(URL) + assert len(urls) == 1 + url = urls[0] + assert url.id == url_mapping.url_id + assert url.outcome == URLStatus.REJECTED.value \ No newline at end of file diff --git a/tests/test_automated/integration/api/test_search.py b/tests/test_automated/integration/api/test_search.py new file mode 100644 index 00000000..917690fc --- /dev/null +++ b/tests/test_automated/integration/api/test_search.py @@ -0,0 +1,23 @@ +import pytest + +from core.DTOs.SearchURLResponse import SearchURLResponse + + +@pytest.mark.asyncio +async def test_search_url(api_test_helper): + ath = api_test_helper + + # Create a batch with 1 URL + creation_info = await 
ath.db_data_creator.batch_and_urls(url_count=1, with_html_content=False) + + # Search for that URL and locate it + response: SearchURLResponse = await ath.request_validator.search_url(url=creation_info.urls[0]) + + assert response.found + assert response.url_id == creation_info.url_ids[0] + + # Search for a non-existent URL + response: SearchURLResponse = await ath.request_validator.search_url(url="http://doesnotexist.com") + + assert not response.found + assert response.url_id is None \ No newline at end of file diff --git a/tests/test_automated/integration/api/test_task.py b/tests/test_automated/integration/api/test_task.py new file mode 100644 index 00000000..547b0eb8 --- /dev/null +++ b/tests/test_automated/integration/api/test_task.py @@ -0,0 +1,55 @@ +import pytest + +from collector_db.enums import TaskType +from tests.test_automated.integration.api.conftest import APITestHelper + + +async def task_setup(ath: APITestHelper) -> int: + iui = ath.db_data_creator.urls(batch_id=ath.db_data_creator.batch(), url_count=3) + url_ids = [url.url_id for url in iui.url_mappings] + + task_id = await ath.db_data_creator.task(url_ids=url_ids) + await ath.db_data_creator.error_info(url_ids=[url_ids[0]], task_id=task_id) + + return task_id + +@pytest.mark.asyncio +async def test_get_task_info(api_test_helper): + ath = api_test_helper + + task_id = await task_setup(ath) + + task_info = ath.request_validator.get_task_info(task_id=task_id) + + assert len(task_info.urls) == 3 + assert len(task_info.url_errors) == 1 + + assert task_info.task_type == TaskType.HTML + +@pytest.mark.asyncio +async def test_get_tasks(api_test_helper): + ath = api_test_helper + for i in range(2): + await task_setup(ath) + + response = ath.request_validator.get_tasks(page=1, task_type=None, task_status=None) + + assert len(response.tasks) == 2 + for task in response.tasks: + assert task.type == TaskType.HTML + assert task.url_count == 3 + assert task.url_error_count == 1 + +@pytest.mark.asyncio +async def test_get_task_status(api_test_helper): + ath = api_test_helper + + response = await ath.request_validator.get_current_task_status() + + assert response.status == TaskType.IDLE + + for task in [task for task in TaskType]: + await ath.async_core.task_manager.set_task_status(task) + response = await ath.request_validator.get_current_task_status() + + assert response.status == task diff --git a/tests/test_automated/integration/api/test_url.py b/tests/test_automated/integration/api/test_url.py index 9ccc7e5f..fccd8e4e 100644 --- a/tests/test_automated/integration/api/test_url.py +++ b/tests/test_automated/integration/api/test_url.py @@ -21,9 +21,6 @@ async def test_get_urls(api_test_helper): url_id_1st = iui.url_mappings[0].url_id - # Add metadata - await db_data_creator.metadata(url_ids=[url_id_1st]) - # Get the latter 2 urls url_ids = [iui.url_mappings[1].url_id, iui.url_mappings[2].url_id] @@ -35,12 +32,10 @@ async def test_get_urls(api_test_helper): assert data.count == 3 assert len(data.urls) == 3 assert data.urls[0].url == iui.url_mappings[0].url - assert len(data.urls[0].metadata) == 1 for i in range(1, 3): assert data.urls[i].url == iui.url_mappings[i].url assert len(data.urls[i].errors) == 1 - assert len(data.urls[i].metadata) == 0 # Retrieve data again with errors only data: GetURLsResponseInfo = api_test_helper.request_validator.get_urls(errors=True) diff --git a/tests/test_automated/integration/collector_db/test_database_structure.py b/tests/test_automated/integration/collector_db/test_database_structure.py index 
926a6ed8..6d82631c 100644 --- a/tests/test_automated/integration/collector_db/test_database_structure.py +++ b/tests/test_automated/integration/collector_db/test_database_structure.py @@ -14,15 +14,16 @@ import sqlalchemy as sa from sqlalchemy import create_engine from sqlalchemy.dialects import postgresql -from sqlalchemy.exc import DataError +from sqlalchemy.exc import DataError, DBAPIError from collector_db.DTOs.InsertURLsInfo import InsertURLsInfo from collector_db.enums import URLHTMLContentType from collector_db.helper_functions import get_postgres_connection_string -from collector_db.models import Base +from collector_db.models import Base, Agency from collector_manager.enums import CollectorType, URLStatus -from core.enums import BatchStatus -from tests.helpers.DBDataCreator import DBDataCreator +from core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo +from core.enums import BatchStatus, SuggestionType +from tests.helpers.DBDataCreator import DBDataCreator, BatchURLCreationInfo from util.helper_functions import get_enum_values SATypes: TypeAlias = sa.Integer or sa.String or postgresql.ENUM or sa.TIMESTAMP or sa.Text @@ -51,9 +52,11 @@ def __init__( self, columns: list[ColumnTester], table_name: str, - engine: sa.Engine = create_engine(get_postgres_connection_string()), + engine: Optional[sa.Engine] = None, constraints: Optional[list[ConstraintTester]] = None, ): + if engine is None: + engine = create_engine(get_postgres_connection_string(is_async=True)) self.columns = columns self.table_name = table_name self.constraints = constraints @@ -227,45 +230,11 @@ def test_url(db_data_creator: DBDataCreator): column_name="outcome", type_=postgresql.ENUM, allowed_values=get_enum_values(URLStatus) - ) - ], - engine=db_data_creator.db_client.engine - ) - - table_tester.run_column_tests() - -def test_url_metadata(db_data_creator: DBDataCreator): - batch_id = db_data_creator.batch() - iui: InsertURLsInfo = db_data_creator.urls(batch_id=batch_id, url_count=1) - - - table_tester = TableTester( - table_name="url_metadata", - columns=[ - ColumnTester( - column_name="url_id", - type_=sa.Integer, - allowed_values=[iui.url_mappings[0].url_id] - ), - ColumnTester( - column_name="attribute", - type_=postgresql.ENUM, - allowed_values=["Record Type", "Agency", "Relevant"] - ), - ColumnTester( - column_name="value", - type_=sa.Text, - allowed_values=["Text"] ), ColumnTester( - column_name="validation_status", - type_=postgresql.ENUM, - allowed_values=["Pending Validation", "Validated"] - ), - ColumnTester( - column_name="validation_source", - type_=postgresql.ENUM, - allowed_values=["Machine Learning", "Label Studio", "Manual"] + column_name="name", + type_=sa.String, + allowed_values=['test'], ) ], engine=db_data_creator.db_client.engine @@ -325,4 +294,54 @@ def test_root_url(db_data_creator: DBDataCreator): engine=db_data_creator.db_client.engine ) - table_tester.run_column_tests() \ No newline at end of file + table_tester.run_column_tests() + + +@pytest.mark.asyncio +async def test_upsert_new_agencies(db_data_creator: DBDataCreator): + """ + Check that if the agency doesn't exist, it is added + But if the agency does exist, it is updated with new information + """ + + suggestions = [] + for i in range(3): + suggestion = URLAgencySuggestionInfo( + url_id=1, + suggestion_type=SuggestionType.AUTO_SUGGESTION, + pdap_agency_id=i, + agency_name=f"Test Agency {i}", + state=f"Test State {i}", + county=f"Test County {i}", + locality=f"Test Locality {i}", + user_id=1 + ) + 
suggestions.append(suggestion) + + adb_client = db_data_creator.adb_client + await adb_client.upsert_new_agencies(suggestions) + + update_suggestion = URLAgencySuggestionInfo( + url_id=1, + suggestion_type=SuggestionType.AUTO_SUGGESTION, + pdap_agency_id=0, + agency_name="Updated Test Agency", + state="Updated Test State", + county="Updated Test County", + locality="Updated Test Locality", + user_id=1 + ) + + await adb_client.upsert_new_agencies([update_suggestion]) + + rows = await adb_client.get_all(Agency) + + assert len(rows) == 3 + + d = {} + for row in rows: + d[row.agency_id] = row.name + + assert d[0] == "Updated Test Agency" + assert d[1] == "Test Agency 1" + assert d[2] == "Test Agency 2" diff --git a/tests/test_automated/integration/collector_db/test_db_client.py b/tests/test_automated/integration/collector_db/test_db_client.py index feadf57f..93edb3ed 100644 --- a/tests/test_automated/integration/collector_db/test_db_client.py +++ b/tests/test_automated/integration/collector_db/test_db_client.py @@ -1,20 +1,28 @@ from datetime import datetime, timedelta import pytest +from fastapi import HTTPException from collector_db.AsyncDatabaseClient import AsyncDatabaseClient from collector_db.DTOs.BatchInfo import BatchInfo from collector_db.DTOs.LogInfo import LogInfo from collector_db.DTOs.URLErrorInfos import URLErrorPydanticInfo from collector_db.DTOs.URLInfo import URLInfo -from collector_db.DTOs.URLMetadataInfo import URLMetadataInfo -from collector_db.enums import URLMetadataAttributeType, ValidationStatus, ValidationSource +from collector_db.DTOs.URLMapping import URLMapping +from collector_db.constants import PLACEHOLDER_AGENCY_NAME +from collector_db.models import URL, ReviewingUserURL, URLOptionalDataSourceMetadata, ConfirmedURLAgency, Agency from collector_manager.enums import URLStatus -from core.enums import BatchStatus +from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo +from core.enums import BatchStatus, RecordType, SuggestionType +from tests.helpers.complex_test_data_functions import setup_for_get_next_url_for_annotation, setup_for_annotate_agency from tests.helpers.DBDataCreator import DBDataCreator +from tests.helpers.complex_test_data_functions import setup_for_get_next_url_for_final_review - -def test_insert_urls(db_client_test): +@pytest.mark.asyncio +async def test_insert_urls( + db_client_test, + adb_client_test +): # Insert batch batch_info = BatchInfo( strategy="ckan", @@ -38,7 +46,7 @@ def test_insert_urls(db_client_test): collector_metadata={"name": "example_duplicate"}, ) ] - insert_urls_info = db_client_test.insert_urls( + insert_urls_info = await adb_client_test.insert_urls( url_infos=urls, batch_id=batch_id ) @@ -52,11 +60,12 @@ def test_insert_urls(db_client_test): assert insert_urls_info.original_count == 2 assert insert_urls_info.duplicate_count == 1 - -def test_insert_logs(db_data_creator: DBDataCreator): +@pytest.mark.asyncio +async def test_insert_logs(db_data_creator: DBDataCreator): batch_id_1 = db_data_creator.batch() batch_id_2 = db_data_creator.batch() + adb_client = db_data_creator.adb_client db_client = db_data_creator.db_client db_client.insert_logs( log_infos=[ @@ -66,26 +75,28 @@ def test_insert_logs(db_data_creator: DBDataCreator): ] ) - logs = db_client.get_logs_by_batch_id(batch_id_1) + logs = await adb_client.get_logs_by_batch_id(batch_id_1) assert len(logs) == 2 - logs = db_client.get_logs_by_batch_id(batch_id_2) + logs = await adb_client.get_logs_by_batch_id(batch_id_2) assert len(logs) == 1 -def 
test_delete_old_logs(db_data_creator: DBDataCreator): +@pytest.mark.asyncio +async def test_delete_old_logs(db_data_creator: DBDataCreator): batch_id = db_data_creator.batch() old_datetime = datetime.now() - timedelta(days=1) db_client = db_data_creator.db_client + adb_client = db_data_creator.adb_client log_infos = [] for i in range(3): log_infos.append(LogInfo(log="test log", batch_id=batch_id, created_at=old_datetime)) db_client.insert_logs(log_infos=log_infos) - logs = db_client.get_logs_by_batch_id(batch_id=batch_id) + logs = await adb_client.get_logs_by_batch_id(batch_id=batch_id) assert len(logs) == 3 - db_client.delete_old_logs() + await adb_client.delete_old_logs() - logs = db_client.get_logs_by_batch_id(batch_id=batch_id) + logs = await adb_client.get_logs_by_batch_id(batch_id=batch_id) assert len(logs) == 0 def test_delete_url_updated_at(db_data_creator: DBDataCreator): @@ -109,25 +120,7 @@ def test_delete_url_updated_at(db_data_creator: DBDataCreator): url = db_client.get_urls_by_batch(batch_id=batch_id, page=1)[0] assert url.updated_at > old_updated_at -@pytest.mark.asyncio -async def test_get_url_metadata(db_data_creator: DBDataCreator): - batch_id = db_data_creator.batch() - url_id = db_data_creator.urls(batch_id=batch_id, url_count=1).url_mappings[0].url_id - - adb_client = AsyncDatabaseClient() - - await adb_client.add_url_metadata( - url_metadata_info=URLMetadataInfo( - url_id=url_id, - attribute=URLMetadataAttributeType.RELEVANT, - value="False", - validation_status=ValidationStatus.PENDING_VALIDATION, - validation_source=ValidationSource.MACHINE_LEARNING, - ) - ) - metadata = await adb_client.get_url_metadata_by_status(url_status=URLStatus.PENDING) - print(metadata) @pytest.mark.asyncio async def test_add_url_error_info(db_data_creator: DBDataCreator): @@ -136,12 +129,14 @@ async def test_add_url_error_info(db_data_creator: DBDataCreator): url_ids = [url_mapping.url_id for url_mapping in url_mappings] adb_client = AsyncDatabaseClient() + task_id = await db_data_creator.task() error_infos = [] for url_mapping in url_mappings: uei = URLErrorPydanticInfo( url_id=url_mapping.url_id, error="test error", + task_id=task_id ) error_infos.append(uei) @@ -158,35 +153,635 @@ async def test_add_url_error_info(db_data_creator: DBDataCreator): assert result.url_id in url_ids assert result.error == "test error" + @pytest.mark.asyncio -async def test_get_urls_with_html_data_and_no_relevancy_metadata( - db_data_creator: DBDataCreator, -): +async def test_get_next_url_for_final_review_basic(db_data_creator: DBDataCreator): + """ + Test that an annotated URL is returned + """ + + setup_info = await setup_for_get_next_url_for_final_review( + db_data_creator=db_data_creator, + annotation_count=1, + include_user_annotations=True + ) + + url_mapping = setup_info.url_mapping + # Add agency auto suggestions + await db_data_creator.agency_auto_suggestions( + url_id=url_mapping.url_id, + count=3 + ) + + + result = await db_data_creator.adb_client.get_next_url_for_final_review( + batch_id=None + ) + + assert result.url == url_mapping.url + html_info = result.html_info + assert html_info.description == "test description" + assert html_info.title == "test html content" + + annotation_info = result.annotations + relevant_info = annotation_info.relevant + assert relevant_info.auto == True + assert relevant_info.user == False + + record_type_info = annotation_info.record_type + assert record_type_info.auto == RecordType.ARREST_RECORDS + assert record_type_info.user == RecordType.ACCIDENT_REPORTS + + 
agency_info = annotation_info.agency + auto_agency_suggestions = agency_info.auto + assert auto_agency_suggestions.unknown == False + assert len(auto_agency_suggestions.suggestions) == 3 + + # Check user agency suggestion exists and is correct + assert agency_info.user.pdap_agency_id == setup_info.user_agency_id + + +@pytest.mark.asyncio +async def test_get_next_url_for_final_review_batch_id_filtering(db_data_creator: DBDataCreator): + setup_info_1 = await setup_for_get_next_url_for_final_review( + db_data_creator=db_data_creator, + annotation_count=3, + include_user_annotations=True + ) + + setup_info_2 = await setup_for_get_next_url_for_final_review( + db_data_creator=db_data_creator, + annotation_count=3, + include_user_annotations=True + ) + + url_mapping_1 = setup_info_1.url_mapping + url_mapping_2 = setup_info_2.url_mapping + + # If a batch id is provided, return first valid URL with that batch id + result_with_batch_id =await db_data_creator.adb_client.get_next_url_for_final_review( + batch_id=setup_info_2.batch_id + ) + + assert result_with_batch_id.url == url_mapping_2.url + + # If no batch id is provided, return first valid URL + result_no_batch_id =await db_data_creator.adb_client.get_next_url_for_final_review( + batch_id=None + ) + + assert result_no_batch_id.url == url_mapping_1.url + + +@pytest.mark.asyncio +async def test_get_next_url_for_final_review_favor_more_components(db_data_creator: DBDataCreator): + """ + Test in the case of two URLs, favoring the one with more annotations for more components + i.e., if one has annotations for record type and agency id, that should be favored over one with just record type + """ + + setup_info_without_user_anno = await setup_for_get_next_url_for_final_review( + db_data_creator=db_data_creator, + annotation_count=3, + include_user_annotations=False + ) + url_mapping_without_user_anno = setup_info_without_user_anno.url_mapping + + setup_info_with_user_anno = await setup_for_get_next_url_for_final_review( + db_data_creator=db_data_creator, + annotation_count=3, + include_user_annotations=True + ) + url_mapping_with_user_anno = setup_info_with_user_anno.url_mapping + + # Have both be listed as unknown + + for url_mapping in [url_mapping_with_user_anno, url_mapping_without_user_anno]: + await db_data_creator.agency_auto_suggestions( + url_id=url_mapping.url_id, + count=3, + suggestion_type=SuggestionType.UNKNOWN + ) + + result = await db_data_creator.adb_client.get_next_url_for_final_review( + batch_id=None + ) + + assert result.id == url_mapping_with_user_anno.url_id + +@pytest.mark.asyncio +async def test_get_next_url_for_final_review_no_annotations(db_data_creator: DBDataCreator): + """ + Test in the case of one URL with no annotations. + Should be returned if it is the only one available. 
+ """ batch_id = db_data_creator.batch() - url_mappings = db_data_creator.urls(batch_id=batch_id, url_count=3).url_mappings - url_ids = [url_info.url_id for url_info in url_mappings] - await db_data_creator.html_data(url_ids) - await db_data_creator.metadata([url_ids[0]]) - results = await db_data_creator.adb_client.get_urls_with_html_data_and_no_relevancy_metadata() + url_mapping = db_data_creator.urls(batch_id=batch_id, url_count=1).url_mappings[0] - permitted_url_ids = [url_id for url_id in url_ids if url_id != url_ids[0]] - assert len(results) == 2 - for result in results: - assert result.url_id in permitted_url_ids - assert len(result.html_infos) == 2 + result = await db_data_creator.adb_client.get_next_url_for_final_review( + batch_id=None + ) + + assert result.id == url_mapping.url_id + + annotations = result.annotations + + agency = annotations.agency + assert agency.confirmed == [] + assert agency.auto.unknown is True + assert agency.auto.suggestions == [] + + record_type = annotations.record_type + assert record_type.auto is None + assert record_type.user is None + + relevant = annotations.relevant + assert relevant.auto is None + assert relevant.user is None @pytest.mark.asyncio -async def test_get_urls_with_metadata(db_data_creator: DBDataCreator): +async def test_get_next_url_for_final_review_only_confirmed_urls(db_data_creator: DBDataCreator): + """ + Test in the case of one URL that is submitted + Should not be returned. + """ batch_id = db_data_creator.batch() - url_mappings = db_data_creator.urls(batch_id=batch_id, url_count=3).url_mappings - url_ids = [url_info.url_id for url_info in url_mappings] - await db_data_creator.metadata([url_ids[0]]) - # Neither of these two URLs should be picked up - await db_data_creator.metadata([url_ids[1]], attribute=URLMetadataAttributeType.RECORD_TYPE) - await db_data_creator.metadata([url_ids[2]], validation_status=ValidationStatus.VALIDATED) - results = await db_data_creator.adb_client.get_urls_with_metadata( - attribute=URLMetadataAttributeType.RELEVANT, - validation_status=ValidationStatus.PENDING_VALIDATION + url_mapping = db_data_creator.urls( + batch_id=batch_id, + url_count=1, + outcome=URLStatus.SUBMITTED + ).url_mappings[0] + + result = await db_data_creator.adb_client.get_next_url_for_final_review( + batch_id=None + ) + + assert result is None + +@pytest.mark.asyncio +async def test_approve_url_basic(db_data_creator: DBDataCreator): + setup_info = await setup_for_get_next_url_for_final_review( + db_data_creator=db_data_creator, + annotation_count=3, + include_user_annotations=True + ) + url_mapping = setup_info.url_mapping + + # Add confirmed agency + agency_id = await db_data_creator.agency_confirmed_suggestion( + url_id=url_mapping.url_id + ) + + adb_client = db_data_creator.adb_client + # Approve URL. Only URL should be affected. No other properties should be changed. 
+ await adb_client.approve_url( + approval_info=FinalReviewApprovalInfo( + url_id=url_mapping.url_id, + record_type=RecordType.ARREST_RECORDS, + relevant=True, + ), + user_id=1 + ) + + # Confirm same agency id is listed as confirmed + urls: list[URL] = await adb_client.get_all(URL) + assert len(urls) == 1 + url = urls[0] + assert url.id == url_mapping.url_id + assert url.record_type == RecordType.ARREST_RECORDS.value + assert url.outcome == URLStatus.VALIDATED.value + assert url.name == "Test Name" + assert url.description == "Test Description" + + confirmed_agency: list[ConfirmedURLAgency] = await adb_client.get_all(ConfirmedURLAgency) + assert len(confirmed_agency) == 1 + assert confirmed_agency[0].url_id == url_mapping.url_id + assert confirmed_agency[0].agency_id == agency_id + + approving_user_urls: list[ReviewingUserURL] = await adb_client.get_all(ReviewingUserURL) + assert len(approving_user_urls) == 1 + assert approving_user_urls[0].user_id == 1 + assert approving_user_urls[0].url_id == url_mapping.url_id + + optional_metadata: list[URLOptionalDataSourceMetadata] = await adb_client.get_all(URLOptionalDataSourceMetadata) + assert len(optional_metadata) == 1 + assert optional_metadata[0].url_id == url_mapping.url_id + assert optional_metadata[0].record_formats == ["Test Record Format", "Test Record Format 2"] + assert optional_metadata[0].data_portal_type == "Test Data Portal Type" + assert optional_metadata[0].supplying_entity == "Test Supplying Entity" + +@pytest.mark.asyncio +async def test_approval_url_error(db_data_creator: DBDataCreator): + setup_info = await setup_for_get_next_url_for_final_review( + db_data_creator=db_data_creator, + annotation_count=3, + include_user_annotations=True, + include_miscellaneous_metadata=False + ) + url_mapping = setup_info.url_mapping + + # Set all required descriptors to none and receive an error + adb_client = db_data_creator.adb_client + with pytest.raises(HTTPException) as e: + await adb_client.approve_url( + approval_info=FinalReviewApprovalInfo( + url_id=url_mapping.url_id, + ), + user_id=1 + ) + assert e.value.status_code == 422 + + # Create kwarg dictionary with all required approval info fields + kwarg_dict = { + "record_type": RecordType.ARREST_RECORDS, + "agency_ids": [await db_data_creator.agency()], + "name": "Test Name", + "description": "Test Description", + } + # For each keyword, create a copy of the kwargs and set that one to none + # Confirm it produces the correct error + for kwarg in kwarg_dict: + kwarg_copy = kwarg_dict.copy() + kwarg_copy[kwarg] = None + with pytest.raises(HTTPException) as e: + await adb_client.approve_url( + approval_info=FinalReviewApprovalInfo( + url_id=url_mapping.url_id, + relevant=True, + **kwarg_copy + ), + user_id=1 + ) + pytest.fail(f"Expected error for kwarg {kwarg}") + + # Test that if all kwargs are set, no error is raised + await adb_client.approve_url( + approval_info=FinalReviewApprovalInfo( + url_id=url_mapping.url_id, + relevant=True, + **kwarg_dict + ), + user_id=1 + ) + +@pytest.mark.asyncio +async def test_get_next_url_for_user_relevance_annotation_pending( + db_data_creator: DBDataCreator +): + """ + Users should receive a valid URL to annotate + All users should receive the same next URL + Once any user annotates that URL, none of the users should receive it again + """ + setup_info = await setup_for_get_next_url_for_annotation( + db_data_creator=db_data_creator, + url_count=2 + ) + + url_1 = setup_info.insert_urls_info.url_mappings[0] + + # Add `Relevancy` attribute with value 
`True` + await db_data_creator.auto_relevant_suggestions( + url_id=url_1.url_id, + relevant=True + ) + + adb_client = db_data_creator.adb_client + url_1 = await adb_client.get_next_url_for_relevance_annotation( + user_id=1, + batch_id=None + ) + assert url_1 is not None + + url_2 = await adb_client.get_next_url_for_relevance_annotation( + user_id=2, + batch_id=None + ) + assert url_2 is not None + + assert url_1.url_info.url == url_2.url_info.url + + # Annotate this URL, then check that the second URL is returned + await adb_client.add_user_relevant_suggestion( + url_id=url_1.url_info.url_id, + user_id=1, + relevant=True + ) + + url_3 = await adb_client.get_next_url_for_relevance_annotation( + user_id=1, + batch_id=None + ) + assert url_3 is not None + + assert url_1 != url_3 + + # Check that the second URL is also returned for another user + url_4 = await adb_client.get_next_url_for_relevance_annotation( + user_id=2, + batch_id=None + ) + assert url_4 is not None + + + assert url_4 == url_3 + + +@pytest.mark.asyncio +async def test_get_next_url_for_annotation_batch_filtering( + db_data_creator: DBDataCreator +): + """ + Test that for all annotation retrievals, batch filtering works as expected + """ + setup_info_1 = await setup_for_get_next_url_for_annotation( + db_data_creator=db_data_creator, + url_count=1 + ) + setup_info_2 = await setup_for_get_next_url_for_annotation( + db_data_creator=db_data_creator, + url_count=1 + ) + + url_1 = setup_info_1.insert_urls_info.url_mappings[0] + url_2 = setup_info_2.insert_urls_info.url_mappings[0] + + # Test for relevance + # If a batch id is provided, return first valid URL with that batch id + result_with_batch_id = await db_data_creator.adb_client.get_next_url_for_relevance_annotation( + user_id=1, + batch_id=setup_info_2.batch_id + ) + + assert result_with_batch_id.url_info.url == url_2.url + + # If no batch id is provided, return first valid URL + result_no_batch_id = await db_data_creator.adb_client.get_next_url_for_relevance_annotation( + user_id=1, + batch_id=None + ) + + assert result_no_batch_id.url_info.url == url_1.url + + # Test for record type + # If a batch id is provided, return first valid URL with that batch id + result_with_batch_id = await db_data_creator.adb_client.get_next_url_for_record_type_annotation( + user_id=1, + batch_id=setup_info_2.batch_id + ) + + assert result_with_batch_id.url_info.url == url_2.url + + # If no batch id is provided, return first valid URL + result_no_batch_id = await db_data_creator.adb_client.get_next_url_for_record_type_annotation( + user_id=1, + batch_id=None + ) + + assert result_no_batch_id.url_info.url == url_1.url + + # Test for agency + for url in [url_1, url_2]: + await db_data_creator.auto_suggestions( + url_ids=[url.url_id], + num_suggestions=2, + suggestion_type=SuggestionType.AUTO_SUGGESTION + ) + + # If a batch id is provided, return first valid URL with that batch id + result_with_batch_id = await db_data_creator.adb_client.get_next_url_agency_for_annotation( + user_id=1, + batch_id=setup_info_2.batch_id + ) + + assert result_with_batch_id.next_annotation.url == url_2.url + + # If no batch id is provided, return first valid URL + result_no_batch_id = await db_data_creator.adb_client.get_next_url_agency_for_annotation( + user_id=1, + batch_id=None + ) + + assert result_no_batch_id.next_annotation.url == url_1.url + + +@pytest.mark.asyncio +async def test_get_next_url_for_user_relevance_annotation_validated( + db_data_creator: DBDataCreator +): + """ + A validated URL should not turn up 
in get_next_url_for_user_annotation + """ + + setup_info = await setup_for_get_next_url_for_annotation( + db_data_creator=db_data_creator, + url_count=1, + outcome=URLStatus.VALIDATED + ) + + + url_1 = setup_info.insert_urls_info.url_mappings[0] + + # Add `Relevancy` attribute with value `True` + await db_data_creator.auto_relevant_suggestions( + url_id=url_1.url_id, + relevant=True + ) + + adb_client = db_data_creator.adb_client + url = await adb_client.get_next_url_for_relevance_annotation( + user_id=1, + batch_id=None + ) + assert url is None +@pytest.mark.asyncio +async def test_annotate_url_marked_not_relevant(db_data_creator: DBDataCreator): + """ + If a URL is marked not relevant by the user, they should not receive that URL + in calls to get an annotation for record type or agency + Other users also should not receive the URL + """ + setup_info = await setup_for_get_next_url_for_annotation( + db_data_creator=db_data_creator, + url_count=2 + ) + adb_client = db_data_creator.adb_client + url_to_mark_not_relevant: URLMapping = setup_info.insert_urls_info.url_mappings[0] + url_to_mark_relevant: URLMapping = setup_info.insert_urls_info.url_mappings[1] + for url_mapping in setup_info.insert_urls_info.url_mappings: + await db_data_creator.agency_auto_suggestions( + url_id=url_mapping.url_id, + count=3 + ) + await adb_client.add_user_relevant_suggestion( + user_id=1, + url_id=url_to_mark_not_relevant.url_id, + relevant=False + ) + await adb_client.add_user_relevant_suggestion( + user_id=1, + url_id=url_to_mark_relevant.url_id, + relevant=True + ) + + # User should not receive the URL for record type annotation + record_type_annotation_info = await adb_client.get_next_url_for_record_type_annotation( + user_id=1, + batch_id=None + ) + assert record_type_annotation_info.url_info.url_id != url_to_mark_not_relevant.url_id + + # Other users also should not receive the URL for record type annotation + record_type_annotation_info = await adb_client.get_next_url_for_record_type_annotation( + user_id=2, + batch_id=None + ) + assert record_type_annotation_info.url_info.url_id != \ + url_to_mark_not_relevant.url_id, "Other users should not receive the URL for record type annotation" + + # User should not receive the URL for agency annotation + agency_annotation_info_user_1 = await adb_client.get_next_url_agency_for_annotation( + user_id=1, + batch_id=None + ) + assert agency_annotation_info_user_1.next_annotation.url_id != url_to_mark_not_relevant.url_id + + # Other users also should not receive the URL for agency annotation + agency_annotation_info_user_2 = await adb_client.get_next_url_agency_for_annotation( + user_id=2, + batch_id=None + ) + assert agency_annotation_info_user_2.next_annotation.url_id != url_to_mark_not_relevant.url_id +@pytest.mark.asyncio +async def test_annotate_url_agency_agency_not_in_db(db_data_creator: DBDataCreator): + setup_info = await setup_for_annotate_agency( + db_data_creator, + url_count=1 + ) + + url_id = setup_info.url_ids[0] + await db_data_creator.adb_client.add_agency_manual_suggestion( + agency_id=1, + url_id=url_id, + user_id=1, + is_new=False + ) + + agencies = await db_data_creator.adb_client.get_all(Agency) + assert len(agencies) + assert agencies[0].name == PLACEHOLDER_AGENCY_NAME +@pytest.mark.asyncio +async def test_get_next_url_for_user_record_type_annotation(db_data_creator: DBDataCreator): + """ + All users should receive the same next valid URL for record type annotation + Once any user annotates that URL, none of the users should receive it + """ 
+ setup_info = await setup_for_get_next_url_for_annotation( + db_data_creator, + url_count=2 + ) + + # All users should receive the same URL + url_1 = setup_info.insert_urls_info.url_mappings[0] + url_2 = setup_info.insert_urls_info.url_mappings[1] + + adb_client = db_data_creator.adb_client + + url_user_1 = await adb_client.get_next_url_for_record_type_annotation( + user_id=1, + batch_id=None + ) + assert url_user_1 is not None + + url_user_2 = await adb_client.get_next_url_for_record_type_annotation( + user_id=2, + batch_id=None + ) + + assert url_user_2 is not None + + # Check that the URLs are the same + assert url_user_1 == url_user_2 + + # After annotating, both users should receive a different URL + await adb_client.add_user_record_type_suggestion( + user_id=1, + url_id=url_1.url_id, + record_type=RecordType.ARREST_RECORDS + ) + + next_url_user_1 = await adb_client.get_next_url_for_record_type_annotation( + user_id=1, + batch_id=None + ) + + next_url_user_2 = await adb_client.get_next_url_for_record_type_annotation( + user_id=2, + batch_id=None + ) + + assert next_url_user_1 != url_user_1 + assert next_url_user_1 == next_url_user_2 + + + + + +@pytest.mark.asyncio +async def test_get_next_url_for_user_agency_annotation(db_data_creator: DBDataCreator): + """ + All users should receive the same next valid URL for agency annotation + Once any user annotates that URL, none of the users should receive it + """ + setup_info = await setup_for_annotate_agency( + db_data_creator, + url_count=2 + ) + + # All users should receive the same URL + url_1 = setup_info.url_ids[0] + url_2 = setup_info.url_ids[1] + + adb_client = db_data_creator.adb_client + url_user_1 = await adb_client.get_next_url_agency_for_annotation( + user_id=1, + batch_id=None + ) + assert url_user_1 is not None + + url_user_2 = await adb_client.get_next_url_agency_for_annotation( + user_id=2, + batch_id=None + ) + + assert url_user_2 is not None + + # Check that the URLs are the same + assert url_user_1 == url_user_2 + + # Annotate the URL + await adb_client.add_agency_manual_suggestion( + url_id=url_1, + user_id=1, + is_new=True, + agency_id=None + ) + + # Both users should receive the next URL + next_url_user_1 = await adb_client.get_next_url_agency_for_annotation( + user_id=1, + batch_id=None + ) + assert next_url_user_1 is not None + + next_url_user_2 = await adb_client.get_next_url_agency_for_annotation( + user_id=2, + batch_id=None ) - assert len(results) == 1 + assert next_url_user_2 is not None + assert url_user_1 != next_url_user_1 + assert next_url_user_1 == next_url_user_2 diff --git a/tests/test_automated/integration/conftest.py b/tests/test_automated/integration/conftest.py index 89e6b753..70c79c22 100644 --- a/tests/test_automated/integration/conftest.py +++ b/tests/test_automated/integration/conftest.py @@ -1,20 +1,37 @@ +from unittest.mock import MagicMock import pytest -from core.CoreLogger import CoreLogger +from collector_db.AsyncDatabaseClient import AsyncDatabaseClient +from collector_manager.AsyncCollectorManager import AsyncCollectorManager +from core.AsyncCore import AsyncCore +from core.AsyncCoreLogger import AsyncCoreLogger from core.SourceCollectorCore import SourceCollectorCore @pytest.fixture def test_core(db_client_test): - with CoreLogger( - db_client=db_client_test - ) as logger: - core = SourceCollectorCore( - db_client=db_client_test, - core_logger=logger, - dev_mode=True - ) - yield core - core.shutdown() + core = SourceCollectorCore( + db_client=db_client_test, + ) + yield core + 
+@pytest.fixture +def test_async_core(adb_client_test): + logger = AsyncCoreLogger( + adb_client=adb_client_test + ) + adb_client = AsyncDatabaseClient() + core = AsyncCore( + adb_client=adb_client, + task_manager=MagicMock(), + collector_manager=AsyncCollectorManager( + adb_client=adb_client, + logger=logger, + dev_mode=True + ), + ) + yield core + core.shutdown() + logger.shutdown() \ No newline at end of file diff --git a/tests/test_automated/integration/core/helpers/common_test_procedures.py b/tests/test_automated/integration/core/helpers/common_test_procedures.py deleted file mode 100644 index d60c59d2..00000000 --- a/tests/test_automated/integration/core/helpers/common_test_procedures.py +++ /dev/null @@ -1,27 +0,0 @@ -import time - -from pydantic import BaseModel - -from collector_manager.enums import CollectorType -from core.SourceCollectorCore import SourceCollectorCore - - -def run_collector_and_wait_for_completion( - collector_type: CollectorType, - core: SourceCollectorCore, - dto: BaseModel -): - collector_name = collector_type.value - response = core.initiate_collector( - collector_type=collector_type, - dto=dto - ) - assert response == f"Started {collector_name} collector with CID: 1" - response = core.get_status(1) - while response == f"1 ({collector_name}) - RUNNING": - time.sleep(1) - response = core.get_status(1) - assert response == f"1 ({collector_name}) - COMPLETED", response - # TODO: Change this logic, since collectors close automatically - response = core.close_collector(1) - assert response.message == "Collector closed and data harvested successfully." diff --git a/tests/test_automated/integration/core/test_async_core.py b/tests/test_automated/integration/core/test_async_core.py new file mode 100644 index 00000000..f2125865 --- /dev/null +++ b/tests/test_automated/integration/core/test_async_core.py @@ -0,0 +1,175 @@ +import types +from unittest.mock import MagicMock, AsyncMock, call + +import pytest + +from collector_db.AsyncDatabaseClient import AsyncDatabaseClient +from collector_db.enums import TaskType +from collector_db.models import Task +from core.AsyncCore import AsyncCore +from core.DTOs.TaskOperatorRunInfo import TaskOperatorRunInfo, TaskOperatorOutcome +from core.TaskManager import TaskManager +from core.enums import BatchStatus +from tests.helpers.DBDataCreator import DBDataCreator + +def setup_async_core(adb_client: AsyncDatabaseClient): + return AsyncCore( + adb_client=adb_client, + task_manager=TaskManager( + adb_client=adb_client, + huggingface_interface=AsyncMock(), + url_request_interface=AsyncMock(), + html_parser=AsyncMock(), + discord_poster=AsyncMock(), + pdap_client=AsyncMock() + ), + collector_manager=AsyncMock() + ) + +@pytest.mark.asyncio +async def test_conclude_task_success(db_data_creator: DBDataCreator): + ddc = db_data_creator + + batch_id = ddc.batch() + url_ids = ddc.urls(batch_id=batch_id, url_count=3).url_ids + task_id = await ddc.task() + run_info = TaskOperatorRunInfo( + task_id=task_id, + linked_url_ids=url_ids, + outcome=TaskOperatorOutcome.SUCCESS, + ) + + core = setup_async_core(db_data_creator.adb_client) + await core.conclude_task(run_info=run_info) + + task_info = await ddc.adb_client.get_task_info(task_id=task_id) + + assert task_info.task_status == BatchStatus.READY_TO_LABEL + assert len(task_info.urls) == 3 + +@pytest.mark.asyncio +async def test_conclude_task_success_via_task_manager(db_data_creator: DBDataCreator): + ddc = db_data_creator + + batch_id = ddc.batch() + url_ids = ddc.urls(batch_id=batch_id, url_count=3).url_ids + 
task_id = await ddc.task() + run_info = TaskOperatorRunInfo( + task_id=task_id, + linked_url_ids=url_ids, + outcome=TaskOperatorOutcome.SUCCESS, + ) + + core = setup_async_core(db_data_creator.adb_client) + await core.task_manager.conclude_task(run_info=run_info) + + task_info = await ddc.adb_client.get_task_info(task_id=task_id) + + assert task_info.task_status == BatchStatus.READY_TO_LABEL + assert len(task_info.urls) == 3 + +@pytest.mark.asyncio +async def test_conclude_task_error(db_data_creator: DBDataCreator): + ddc = db_data_creator + + batch_id = ddc.batch() + url_ids = ddc.urls(batch_id=batch_id, url_count=3).url_ids + task_id = await ddc.task() + run_info = TaskOperatorRunInfo( + task_id=task_id, + linked_url_ids=url_ids, + outcome=TaskOperatorOutcome.ERROR, + message="test error", + ) + + core = setup_async_core(db_data_creator.adb_client) + await core.task_manager.conclude_task(run_info=run_info) + + task_info = await ddc.adb_client.get_task_info(task_id=task_id) + + assert task_info.task_status == BatchStatus.ERROR + assert task_info.error_info == "test error" + assert len(task_info.urls) == 3 + +@pytest.mark.asyncio +async def test_run_task_prereq_not_met(): + """ + When a task pre-requisite is not met, the task should not be run + """ + core = setup_async_core(AsyncMock()) + + mock_operator = AsyncMock() + mock_operator.meets_task_prerequisites = AsyncMock(return_value=False) + core.task_manager.get_task_operators = AsyncMock(return_value=[mock_operator]) + await core.run_tasks() + + mock_operator.meets_task_prerequisites.assert_called_once() + mock_operator.run_task.assert_not_called() + +@pytest.mark.asyncio +async def test_run_task_prereq_met(db_data_creator: DBDataCreator): + """ + When a task pre-requisite is met, the task should be run + And a task entry should be created in the database + """ + + async def run_task(self, task_id: int) -> TaskOperatorRunInfo: + return TaskOperatorRunInfo( + task_id=task_id, + outcome=TaskOperatorOutcome.SUCCESS, + linked_url_ids=[1, 2, 3] + ) + + core = setup_async_core(db_data_creator.adb_client) + core.task_manager.conclude_task = AsyncMock() + + mock_operator = AsyncMock() + mock_operator.meets_task_prerequisites = AsyncMock( + side_effect=[True, False] + ) + mock_operator.task_type = TaskType.HTML + mock_operator.run_task = types.MethodType(run_task, mock_operator) + + core.task_manager.get_task_operators = AsyncMock(return_value=[mock_operator]) + await core.run_tasks() + + # There should be two calls to meets_task_prerequisites + mock_operator.meets_task_prerequisites.assert_has_calls([call(), call()]) + + results = await db_data_creator.adb_client.get_all(Task) + + assert len(results) == 1 + assert results[0].task_status == BatchStatus.IN_PROCESS.value + + core.task_manager.conclude_task.assert_called_once() + +@pytest.mark.asyncio +async def test_run_task_break_loop(db_data_creator: DBDataCreator): + """ + If the task loop for a single task runs more than 20 times in a row, + this is considered suspicious and possibly indicative of a bug. 
+ In this case, the task loop should be terminated + and an alert should be sent to discord + """ + + async def run_task(self, task_id: int) -> TaskOperatorRunInfo: + return TaskOperatorRunInfo( + task_id=task_id, + outcome=TaskOperatorOutcome.SUCCESS, + linked_url_ids=[1, 2, 3] + ) + + core = setup_async_core(db_data_creator.adb_client) + core.task_manager.conclude_task = AsyncMock() + + mock_operator = AsyncMock() + mock_operator.meets_task_prerequisites = AsyncMock(return_value=True) + mock_operator.task_type = TaskType.HTML + mock_operator.run_task = types.MethodType(run_task, mock_operator) + + core.task_manager.get_task_operators = AsyncMock(return_value=[mock_operator]) + await core.task_manager.trigger_task_run() + + core.task_manager.discord_poster.post_to_discord.assert_called_once_with( + message="Task HTML has been run more than 20 times in a row. Task loop terminated." + ) diff --git a/tests/test_automated/integration/core/test_core_logger.py b/tests/test_automated/integration/core/test_core_logger.py deleted file mode 100644 index 07a98000..00000000 --- a/tests/test_automated/integration/core/test_core_logger.py +++ /dev/null @@ -1,66 +0,0 @@ -import threading -import time - -from collector_db.DTOs.LogInfo import LogInfo -from core.CoreLogger import CoreLogger -from tests.helpers.DBDataCreator import DBDataCreator - - -def test_logger_integration(db_data_creator: DBDataCreator): - batch_id = db_data_creator.batch() - db_client = db_data_creator.db_client - with CoreLogger(flush_interval=1, db_client=db_client) as logger: - - # Simulate logging - logger.log(LogInfo(log="Integration Log 1", batch_id=batch_id)) - logger.log(LogInfo(log="Integration Log 2", batch_id=batch_id)) - - # Wait for the flush interval - time.sleep(1.5) - - # Verify logs in the database - logs = db_client.get_logs_by_batch_id(batch_id) - assert len(logs) == 2 - assert logs[0].log == "Integration Log 1" - - -def test_multithreaded_integration_with_live_db(db_data_creator: DBDataCreator): - # Ensure the database is empty - db_client = db_data_creator.db_client - db_client.delete_all_logs() - - batch_ids = [db_data_creator.batch() for _ in range(5)] - db_client = db_data_creator.db_client - logger = CoreLogger(flush_interval=1, db_client=db_client, batch_size=10) - - # Simulate multiple threads logging - def worker(thread_id): - batch_id = batch_ids[thread_id-1] - for i in range(10): # Each thread logs 10 messages - logger.log(LogInfo(log=f"Thread-{thread_id} Log-{i}", batch_id=batch_id)) - - # Start multiple threads - threads = [threading.Thread(target=worker, args=(i+1,)) for i in range(5)] # 5 threads - for t in threads: - t.start() - for t in threads: - t.join() - - # Allow the logger to flush - logger.shutdown() - time.sleep(10) - - # Verify logs in the database - logs = db_client.get_all_logs() - - # Optional: Print logs for manual inspection - for log in logs: - print(log.log) - - # Assertions - assert len(logs) == 50 # 5 threads * 10 messages each - for i in range(1,6): - for j in range(10): - assert any(log.log == f"Thread-{i} Log-{j}" for log in logs) - - diff --git a/tests/test_automated/integration/core/test_example_collector_lifecycle.py b/tests/test_automated/integration/core/test_example_collector_lifecycle.py index 65b9cd6c..65ffc001 100644 --- a/tests/test_automated/integration/core/test_example_collector_lifecycle.py +++ b/tests/test_automated/integration/core/test_example_collector_lifecycle.py @@ -1,25 +1,38 @@ -import time +import asyncio + +import pytest from collector_db.DTOs.BatchInfo 
import BatchInfo from collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO from collector_manager.enums import CollectorType, URLStatus +from core.AsyncCore import AsyncCore from core.DTOs.CollectorStartInfo import CollectorStartInfo from core.SourceCollectorCore import SourceCollectorCore from core.enums import BatchStatus +from tests.helpers.patch_functions import block_sleep -def test_example_collector_lifecycle(test_core: SourceCollectorCore): +@pytest.mark.asyncio +async def test_example_collector_lifecycle( + test_core: SourceCollectorCore, + test_async_core: AsyncCore, + monkeypatch +): """ Test the flow of an example collector, which generates fake urls and saves them to the database """ + acore = test_async_core core = test_core db_client = core.db_client + + barrier = await block_sleep(monkeypatch) + dto = ExampleInputDTO( example_field="example_value", sleep_time=1 ) - csi: CollectorStartInfo = core.initiate_collector( + csi: CollectorStartInfo = await acore.initiate_collector( collector_type=CollectorType.EXAMPLE, dto=dto, user_id=1 @@ -29,18 +42,21 @@ def test_example_collector_lifecycle(test_core: SourceCollectorCore): batch_id = csi.batch_id + # Yield control so coroutine runs up to the barrier + await asyncio.sleep(0) + assert core.get_status(batch_id) == BatchStatus.IN_PROCESS - print("Sleeping for 1.5 seconds...") - time.sleep(1.5) - print("Done sleeping...") - assert core.get_status(batch_id) == BatchStatus.COMPLETE + # Release the barrier to resume execution + barrier.release() + await acore.collector_manager.logger.flush_all() + assert core.get_status(batch_id) == BatchStatus.READY_TO_LABEL batch_info: BatchInfo = db_client.get_batch_by_id(batch_id) assert batch_info.strategy == "example" - assert batch_info.status == BatchStatus.COMPLETE + assert batch_info.status == BatchStatus.READY_TO_LABEL assert batch_info.total_url_count == 2 assert batch_info.parameters == dto.model_dump() - assert batch_info.compute_time > 1 + assert batch_info.compute_time > 0 url_infos = db_client.get_urls_by_batch(batch_id) assert len(url_infos) == 2 @@ -50,31 +66,44 @@ def test_example_collector_lifecycle(test_core: SourceCollectorCore): assert url_infos[0].url == "https://example.com" assert url_infos[1].url == "https://example.com/2" -def test_example_collector_lifecycle_multiple_batches(test_core: SourceCollectorCore): +@pytest.mark.asyncio +async def test_example_collector_lifecycle_multiple_batches( + test_core: SourceCollectorCore, + test_async_core: AsyncCore, + monkeypatch +): """ Test the flow of an example collector, which generates fake urls and saves them to the database """ + barrier = await block_sleep(monkeypatch) + acore = test_async_core core = test_core csis: list[CollectorStartInfo] = [] + + for i in range(3): dto = ExampleInputDTO( example_field="example_value", sleep_time=1 ) - csi: CollectorStartInfo = core.initiate_collector( + csi: CollectorStartInfo = await acore.initiate_collector( collector_type=CollectorType.EXAMPLE, dto=dto, user_id=1 ) csis.append(csi) + await asyncio.sleep(0) for csi in csis: print("Batch ID:", csi.batch_id) assert core.get_status(csi.batch_id) == BatchStatus.IN_PROCESS - time.sleep(6) + barrier.release() + + await asyncio.sleep(0.15) for csi in csis: - assert core.get_status(csi.batch_id) == BatchStatus.COMPLETE + assert core.get_status(csi.batch_id) == BatchStatus.READY_TO_LABEL + diff --git a/tests/test_automated/integration/security_manager/test_security_manager.py 
b/tests/test_automated/integration/security_manager/test_security_manager.py index 010c3bf2..eb7e8506 100644 --- a/tests/test_automated/integration/security_manager/test_security_manager.py +++ b/tests/test_automated/integration/security_manager/test_security_manager.py @@ -17,10 +17,17 @@ def mock_get_secret_key(mocker): SECRET_KEY = "test_secret_key" VALID_TOKEN = "valid_token" INVALID_TOKEN = "invalid_token" -FAKE_PAYLOAD = {"sub": 1, "permissions": [Permissions.SOURCE_COLLECTOR.value]} +FAKE_PAYLOAD = { + "sub": "1", + "permissions": [Permissions.SOURCE_COLLECTOR.value] +} -def test_api_with_valid_token(mock_get_secret_key): +def test_api_with_valid_token( + mock_get_secret_key, + monkeypatch +): + monkeypatch.setenv("DISCORD_WEBHOOK_URL", "https://discord.com") token = jwt.encode(FAKE_PAYLOAD, SECRET_KEY, algorithm=ALGORITHM) # Create Test Client diff --git a/tests/test_automated/integration/source_collectors/test_example_collector.py b/tests/test_automated/integration/source_collectors/test_example_collector.py deleted file mode 100644 index 0a6f9491..00000000 --- a/tests/test_automated/integration/source_collectors/test_example_collector.py +++ /dev/null @@ -1,45 +0,0 @@ -import threading -import time - -from collector_db.DTOs.BatchInfo import BatchInfo -from collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO -from collector_manager.ExampleCollector import ExampleCollector -from core.SourceCollectorCore import SourceCollectorCore -from core.enums import BatchStatus - - -def test_live_example_collector_abort(test_core: SourceCollectorCore): - core = test_core - db_client = core.db_client - - batch_id = db_client.insert_batch( - BatchInfo( - strategy="example", - status=BatchStatus.IN_PROCESS, - parameters={}, - user_id=1 - ) - ) - - - dto = ExampleInputDTO( - sleep_time=3 - ) - - collector = ExampleCollector( - batch_id=batch_id, - dto=dto, - logger=core.core_logger, - db_client=db_client, - raise_error=True - ) - # Run collector in separate thread - thread = threading.Thread(target=collector.run) - thread.start() - collector.abort() - time.sleep(2) - thread.join() - - - assert db_client.get_batch_status(batch_id) == BatchStatus.ABORTED - diff --git a/tests/test_automated/integration/tasks/__init__.py b/tests/test_automated/integration/tasks/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_automated/integration/tasks/conftest.py b/tests/test_automated/integration/tasks/conftest.py new file mode 100644 index 00000000..a4136b20 --- /dev/null +++ b/tests/test_automated/integration/tasks/conftest.py @@ -0,0 +1,23 @@ +from unittest.mock import MagicMock, AsyncMock + +import pytest + +from pdap_access_manager import AccessManager +from pdap_api_client.PDAPClient import PDAPClient + + +@pytest.fixture +def mock_pdap_client() -> PDAPClient: + mock_access_manager = MagicMock( + spec=AccessManager + ) + mock_access_manager.build_url = MagicMock( + return_value="http://example.com" + ) + mock_access_manager.jwt_header = AsyncMock( + return_value={"Authorization": "Bearer token"} + ) + pdap_client = PDAPClient( + access_manager=mock_access_manager + ) + return pdap_client \ No newline at end of file diff --git a/tests/test_automated/integration/tasks/test_agency_preannotation_task.py b/tests/test_automated/integration/tasks/test_agency_preannotation_task.py new file mode 100644 index 00000000..e6278292 --- /dev/null +++ b/tests/test_automated/integration/tasks/test_agency_preannotation_task.py @@ -0,0 +1,321 @@ +from copy import deepcopy +from typing 
import Optional +from unittest.mock import MagicMock, AsyncMock, patch + +import pytest +from aiohttp import ClientSession + +from tests.helpers.test_batch_creation_parameters import TestBatchCreationParameters, TestURLCreationParameters +from source_collectors.muckrock.MuckrockAPIInterface import MuckrockAPIInterface, AgencyLookupResponseType, AgencyLookupResponse +from collector_db.models import Agency, AutomatedUrlAgencySuggestion +from collector_manager.enums import CollectorType, URLStatus +from core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome +from core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo +from core.classes.task_operators.AgencyIdentificationTaskOperator import AgencyIdentificationTaskOperator +from core.classes.subtasks.AutoGooglerAgencyIdentificationSubtask import AutoGooglerAgencyIdentificationSubtask +from core.classes.subtasks.CKANAgencyIdentificationSubtask import CKANAgencyIdentificationSubtask +from core.classes.subtasks.CommonCrawlerAgencyIdentificationSubtask import CommonCrawlerAgencyIdentificationSubtask +from core.classes.subtasks.MuckrockAgencyIdentificationSubtask import MuckrockAgencyIdentificationSubtask +from core.enums import SuggestionType +from pdap_access_manager import AccessManager +from pdap_api_client.DTOs import MatchAgencyResponse, MatchAgencyInfo +from pdap_api_client.PDAPClient import PDAPClient +from pdap_api_client.enums import MatchAgencyResponseStatus +from tests.helpers.DBDataCreator import DBDataCreator, BatchURLCreationInfo, BatchURLCreationInfoV2 + +sample_agency_suggestions = [ + URLAgencySuggestionInfo( + url_id=-1, # This will be overwritten + suggestion_type=SuggestionType.UNKNOWN, + pdap_agency_id=None, + agency_name=None, + state=None, + county=None, + locality=None + ), + URLAgencySuggestionInfo( + url_id=-1, # This will be overwritten + suggestion_type=SuggestionType.CONFIRMED, + pdap_agency_id=-1, + agency_name="Test Agency", + state="Test State", + county="Test County", + locality="Test Locality" + ), + URLAgencySuggestionInfo( + url_id=-1, # This will be overwritten + suggestion_type=SuggestionType.AUTO_SUGGESTION, + pdap_agency_id=-1, + agency_name="Test Agency 2", + state="Test State 2", + county="Test County 2", + locality="Test Locality 2" + ) +] + +@pytest.mark.asyncio +async def test_agency_preannotation_task(db_data_creator: DBDataCreator): + async def mock_run_subtask( + subtask, + url_id: int, + collector_metadata: Optional[dict] + ): + # Deepcopy to prevent using the same instance in memory + suggestion = deepcopy(sample_agency_suggestions[url_id % 3]) + suggestion.url_id = url_id + suggestion.pdap_agency_id = (url_id % 3) if suggestion.suggestion_type != SuggestionType.UNKNOWN else None + return [suggestion] + + async with ClientSession() as session: + mock = MagicMock() + access_manager = AccessManager( + email=mock.email, + password=mock.password, + api_key=mock.api_key, + session=session + ) + pdap_client = PDAPClient( + access_manager=access_manager + ) + muckrock_api_interface = MuckrockAPIInterface(session=session) + with patch.object( + AgencyIdentificationTaskOperator, + "run_subtask", + side_effect=mock_run_subtask, + ) as mock: + operator = AgencyIdentificationTaskOperator( + adb_client=db_data_creator.adb_client, + pdap_client=pdap_client, + muckrock_api_interface=muckrock_api_interface + ) + + # Confirm does not yet meet prerequisites + assert not await operator.meets_task_prerequisites() + + + d = {} + + # Create six urls, one from each strategy + for strategy in [ + 
CollectorType.COMMON_CRAWLER, + CollectorType.AUTO_GOOGLER, + CollectorType.MUCKROCK_COUNTY_SEARCH, + CollectorType.MUCKROCK_SIMPLE_SEARCH, + CollectorType.MUCKROCK_ALL_SEARCH, + CollectorType.CKAN + ]: + # Create two URLs for each, one pending and one errored + creation_info: BatchURLCreationInfoV2 = await db_data_creator.batch_v2( + parameters=TestBatchCreationParameters( + strategy=strategy, + urls=[ + TestURLCreationParameters( + count=1, + status=URLStatus.PENDING, + with_html_content=True + ), + TestURLCreationParameters( + count=1, + status=URLStatus.ERROR, + with_html_content=True + ) + ] + ) + ) + d[strategy] = creation_info.url_creation_infos[URLStatus.PENDING].url_mappings[0].url_id + + + # Confirm meets prerequisites + assert await operator.meets_task_prerequisites() + # Run task + run_info = await operator.run_task(1) + assert run_info.outcome == TaskOperatorOutcome.SUCCESS, run_info.message + + # Confirm tasks are piped into the correct subtasks + # * common_crawler into common_crawler_subtask + # * auto_googler into auto_googler_subtask + # * muckrock_county_search into muckrock_subtask + # * muckrock_simple_search into muckrock_subtask + # * muckrock_all_search into muckrock_subtask + # * ckan into ckan_subtask + + assert mock.call_count == 6 + + + # Confirm subtask classes are correct for the given urls + d2 = {} + for call_arg in mock.call_args_list: + subtask_class = call_arg[0][0].__class__ + url_id = call_arg[0][1] + d2[url_id] = subtask_class + + + subtask_class_collector_type = [ + (MuckrockAgencyIdentificationSubtask, CollectorType.MUCKROCK_ALL_SEARCH), + (MuckrockAgencyIdentificationSubtask, CollectorType.MUCKROCK_COUNTY_SEARCH), + (MuckrockAgencyIdentificationSubtask, CollectorType.MUCKROCK_SIMPLE_SEARCH), + (CKANAgencyIdentificationSubtask, CollectorType.CKAN), + (CommonCrawlerAgencyIdentificationSubtask, CollectorType.COMMON_CRAWLER), + (AutoGooglerAgencyIdentificationSubtask, CollectorType.AUTO_GOOGLER) + ] + + for subtask_class, collector_type in subtask_class_collector_type: + url_id = d[collector_type] + assert d2[url_id] == subtask_class + + + # Confirm task again does not meet prerequisites + assert not await operator.meets_task_prerequisites() + + + + + # Check confirmed and auto suggestions + adb_client = db_data_creator.adb_client + confirmed_suggestions = await adb_client.get_urls_with_confirmed_agencies() + assert len(confirmed_suggestions) == 2 + + agencies = await adb_client.get_all(Agency) + assert len(agencies) == 2 + + auto_suggestions = await adb_client.get_all(AutomatedUrlAgencySuggestion) + assert len(auto_suggestions) == 4 + + # Of the auto suggestions, 2 should be unknown + assert len([s for s in auto_suggestions if s.is_unknown]) == 2 + + # Of the auto suggestions, 2 should not be unknown + assert len([s for s in auto_suggestions if not s.is_unknown]) == 2 + +@pytest.mark.asyncio +async def test_common_crawler_subtask(db_data_creator: DBDataCreator): + # Test that common_crawler subtask correctly adds URL to + # url_agency_suggestions with label 'Unknown' + subtask = CommonCrawlerAgencyIdentificationSubtask() + results: list[URLAgencySuggestionInfo] = await subtask.run(url_id=1, collector_metadata={}) + assert len(results) == 1 + assert results[0].url_id == 1 + assert results[0].suggestion_type == SuggestionType.UNKNOWN + + +@pytest.mark.asyncio +async def test_auto_googler_subtask(db_data_creator: DBDataCreator): + # Test that auto_googler subtask correctly adds URL to + # url_agency_suggestions with label 'Unknown' + subtask = 
AutoGooglerAgencyIdentificationSubtask() + results: list[URLAgencySuggestionInfo] = await subtask.run(url_id=1, collector_metadata={}) + assert len(results) == 1 + assert results[0].url_id == 1 + assert results[0].suggestion_type == SuggestionType.UNKNOWN + +@pytest.mark.asyncio +async def test_muckrock_subtask(db_data_creator: DBDataCreator): + # Test that muckrock subtask correctly sends agency name to + # MatchAgenciesInterface and adds received suggestions to + # url_agency_suggestions + + # Create mock instances for dependency injections + muckrock_api_interface_mock = MagicMock(spec=MuckrockAPIInterface) + pdap_client_mock = MagicMock(spec=PDAPClient) + + # Set up mock return values for method calls + muckrock_api_interface_mock.lookup_agency.return_value = AgencyLookupResponse( + type=AgencyLookupResponseType.FOUND, + name="Mock Agency Name", + error=None + ) + + pdap_client_mock.match_agency.return_value = MatchAgencyResponse( + status=MatchAgencyResponseStatus.PARTIAL_MATCH, + matches=[ + MatchAgencyInfo( + id=1, + submitted_name="Mock Agency Name", + ), + MatchAgencyInfo( + id=2, + submitted_name="Another Mock Agency Name", + ) + ] + ) + + # Create an instance of MuckrockAgencyIdentificationSubtask with mock dependencies + muckrock_agency_identification_subtask = MuckrockAgencyIdentificationSubtask( + muckrock_api_interface=muckrock_api_interface_mock, + pdap_client=pdap_client_mock + ) + + # Run the subtask + results: list[URLAgencySuggestionInfo] = await muckrock_agency_identification_subtask.run( + url_id=1, + collector_metadata={ + "agency": 123 + } + ) + + # Verify the results + assert len(results) == 2 + assert results[0].url_id == 1 + assert results[0].suggestion_type == SuggestionType.AUTO_SUGGESTION + assert results[0].pdap_agency_id == 1 + assert results[0].agency_name == "Mock Agency Name" + assert results[1].url_id == 1 + assert results[1].suggestion_type == SuggestionType.AUTO_SUGGESTION + assert results[1].pdap_agency_id == 2 + assert results[1].agency_name == "Another Mock Agency Name" + + # Assert methods called as expected + muckrock_api_interface_mock.lookup_agency.assert_called_once_with( + muckrock_agency_id=123 + ) + pdap_client_mock.match_agency.assert_called_once_with( + name="Mock Agency Name" + ) + + +@pytest.mark.asyncio +async def test_ckan_subtask(db_data_creator: DBDataCreator): + # Test that ckan subtask correctly sends agency id to + # CKANAPIInterface, sends resultant agency name to + # PDAPClient and adds received suggestions to + # url_agency_suggestions + + pdap_client = AsyncMock() + pdap_client.match_agency.return_value = MatchAgencyResponse( + status=MatchAgencyResponseStatus.PARTIAL_MATCH, + matches=[ + MatchAgencyInfo( + id=1, + submitted_name="Mock Agency Name", + ), + MatchAgencyInfo( + id=2, + submitted_name="Another Mock Agency Name", + ) + ] + ) # Assuming MatchAgencyResponse is a class + + # Create an instance of CKANAgencyIdentificationSubtask + task = CKANAgencyIdentificationSubtask(pdap_client) + + # Call the run method with static values + collector_metadata = {"agency_name": "Test Agency"} + url_id = 1 + + # Call the run method + result = await task.run(url_id, collector_metadata) + + # Check the result + assert len(result) == 2 + assert result[0].url_id == 1 + assert result[0].suggestion_type == SuggestionType.AUTO_SUGGESTION + assert result[0].pdap_agency_id == 1 + assert result[0].agency_name == "Mock Agency Name" + assert result[1].url_id == 1 + assert result[1].suggestion_type == SuggestionType.AUTO_SUGGESTION + assert 
result[1].pdap_agency_id == 2 + assert result[1].agency_name == "Another Mock Agency Name" + + # Assert methods called as expected + pdap_client.match_agency.assert_called_once_with(name="Test Agency") + diff --git a/tests/test_automated/integration/tasks/test_example_task.py b/tests/test_automated/integration/tasks/test_example_task.py new file mode 100644 index 00000000..2211458c --- /dev/null +++ b/tests/test_automated/integration/tasks/test_example_task.py @@ -0,0 +1,56 @@ +import types + +import pytest + +from collector_db.enums import TaskType +from core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome +from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase +from tests.helpers.DBDataCreator import DBDataCreator + +class ExampleTaskOperator(TaskOperatorBase): + + @property + def task_type(self) -> TaskType: + # Use TaskType.HTML so we don't have to add a test enum value to the db + return TaskType.HTML + + def inner_task_logic(self): + raise NotImplementedError + + async def meets_task_prerequisites(self): + return True + +@pytest.mark.asyncio +async def test_example_task_success(db_data_creator: DBDataCreator): + batch_id = db_data_creator.batch() + url_mappings = db_data_creator.urls( + batch_id=batch_id, + url_count=3 + ).url_mappings + url_ids = [url_info.url_id for url_info in url_mappings] + + async def mock_inner_task_logic(self): + # Add link to 3 urls + self.linked_url_ids = url_ids + + operator = ExampleTaskOperator(adb_client=db_data_creator.adb_client) + operator.inner_task_logic = types.MethodType(mock_inner_task_logic, operator) + + run_info = await operator.run_task(1) + assert run_info.outcome == TaskOperatorOutcome.SUCCESS + assert run_info.linked_url_ids == url_ids + + +@pytest.mark.asyncio +async def test_example_task_failure(db_data_creator: DBDataCreator): + operator = ExampleTaskOperator(adb_client=db_data_creator.adb_client) + + def mock_inner_task_logic(self): + raise ValueError("test error") + + operator.inner_task_logic = types.MethodType(mock_inner_task_logic, operator) + run_info = await operator.run_task(1) + assert run_info.outcome == TaskOperatorOutcome.ERROR + + + diff --git a/tests/test_automated/integration/tasks/test_submit_approved_url_task.py b/tests/test_automated/integration/tasks/test_submit_approved_url_task.py new file mode 100644 index 00000000..1477915f --- /dev/null +++ b/tests/test_automated/integration/tasks/test_submit_approved_url_task.py @@ -0,0 +1,218 @@ +from http import HTTPStatus +from unittest.mock import MagicMock, AsyncMock + +import pytest +from deepdiff import DeepDiff + +from collector_db.enums import TaskType +from collector_db.models import URL, URLErrorInfo, URLDataSource +from collector_manager.enums import URLStatus +from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo +from core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome +from core.classes.task_operators.SubmitApprovedURLTaskOperator import SubmitApprovedURLTaskOperator +from core.enums import RecordType, SubmitResponseStatus +from tests.helpers.DBDataCreator import BatchURLCreationInfo, DBDataCreator +from pdap_access_manager import RequestInfo, RequestType, ResponseInfo, DataSourcesNamespaces +from pdap_api_client.PDAPClient import PDAPClient + + +def mock_make_request(pdap_client: PDAPClient, urls: list[str]): + assert len(urls) == 3, "Expected 3 urls" + pdap_client.access_manager.make_request = AsyncMock( + return_value=ResponseInfo( + status_code=HTTPStatus.OK, + data={ + "data_sources": [ + { + "url": urls[0], + 
"status": SubmitResponseStatus.SUCCESS, + "error": None, + "data_source_id": 21, + }, + { + "url": urls[1], + "status": SubmitResponseStatus.SUCCESS, + "error": None, + "data_source_id": 34, + }, + { + "url": urls[2], + "status": SubmitResponseStatus.FAILURE, + "error": "Test Error", + "data_source_id": None + } + ] + } + ) + ) + + + +async def setup_validated_urls(db_data_creator: DBDataCreator) -> list[str]: + creation_info: BatchURLCreationInfo = await db_data_creator.batch_and_urls( + url_count=3, + with_html_content=True + ) + + url_1 = creation_info.url_ids[0] + url_2 = creation_info.url_ids[1] + url_3 = creation_info.url_ids[2] + await db_data_creator.adb_client.approve_url( + approval_info=FinalReviewApprovalInfo( + url_id=url_1, + record_type=RecordType.ACCIDENT_REPORTS, + agency_ids=[1, 2], + name="URL 1 Name", + description="URL 1 Description", + record_formats=["Record Format 1", "Record Format 2"], + data_portal_type="Data Portal Type 1", + supplying_entity="Supplying Entity 1" + ), + user_id=1 + ) + await db_data_creator.adb_client.approve_url( + approval_info=FinalReviewApprovalInfo( + url_id=url_2, + record_type=RecordType.INCARCERATION_RECORDS, + agency_ids=[3, 4], + name="URL 2 Name", + description="URL 2 Description", + ), + user_id=2 + ) + await db_data_creator.adb_client.approve_url( + approval_info=FinalReviewApprovalInfo( + url_id=url_3, + record_type=RecordType.ACCIDENT_REPORTS, + agency_ids=[5, 6], + name="URL 3 Name", + description="URL 3 Description", + ), + user_id=3 + ) + return creation_info.urls + +@pytest.mark.asyncio +async def test_submit_approved_url_task( + db_data_creator, + mock_pdap_client: PDAPClient, + monkeypatch +): + """ + The submit_approved_url_task should submit + all validated URLs to the PDAP Data Sources App + """ + + + # Get Task Operator + operator = SubmitApprovedURLTaskOperator( + adb_client=db_data_creator.adb_client, + pdap_client=mock_pdap_client + ) + + # Check Task Operator does not yet meet pre-requisites + assert not await operator.meets_task_prerequisites() + + # Create URLs with status 'validated' in database and all requisite URL values + # Ensure they have optional metadata as well + urls = await setup_validated_urls(db_data_creator) + mock_make_request(mock_pdap_client, urls) + + # Check Task Operator does meet pre-requisites + assert await operator.meets_task_prerequisites() + + # Run Task + task_id = await db_data_creator.adb_client.initiate_task( + task_type=TaskType.SUBMIT_APPROVED + ) + run_info = await operator.run_task(task_id=task_id) + + # Check Task has been marked as completed + assert run_info.outcome == TaskOperatorOutcome.SUCCESS, run_info.message + + # Get URLs + urls = await db_data_creator.adb_client.get_all(URL, order_by_attribute="id") + url_1 = urls[0] + url_2 = urls[1] + url_3 = urls[2] + + # Check URLs have been marked as 'submitted' + assert url_1.outcome == URLStatus.SUBMITTED.value + assert url_2.outcome == URLStatus.SUBMITTED.value + assert url_3.outcome == URLStatus.ERROR.value + + # Get URL Data Source Links + url_data_sources = await db_data_creator.adb_client.get_all(URLDataSource) + assert len(url_data_sources) == 2 + + url_data_source_1 = url_data_sources[0] + url_data_source_2 = url_data_sources[1] + + assert url_data_source_1.url_id == url_1.id + assert url_data_source_1.data_source_id == 21 + + assert url_data_source_2.url_id == url_2.id + assert url_data_source_2.data_source_id == 34 + + # Check that errored URL has entry in url_error_info + url_errors = await 
db_data_creator.adb_client.get_all(URLErrorInfo) + assert len(url_errors) == 1 + url_error = url_errors[0] + assert url_error.url_id == url_3.id + assert url_error.error == "Test Error" + + # Check mock method was called expected parameters + access_manager = mock_pdap_client.access_manager + access_manager.make_request.assert_called_once() + access_manager.build_url.assert_called_with( + namespace=DataSourcesNamespaces.SOURCE_COLLECTOR, + subdomains=['data-sources'] + ) + + call_1 = access_manager.make_request.call_args_list[0][0][0] + expected_call_1 = RequestInfo( + type_=RequestType.POST, + url="http://example.com", + headers=access_manager.jwt_header.return_value, + json_={ + "data_sources": [ + { + "name": "URL 1 Name", + "source_url": url_1.url, + "record_type": "Accident Reports", + "description": "URL 1 Description", + "record_formats": ["Record Format 1", "Record Format 2"], + "data_portal_type": "Data Portal Type 1", + "last_approval_editor": 1, + "supplying_entity": "Supplying Entity 1", + "agency_ids": [1, 2] + }, + { + "name": "URL 2 Name", + "source_url": url_2.url, + "record_type": "Incarceration Records", + "description": "URL 2 Description", + "last_approval_editor": 2, + "supplying_entity": None, + "record_formats": None, + "data_portal_type": None, + "agency_ids": [3, 4] + }, + { + "name": "URL 3 Name", + "source_url": url_3.url, + "record_type": "Accident Reports", + "description": "URL 3 Description", + "last_approval_editor": 3, + "supplying_entity": None, + "record_formats": None, + "data_portal_type": None, + "agency_ids": [5, 6] + } + ] + } + ) + assert call_1.type_ == expected_call_1.type_ + assert call_1.headers == expected_call_1.headers + diff = DeepDiff(call_1.json_, expected_call_1.json_, ignore_order=True) + assert diff == {}, f"Differences found: {diff}" diff --git a/tests/test_automated/integration/tasks/test_url_duplicate_task.py b/tests/test_automated/integration/tasks/test_url_duplicate_task.py new file mode 100644 index 00000000..d66cfe27 --- /dev/null +++ b/tests/test_automated/integration/tasks/test_url_duplicate_task.py @@ -0,0 +1,98 @@ +from http import HTTPStatus +from unittest.mock import MagicMock + +import pytest + +from collector_db.DTOs.URLMapping import URLMapping +from collector_db.models import URL, URLCheckedForDuplicate +from collector_manager.enums import URLStatus +from core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome +from core.classes.task_operators.URLDuplicateTaskOperator import URLDuplicateTaskOperator +from tests.helpers.DBDataCreator import DBDataCreator +from tests.helpers.test_batch_creation_parameters import TestBatchCreationParameters, TestURLCreationParameters +from pdap_access_manager import ResponseInfo +from pdap_api_client.PDAPClient import PDAPClient + + +@pytest.mark.asyncio +async def test_url_duplicate_task( + db_data_creator: DBDataCreator, + mock_pdap_client: PDAPClient +): + + + operator = URLDuplicateTaskOperator( + adb_client=db_data_creator.adb_client, + pdap_client=mock_pdap_client + ) + + assert not await operator.meets_task_prerequisites() + make_request_mock: MagicMock = mock_pdap_client.access_manager.make_request + + make_request_mock.assert_not_called() + + # Add three URLs to the database, one of which is in error, the other two pending + creation_info = await db_data_creator.batch_v2( + parameters=TestBatchCreationParameters( + urls=[ + TestURLCreationParameters( + count=1, + status=URLStatus.ERROR + ), + TestURLCreationParameters( + count=2, + status=URLStatus.PENDING + ), + ] + ) + ) + 
pending_urls: list[URLMapping] = creation_info.url_creation_infos[URLStatus.PENDING].url_mappings + duplicate_url = pending_urls[0] + non_duplicate_url = pending_urls[1] + assert await operator.meets_task_prerequisites() + make_request_mock.assert_not_called() + + make_request_mock.side_effect = [ + ResponseInfo( + data={ + "duplicates": [ + { + "original_url": duplicate_url.url, + "approval_status": "approved" + } + ], + }, + status_code=HTTPStatus.OK + ), + ResponseInfo( + data={ + "duplicates": [], + }, + status_code=HTTPStatus.OK + ), + ] + run_info = await operator.run_task(1) + assert run_info.outcome == TaskOperatorOutcome.SUCCESS, run_info.message + assert make_request_mock.call_count == 2 + + adb_client = db_data_creator.adb_client + urls: list[URL] = await adb_client.get_all(URL) + assert len(urls) == 3 + url_ids = [url.id for url in urls] + assert duplicate_url.url_id in url_ids + for url in urls: + if url.id == duplicate_url.url_id: + assert url.outcome == URLStatus.DUPLICATE.value + + checked_for_duplicates: list[URLCheckedForDuplicate] = await adb_client.get_all(URLCheckedForDuplicate) + assert len(checked_for_duplicates) == 2 + checked_for_duplicate_url_ids = [url.url_id for url in checked_for_duplicates] + assert duplicate_url.url_id in checked_for_duplicate_url_ids + assert non_duplicate_url.url_id in checked_for_duplicate_url_ids + + assert not await operator.meets_task_prerequisites() + + + + + diff --git a/tests/test_automated/integration/tasks/test_url_html_task.py b/tests/test_automated/integration/tasks/test_url_html_task.py new file mode 100644 index 00000000..4c33016b --- /dev/null +++ b/tests/test_automated/integration/tasks/test_url_html_task.py @@ -0,0 +1,104 @@ +import types +from typing import Optional + +import pytest + +from collector_db.AsyncDatabaseClient import AsyncDatabaseClient +from collector_db.enums import TaskType +from core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome +from core.classes.task_operators.URLHTMLTaskOperator import URLHTMLTaskOperator +from tests.helpers.DBDataCreator import DBDataCreator +from html_tag_collector.DataClassTags import ResponseHTMLInfo +from html_tag_collector.ResponseParser import HTMLResponseParser +from html_tag_collector.RootURLCache import RootURLCache +from html_tag_collector.URLRequestInterface import URLRequestInterface, URLResponseInfo + + +@pytest.mark.asyncio +async def test_url_html_task(db_data_creator: DBDataCreator): + + mock_html_content = "" + mock_content_type = "text/html" + + async def mock_make_requests(self, urls: list[str]) -> list[URLResponseInfo]: + results = [] + for idx, url in enumerate(urls): + if idx == 2: + results.append( + URLResponseInfo( + success=False, + exception=ValueError("test error"), + content_type=mock_content_type + )) + else: + results.append(URLResponseInfo( + html=mock_html_content, success=True, content_type=mock_content_type)) + return results + + async def mock_parse(self, url: str, html_content: str, content_type: str) -> ResponseHTMLInfo: + assert html_content == mock_html_content + assert content_type == mock_content_type + return ResponseHTMLInfo( + url=url, + title="fake title", + description="fake description", + ) + + async def mock_get_from_cache(self, url: str) -> Optional[str]: + return None + + # Add mock methods or mock classes + url_request_interface = URLRequestInterface() + url_request_interface.make_requests = types.MethodType(mock_make_requests, url_request_interface) + + mock_root_url_cache = RootURLCache() + mock_root_url_cache.get_from_cache = 
types.MethodType(mock_get_from_cache, mock_root_url_cache) + + html_parser = HTMLResponseParser( + root_url_cache=mock_root_url_cache + ) + html_parser.parse = types.MethodType(mock_parse, html_parser) + + operator = URLHTMLTaskOperator( + adb_client=AsyncDatabaseClient(), + url_request_interface=url_request_interface, + html_parser=html_parser + ) + + meets_prereqs = await operator.meets_task_prerequisites() + # Check that, because no URLs were created, the prereqs are not met + assert not meets_prereqs + + batch_id = db_data_creator.batch() + url_mappings = db_data_creator.urls(batch_id=batch_id, url_count=3).url_mappings + url_ids = [url_info.url_id for url_info in url_mappings] + + task_id = await db_data_creator.adb_client.initiate_task(task_type=TaskType.HTML) + run_info = await operator.run_task(task_id) + assert run_info.outcome == TaskOperatorOutcome.SUCCESS + assert run_info.linked_url_ids == url_ids + + + # Check in database that + # - task type is listed as 'HTML' + # - task has 3 urls + # - task has one errored url with error "ValueError" + task_info = await db_data_creator.adb_client.get_task_info( + task_id=operator.task_id + ) + + assert task_info.error_info is None + assert task_info.task_type == TaskType.HTML + + assert len(task_info.url_errors) == 1 + assert task_info.url_errors[0].error == "test error" + + adb = db_data_creator.adb_client + # Check that both success urls have two rows of HTML data + await adb.link_urls_to_task(task_id=run_info.task_id, url_ids=run_info.linked_url_ids) + hci = await adb.get_html_content_info(url_id=url_ids[0]) + assert len(hci) == 2 + hci = await adb.get_html_content_info(url_id=url_ids[1]) + assert len(hci) == 2 + + # Check that errored url has error info diff --git a/tests/test_automated/integration/tasks/test_url_miscellaneous_metadata_task.py b/tests/test_automated/integration/tasks/test_url_miscellaneous_metadata_task.py new file mode 100644 index 00000000..526efa70 --- /dev/null +++ b/tests/test_automated/integration/tasks/test_url_miscellaneous_metadata_task.py @@ -0,0 +1,145 @@ +from typing import Optional + +import pytest + +from collector_db.models import URL, URLOptionalDataSourceMetadata +from collector_manager.enums import CollectorType +from core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome +from core.classes.task_operators.URLMiscellaneousMetadataTaskOperator import URLMiscellaneousMetadataTaskOperator +from tests.helpers.DBDataCreator import DBDataCreator + + +def batch_and_url( + db_data_creator: DBDataCreator, + collector_type: CollectorType, + collector_metadata: Optional[dict] +): + batch_id = db_data_creator.batch(strategy=collector_type) + url_id = db_data_creator.urls( + batch_id=batch_id, + url_count=1, + collector_metadata=collector_metadata + ).url_mappings[0].url_id + return url_id + + +@pytest.mark.asyncio +async def test_url_miscellaneous_metadata_task(db_data_creator: DBDataCreator): + + operator = URLMiscellaneousMetadataTaskOperator(adb_client=db_data_creator.adb_client) + + # Currently, task should not meet prerequisites + meets_prereqs = await operator.meets_task_prerequisites() + assert not meets_prereqs + + # Add one URL for each of the following batches, with appropriate collector metadata: + # ckan + ckan_url_id = batch_and_url( + db_data_creator, + CollectorType.CKAN, + collector_metadata={ + "submitted_name": "Test CKAN Name", + "description": "Test CKAN Description", + "record_format": ["CSV", "JSON"], + "data_portal_type": "Test Data Portal Type", + "supplying_entity": "Test Supplying 
Entity" + } + ) + # muckrock_simple + muckrock_simple_url_id = batch_and_url( + db_data_creator, + CollectorType.MUCKROCK_SIMPLE_SEARCH, + collector_metadata={ + 'title': 'Test Muckrock Simple Title', + } + ) + # muckrock_county + muckrock_county_url_id = batch_and_url( + db_data_creator, + CollectorType.MUCKROCK_COUNTY_SEARCH, + collector_metadata={ + 'title': 'Test Muckrock County Title', + } + ) + # muckrock_all + muckrock_all_url_id = batch_and_url( + db_data_creator, + CollectorType.MUCKROCK_ALL_SEARCH, + collector_metadata={ + 'title': 'Test Muckrock All Title', + } + ) + # auto_googler + auto_googler_url_id = batch_and_url( + db_data_creator, + CollectorType.AUTO_GOOGLER, + collector_metadata={ + "title" : "Test Auto Googler Title", + "snippet" : "Test Auto Googler Snippet" + } + ) + # common_crawler + common_crawler_url_id = batch_and_url( + db_data_creator, + CollectorType.COMMON_CRAWLER, + collector_metadata=None + ) + # Add URL HTML + await db_data_creator.html_data([common_crawler_url_id]) + # example + + # Check that task now meets prerequisites + meets_prereqs = await operator.meets_task_prerequisites() + assert meets_prereqs + + # Run task + run_info = await operator.run_task(1) + assert run_info.outcome == TaskOperatorOutcome.SUCCESS + + # Check that each URL has the expected name/description and optional metadata + expected_urls = { + common_crawler_url_id: ("test html content", "test description"), + auto_googler_url_id: ("Test Auto Googler Title", "Test Auto Googler Snippet"), + ckan_url_id: ("Test CKAN Name", "Test CKAN Description"), + muckrock_simple_url_id: ("Test Muckrock Simple Title", "Test Muckrock Simple Title"), + muckrock_county_url_id: ("Test Muckrock County Title", "Test Muckrock County Title"), + muckrock_all_url_id: ("Test Muckrock All Title", "Test Muckrock All Title"), + } + + urls: list[URL] = await db_data_creator.adb_client.get_all(URL) + assert len(urls) == len(expected_urls) + + seen_ids = set() + + for url in urls: + assert url.id not in seen_ids, f"Duplicate url.id found: {url.id}" + seen_ids.add(url.id) + + assert url.id in expected_urls, f"Unexpected url.id: {url.id}" + expected_name, expected_description = expected_urls[url.id] + assert url.name == expected_name, f"For url.id {url.id}, expected name {expected_name}, got {url.name}" + assert url.description == expected_description, f"For url.id {url.id}, expected description {expected_description}, got {url.description}" + + expected_urls = { + common_crawler_url_id: (None, None, None), + auto_googler_url_id: (None, None, None), + ckan_url_id: (["CSV", "JSON"], "Test Data Portal Type", "Test Supplying Entity"), + muckrock_simple_url_id: (None, None, None), + muckrock_county_url_id: (None, None, None), + muckrock_all_url_id: (None, None, None), + } + + metadatas: list[URLOptionalDataSourceMetadata] = await db_data_creator.adb_client.get_all(URLOptionalDataSourceMetadata) + seen_ids = set() + for metadata in metadatas: + assert metadata.url_id not in seen_ids, f"Duplicate url.id found: {metadata.url_id}" + seen_ids.add(metadata.url_id) + + assert metadata.url_id in expected_urls, f"Unexpected url.id: {metadata.url_id}" + expected_record_format, expected_data_portal_type, expected_supplying_entity = expected_urls[metadata.url_id] + assert metadata.record_formats == expected_record_format, f"For url.id {metadata.url_id}, expected record_format {expected_record_format}, got {metadata.url_id}" + assert metadata.data_portal_type == expected_data_portal_type, f"For url.id {metadata.url_id}, expected 
data_portal_type {expected_data_portal_type}, got {metadata.url_id}" + assert metadata.supplying_entity == expected_supplying_entity, f"For url.id {metadata.url_id}, expected supplying_entity {expected_supplying_entity}, got {metadata.url_id}" + + + diff --git a/tests/test_automated/integration/tasks/test_url_record_type_task.py b/tests/test_automated/integration/tasks/test_url_record_type_task.py new file mode 100644 index 00000000..c941bcf7 --- /dev/null +++ b/tests/test_automated/integration/tasks/test_url_record_type_task.py @@ -0,0 +1,56 @@ +from unittest.mock import MagicMock + +import pytest + +from collector_db.enums import TaskType +from collector_db.models import AutoRecordTypeSuggestion +from core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome +from core.classes.task_operators.URLRecordTypeTaskOperator import URLRecordTypeTaskOperator +from core.enums import RecordType +from tests.helpers.DBDataCreator import DBDataCreator +from llm_api_logic.DeepSeekRecordClassifier import DeepSeekRecordClassifier + +@pytest.mark.asyncio +async def test_url_record_type_task(db_data_creator: DBDataCreator): + + mock_classifier = MagicMock(spec=DeepSeekRecordClassifier) + mock_classifier.classify_url.side_effect = [RecordType.ACCIDENT_REPORTS, "Error"] + mock_classifier.model_name = "test_notes" + + operator = URLRecordTypeTaskOperator( + adb_client=db_data_creator.adb_client, + classifier=mock_classifier + ) + + # Should not meet prerequisites + meets_prereqs = await operator.meets_task_prerequisites() + assert not meets_prereqs + + batch_id = db_data_creator.batch() + iui = db_data_creator.urls(batch_id=batch_id, url_count=2) + url_ids = [iui.url_mappings[0].url_id, iui.url_mappings[1].url_id] + await db_data_creator.html_data(url_ids) + + assert await operator.meets_task_prerequisites() + task_id = await db_data_creator.adb_client.initiate_task(task_type=TaskType.RECORD_TYPE) + + run_info = await operator.run_task(task_id) + assert run_info.outcome == TaskOperatorOutcome.SUCCESS + + # Task should have been created + task_info = await db_data_creator.adb_client.get_task_info(task_id=operator.task_id) + assert task_info.error_info is None + + response = await db_data_creator.adb_client.get_tasks() + tasks = response.tasks + assert len(tasks) == 1 + task = tasks[0] + assert task.type == TaskType.RECORD_TYPE + assert run_info.linked_url_ids == url_ids + assert task.url_error_count == 1 + + # Get metadata + suggestions = await db_data_creator.adb_client.get_all(AutoRecordTypeSuggestion) + for suggestion in suggestions: + assert suggestion.record_type == RecordType.ACCIDENT_REPORTS.value + diff --git a/tests/test_automated/integration/cycles/test_url_relevancy_huggingface_cycle.py b/tests/test_automated/integration/tasks/test_url_relevancy_huggingface_task.py similarity index 52% rename from tests/test_automated/integration/cycles/test_url_relevancy_huggingface_cycle.py rename to tests/test_automated/integration/tasks/test_url_relevancy_huggingface_task.py index 064eff51..95fb5fc7 100644 --- a/tests/test_automated/integration/cycles/test_url_relevancy_huggingface_cycle.py +++ b/tests/test_automated/integration/tasks/test_url_relevancy_huggingface_task.py @@ -4,19 +4,16 @@ from collector_db.AsyncDatabaseClient import AsyncDatabaseClient from collector_db.DTOs.URLWithHTML import URLWithHTML -from collector_db.enums import ValidationStatus, ValidationSource -from collector_db.models import URLMetadata -from core.classes.URLRelevanceHuggingfaceCycler import URLRelevanceHuggingfaceCycler +from 
collector_db.models import AutoRelevantSuggestion +from core.DTOs.TaskOperatorRunInfo import TaskOperatorRunInfo, TaskOperatorOutcome +from core.classes.task_operators.URLRelevanceHuggingfaceTaskOperator import URLRelevanceHuggingfaceTaskOperator +from tests.helpers.assert_functions import assert_database_has_no_tasks from hugging_face.HuggingFaceInterface import HuggingFaceInterface @pytest.mark.asyncio -async def test_url_relevancy_huggingface_cycle(db_data_creator): - batch_id = db_data_creator.batch() - url_mappings = db_data_creator.urls(batch_id=batch_id, url_count=3).url_mappings - url_ids = [url_info.url_id for url_info in url_mappings] - await db_data_creator.html_data(url_ids) - await db_data_creator.metadata([url_ids[0]]) +async def test_url_relevancy_huggingface_task(db_data_creator): + def num_to_bool(num: int) -> bool: if num == 0: @@ -24,7 +21,7 @@ def num_to_bool(num: int) -> bool: else: return False - def mock_get_url_relevancy( + async def mock_get_url_relevancy( urls_with_html: list[URLWithHTML], threshold: float = 0.8 ) -> list[bool]: @@ -36,19 +33,29 @@ def mock_get_url_relevancy( return results mock_hf_interface = MagicMock(spec=HuggingFaceInterface) - mock_hf_interface.get_url_relevancy = mock_get_url_relevancy + mock_hf_interface.get_url_relevancy_async = mock_get_url_relevancy - cycler = URLRelevanceHuggingfaceCycler( + task_operator = URLRelevanceHuggingfaceTaskOperator( adb_client=AsyncDatabaseClient(), huggingface_interface=mock_hf_interface ) - await cycler.cycle() + meets_task_prerequisites = await task_operator.meets_task_prerequisites() + assert not meets_task_prerequisites + + await assert_database_has_no_tasks(db_data_creator.adb_client) + + batch_id = db_data_creator.batch() + url_mappings = db_data_creator.urls(batch_id=batch_id, url_count=3).url_mappings + url_ids = [url_info.url_id for url_info in url_mappings] + await db_data_creator.html_data(url_ids) + + run_info: TaskOperatorRunInfo = await task_operator.run_task(1) + assert run_info.outcome == TaskOperatorOutcome.SUCCESS, run_info.message + - results = await db_data_creator.adb_client.get_all(URLMetadata) + results = await db_data_creator.adb_client.get_all(AutoRelevantSuggestion) assert len(results) == 3 for result in results: assert result.url_id in url_ids - assert result.value in ['True', 'False'] - assert result.validation_status == ValidationStatus.PENDING_VALIDATION.value - assert result.validation_source == ValidationSource.MACHINE_LEARNING.value \ No newline at end of file + assert result.relevant == num_to_bool(result.url_id % 2) diff --git a/tests/test_automated/unit/collector_manager/test_collector_manager.py b/tests/test_automated/unit/collector_manager/test_collector_manager.py deleted file mode 100644 index 3a7b2fd9..00000000 --- a/tests/test_automated/unit/collector_manager/test_collector_manager.py +++ /dev/null @@ -1,154 +0,0 @@ -import threading -import time -from dataclasses import dataclass -from unittest.mock import Mock, MagicMock - -import pytest - -from collector_db.DatabaseClient import DatabaseClient -from collector_manager.CollectorManager import CollectorManager, InvalidCollectorError -from collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO -from collector_manager.ExampleCollector import ExampleCollector -from collector_manager.enums import CollectorType -from core.CoreLogger import CoreLogger - - -@dataclass -class ExampleCollectorSetup: - type = CollectorType.EXAMPLE - dto = ExampleInputDTO( - example_field="example_value", sleep_time=1 - ) - manager = 
CollectorManager( - logger=Mock(spec=CoreLogger), - db_client=Mock(spec=DatabaseClient) - ) - - def start_collector(self, batch_id: int): - self.manager.start_collector(self.type, batch_id, self.dto) - - -@pytest.fixture -def ecs(): - ecs = ExampleCollectorSetup() - yield ecs - ecs.manager.shutdown_all_collectors() - - - -def test_start_collector(ecs: ExampleCollectorSetup): - manager = ecs.manager - - batch_id = 1 - ecs.start_collector(batch_id) - assert batch_id in manager.collectors, "Collector not added to manager." - future = manager.futures.get(batch_id) - assert future is not None, "Thread not started for collector." - # Check that future is running - assert future.running(), "Future is not running." - - - print("Test passed: Collector starts correctly.") - -def test_abort_collector(ecs: ExampleCollectorSetup): - batch_id = 2 - manager = ecs.manager - - ecs.start_collector(batch_id) - - # Try getting collector initially and succeed - collector = manager.try_getting_collector(batch_id) - assert collector is not None, "Collector not found after start." - - manager.abort_collector(batch_id) - - assert batch_id not in manager.collectors, "Collector not removed after closure." - assert batch_id not in manager.threads, "Thread not removed after closure." - - # Try getting collector after closure and fail - with pytest.raises(InvalidCollectorError) as e: - manager.try_getting_collector(batch_id) - - - -def test_invalid_collector(ecs: ExampleCollectorSetup): - invalid_batch_id = 999 - - with pytest.raises(InvalidCollectorError) as e: - ecs.manager.try_getting_collector(invalid_batch_id) - - -def test_concurrent_collectors(ecs: ExampleCollectorSetup): - manager = ecs.manager - - batch_ids = [1, 2, 3] - - threads = [] - for batch_id in batch_ids: - thread = threading.Thread(target=manager.start_collector, args=(ecs.type, batch_id, ecs.dto)) - threads.append(thread) - thread.start() - - for thread in threads: - thread.join() - - assert all(batch_id in manager.collectors for batch_id in batch_ids), "Not all collectors started." - assert all(manager.futures[batch_id].running() for batch_id in batch_ids), "Not all threads are running." - - print("Test passed: Concurrent collectors managed correctly.") - -def test_thread_safety(ecs: ExampleCollectorSetup): - import concurrent.futures - - manager = ecs.manager - - def start_and_close(batch_id): - ecs.start_collector(batch_id) - time.sleep(0.1) # Simulate some processing - manager.abort_collector(batch_id) - - batch_ids = [i for i in range(1, 6)] - - with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor: - executor.map(start_and_close, batch_ids) - - assert not manager.collectors, "Some collectors were not cleaned up." - assert not manager.threads, "Some threads were not cleaned up." - - print("Test passed: Thread safety maintained under concurrent access.") - -def test_shutdown_all_collectors(ecs: ExampleCollectorSetup): - manager = ecs.manager - - batch_ids = [1, 2, 3] - - for batch_id in batch_ids: - ecs.start_collector(batch_id) - - manager.shutdown_all_collectors() - - assert not manager.collectors, "Not all collectors were removed." - assert not manager.threads, "Not all threads were cleaned up." 
- - print("Test passed: Shutdown cleans up all collectors and threads.") - - -def test_collector_manager_raises_exceptions(monkeypatch): - # Mock dependencies - logger = MagicMock() - db_client = MagicMock() - collector_manager = CollectorManager(logger=logger, db_client=db_client) - - dto = ExampleInputDTO(example_field="example_value", sleep_time=1) - - # Mock a collector type and DTO - batch_id = 1 - - # Patch the example collector run method to raise an exception - monkeypatch.setattr(ExampleCollector, 'run', MagicMock(side_effect=RuntimeError("Collector failed!"))) - - # Start the collector and expect an exception during shutdown - collector_manager.start_collector(CollectorType.EXAMPLE, batch_id, dto) - - with pytest.raises(RuntimeError, match="Collector failed!"): - collector_manager.shutdown_all_collectors() \ No newline at end of file diff --git a/tests/test_automated/unit/core/test_core_logger.py b/tests/test_automated/unit/core/test_core_logger.py index 22d08bfb..b0d52055 100644 --- a/tests/test_automated/unit/core/test_core_logger.py +++ b/tests/test_automated/unit/core/test_core_logger.py @@ -1,86 +1,28 @@ -import threading -import time -from unittest.mock import MagicMock +import asyncio +from unittest.mock import AsyncMock -from collector_db.DTOs.LogInfo import LogInfo -from core.CoreLogger import CoreLogger - - -def test_logger_flush(): - mock_db_client = MagicMock() - logger = CoreLogger(flush_interval=1, db_client=mock_db_client) - - # Add logs - logger.log(LogInfo(log="Log 1", batch_id=1)) - logger.log(LogInfo(log="Log 2", batch_id=1)) - - # Wait for the flush interval - time.sleep(1.5) - - # Verify logs were flushed - assert mock_db_client.insert_logs.called - flushed_logs = mock_db_client.insert_logs.call_args[1]['log_infos'] - assert len(flushed_logs) == 2 - assert flushed_logs[0].log == "Log 1" - - logger.shutdown() - -def test_logger_multithreading(): - mock_db_client = MagicMock() - logger = CoreLogger(flush_interval=1, db_client=mock_db_client, batch_size=10) - - def worker(thread_id): - for i in range(5): # Each thread logs 5 messages - logger.log(LogInfo(log=f"Thread-{thread_id} Log-{i}", batch_id=thread_id)) - - # Start multiple threads - threads = [threading.Thread(target=worker, args=(i,)) for i in range(5)] # 5 threads - for t in threads: - t.start() - for t in threads: - t.join() # Wait for all threads to finish - - # Allow the logger to flush - time.sleep(2) - logger.shutdown() - - # Verify all logs were flushed - assert mock_db_client.insert_logs.called - flushed_logs = [] - for call in mock_db_client.insert_logs.call_args_list: - flushed_logs.extend(call[1]['log_infos']) - - # Ensure all logs are present - assert len(flushed_logs) == 25 # 5 threads * 5 messages each - for i in range(5): - for j in range(5): - assert any(log.log == f"Thread-{i} Log-{j}" for log in flushed_logs) +import pytest +from collector_db.DTOs.LogInfo import LogInfo +from core.AsyncCoreLogger import AsyncCoreLogger -def test_logger_with_delays(): - mock_db_client = MagicMock() - logger = CoreLogger(flush_interval=1, db_client=mock_db_client, batch_size=10) - def worker(thread_id): - for i in range(10): # Each thread logs 10 messages - logger.log(LogInfo(log=f"Thread-{thread_id} Log-{i}", batch_id=thread_id)) - time.sleep(0.1) # Simulate delay between logs +@pytest.mark.asyncio +async def test_logger_flush(): + mock_adb_client = AsyncMock() + async with AsyncCoreLogger(flush_interval=0.01, adb_client=mock_adb_client) as logger: - # Start multiple threads - threads = 
[threading.Thread(target=worker, args=(i,)) for i in range(5)] # 5 threads - for t in threads: - t.start() - for t in threads: - t.join() # Wait for all threads to finish + # Add logs + await logger.log(LogInfo(log="Log 1", batch_id=1)) + await logger.log(LogInfo(log="Log 2", batch_id=1)) - # Allow the logger to flush - time.sleep(2) - logger.shutdown() + # Wait for the flush interval + await asyncio.sleep(0.02) - # Verify that all logs are eventually flushed - flushed_logs = [] - for call in mock_db_client.insert_logs.call_args_list: - flushed_logs.extend(call[1]['log_infos']) + # Verify logs were flushed + mock_adb_client.insert_logs.assert_called_once() + flushed_logs = mock_adb_client.insert_logs.call_args[1]['log_infos'] + assert len(flushed_logs) == 2 + assert flushed_logs[0].log == "Log 1" - assert len(flushed_logs) == 50 # 5 threads * 10 messages each diff --git a/tests/test_automated/unit/dto/__init__.py b/tests/test_automated/unit/dto/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_automated/unit/dto/test_all_annotation_post_info.py b/tests/test_automated/unit/dto/test_all_annotation_post_info.py new file mode 100644 index 00000000..3e5cbab4 --- /dev/null +++ b/tests/test_automated/unit/dto/test_all_annotation_post_info.py @@ -0,0 +1,37 @@ +import pytest +from pydantic import ValidationError + +from core.DTOs.AllAnnotationPostInfo import AllAnnotationPostInfo +from core.enums import RecordType +from core.exceptions import FailedValidationException + +# Mock values to pass +mock_record_type = RecordType.ARREST_RECORDS.value # replace with valid RecordType if Enum +mock_agency = {"is_new": False, "suggested_agency": 1} # replace with a valid dict for the URLAgencyAnnotationPostInfo model + +@pytest.mark.parametrize( + "is_relevant, record_type, agency, should_raise", + [ + (True, mock_record_type, mock_agency, False), # valid + (True, None, mock_agency, True), # missing record_type + (True, mock_record_type, None, True), # missing agency + (True, None, None, True), # missing both + (False, None, None, False), # valid + (False, mock_record_type, None, True), # record_type present + (False, None, mock_agency, True), # agency present + (False, mock_record_type, mock_agency, True), # both present + ] +) +def test_all_annotation_post_info_validation(is_relevant, record_type, agency, should_raise): + data = { + "is_relevant": is_relevant, + "record_type": record_type, + "agency": agency + } + + if should_raise: + with pytest.raises(FailedValidationException): + AllAnnotationPostInfo(**data) + else: + model = AllAnnotationPostInfo(**data) + assert model.is_relevant == is_relevant diff --git a/tests/test_automated/unit/security_manager/test_security_manager.py b/tests/test_automated/unit/security_manager/test_security_manager.py index f827cc1b..fd03fee5 100644 --- a/tests/test_automated/unit/security_manager/test_security_manager.py +++ b/tests/test_automated/unit/security_manager/test_security_manager.py @@ -49,7 +49,7 @@ def test_validate_token_failure(mock_get_secret_key, mock_jwt_decode): def test_check_access_success(mock_get_secret_key, mock_jwt_decode): sm = SecurityManager() - sm.check_access(VALID_TOKEN) # Should not raise any exceptions. + sm.check_access(VALID_TOKEN, Permissions.SOURCE_COLLECTOR) # Should not raise any exceptions. 
def test_check_access_failure(mock_get_secret_key, mock_jwt_decode): @@ -57,7 +57,7 @@ def test_check_access_failure(mock_get_secret_key, mock_jwt_decode): with patch(get_patch_path("SecurityManager.validate_token"), return_value=AccessInfo(user_id=1, permissions=[])): sm = SecurityManager() with pytest.raises(HTTPException) as exc_info: - sm.check_access(VALID_TOKEN) + sm.check_access(VALID_TOKEN, Permissions.SOURCE_COLLECTOR) assert exc_info.value.status_code == 403 diff --git a/tests/test_automated/unit/source_collectors/test_autogoogler_collector.py b/tests/test_automated/unit/source_collectors/test_autogoogler_collector.py index 673fcd42..c3fafa61 100644 --- a/tests/test_automated/unit/source_collectors/test_autogoogler_collector.py +++ b/tests/test_automated/unit/source_collectors/test_autogoogler_collector.py @@ -1,10 +1,10 @@ -from unittest.mock import MagicMock +from unittest.mock import AsyncMock import pytest +from collector_db.AsyncDatabaseClient import AsyncDatabaseClient from collector_db.DTOs.URLInfo import URLInfo -from collector_db.DatabaseClient import DatabaseClient -from core.CoreLogger import CoreLogger +from core.AsyncCoreLogger import AsyncCoreLogger from source_collectors.auto_googler.AutoGooglerCollector import AutoGooglerCollector from source_collectors.auto_googler.DTOs import GoogleSearchQueryResultsInnerDTO, AutoGooglerInputDTO @@ -12,7 +12,7 @@ @pytest.fixture def patch_get_query_results(monkeypatch): patch_path = "source_collectors.auto_googler.GoogleSearcher.GoogleSearcher.get_query_results" - mock = MagicMock() + mock = AsyncMock() mock.side_effect = [ [GoogleSearchQueryResultsInnerDTO(url="https://include.com/1", title="keyword", snippet="snippet 1"),], None @@ -20,21 +20,22 @@ def patch_get_query_results(monkeypatch): monkeypatch.setattr(patch_path, mock) yield mock -def test_auto_googler_collector(patch_get_query_results): +@pytest.mark.asyncio +async def test_auto_googler_collector(patch_get_query_results): mock = patch_get_query_results collector = AutoGooglerCollector( batch_id=1, dto=AutoGooglerInputDTO( queries=["keyword"] ), - logger=MagicMock(spec=CoreLogger), - db_client=MagicMock(spec=DatabaseClient), + logger=AsyncMock(spec=AsyncCoreLogger), + adb_client=AsyncMock(spec=AsyncDatabaseClient), raise_error=True ) - collector.run() + await collector.run() mock.assert_called_once_with("keyword") - collector.db_client.insert_urls.assert_called_once_with( + collector.adb_client.insert_urls.assert_called_once_with( url_infos=[URLInfo(url="https://include.com/1", collector_metadata={"query": "keyword", "title": "keyword", "snippet": "snippet 1"})], batch_id=1 ) \ No newline at end of file diff --git a/tests/test_automated/unit/source_collectors/test_ckan_collector.py b/tests/test_automated/unit/source_collectors/test_ckan_collector.py index 21f469dc..e0e9ee47 100644 --- a/tests/test_automated/unit/source_collectors/test_ckan_collector.py +++ b/tests/test_automated/unit/source_collectors/test_ckan_collector.py @@ -1,24 +1,24 @@ import json import pickle -from unittest.mock import MagicMock +from unittest.mock import MagicMock, AsyncMock import pytest -from collector_db.DatabaseClient import DatabaseClient -from core.CoreLogger import CoreLogger +from collector_db.AsyncDatabaseClient import AsyncDatabaseClient +from core.AsyncCoreLogger import AsyncCoreLogger from source_collectors.ckan.CKANCollector import CKANCollector from source_collectors.ckan.DTOs import CKANInputDTO @pytest.fixture def mock_ckan_collector_methods(monkeypatch): - mock = MagicMock() + 
mock = AsyncMock() mock_path = "source_collectors.ckan.CKANCollector.CKANCollector.get_results" with open("tests/test_data/ckan_get_result_test_data.json", "r", encoding="utf-8") as f: data = json.load(f) - mock.get_results = MagicMock() + mock.get_results = AsyncMock() mock.get_results.return_value = data monkeypatch.setattr(mock_path, mock.get_results) @@ -26,7 +26,7 @@ def mock_ckan_collector_methods(monkeypatch): with open("tests/test_data/ckan_add_collection_child_packages.pkl", "rb") as f: data = pickle.load(f) - mock.add_collection_child_packages = MagicMock() + mock.add_collection_child_packages = AsyncMock() mock.add_collection_child_packages.return_value = data monkeypatch.setattr(mock_path, mock.add_collection_child_packages) @@ -34,23 +34,24 @@ def mock_ckan_collector_methods(monkeypatch): yield mock -def test_ckan_collector(mock_ckan_collector_methods): +@pytest.mark.asyncio +async def test_ckan_collector(mock_ckan_collector_methods): mock = mock_ckan_collector_methods collector = CKANCollector( batch_id=1, dto=CKANInputDTO(), - logger=MagicMock(spec=CoreLogger), - db_client=MagicMock(spec=DatabaseClient), + logger=AsyncMock(spec=AsyncCoreLogger), + adb_client=AsyncMock(spec=AsyncDatabaseClient), raise_error=True ) - collector.run() + await collector.run() mock.get_results.assert_called_once() mock.add_collection_child_packages.assert_called_once() - collector.db_client.insert_urls.assert_called_once() - url_infos = collector.db_client.insert_urls.call_args[1]['url_infos'] + collector.adb_client.insert_urls.assert_called_once() + url_infos = collector.adb_client.insert_urls.call_args[1]['url_infos'] assert len(url_infos) == 2560 first_url_info = url_infos[0] assert first_url_info.url == 'https://catalog.data.gov/dataset/crash-reporting-drivers-data' diff --git a/tests/test_automated/unit/source_collectors/test_collector_closes_properly.py b/tests/test_automated/unit/source_collectors/test_collector_closes_properly.py deleted file mode 100644 index 386120a8..00000000 --- a/tests/test_automated/unit/source_collectors/test_collector_closes_properly.py +++ /dev/null @@ -1,71 +0,0 @@ -import threading -import time -from unittest.mock import Mock, MagicMock - -from collector_db.DTOs.LogInfo import LogInfo -from collector_db.DatabaseClient import DatabaseClient -from collector_manager.CollectorBase import CollectorBase -from collector_manager.enums import CollectorType -from core.CoreLogger import CoreLogger -from core.enums import BatchStatus - - -# Mock a subclass to implement the abstract method -class MockCollector(CollectorBase): - collector_type = CollectorType.EXAMPLE - preprocessor = MagicMock() - - def __init__(self, dto, **kwargs): - super().__init__( - batch_id=1, - dto=dto, - logger=Mock(spec=CoreLogger), - db_client=Mock(spec=DatabaseClient), - raise_error=True - ) - - def run_implementation(self): - while True: - time.sleep(0.1) # Simulate work - self.log("Working...") - -def test_collector_closes_properly(): - # Mock dependencies - mock_dto = Mock() - - # Initialize the collector - collector = MockCollector( - dto=mock_dto, - ) - - # Run the collector in a separate thread - thread = threading.Thread(target=collector.run) - thread.start() - - # Run the collector for a time - time.sleep(1) - # Signal the collector to stop - collector.abort() - - thread.join() - - - - # Assertions - # Check that multiple log calls have been made - assert collector.logger.log.call_count > 1 - # Check that last call to collector.logger.log was with the correct message - assert 
collector.logger.log.call_args[0][0] == LogInfo( - id=None, - log='Collector was aborted.', - batch_id=1, - created_at=None - ) - - assert not thread.is_alive(), "Thread is still alive after aborting." - assert collector._stop_event.is_set(), "Stop event was not set." - assert collector.status == BatchStatus.ABORTED, "Collector status is not ABORTED." - - print("Test passed: Collector closes properly.") - - diff --git a/tests/test_automated/unit/source_collectors/test_common_crawl_collector.py b/tests/test_automated/unit/source_collectors/test_common_crawl_collector.py index e0dbd144..1c5aa6ee 100644 --- a/tests/test_automated/unit/source_collectors/test_common_crawl_collector.py +++ b/tests/test_automated/unit/source_collectors/test_common_crawl_collector.py @@ -2,9 +2,9 @@ import pytest +from collector_db.AsyncDatabaseClient import AsyncDatabaseClient from collector_db.DTOs.URLInfo import URLInfo -from collector_db.DatabaseClient import DatabaseClient -from core.CoreLogger import CoreLogger +from core.AsyncCoreLogger import AsyncCoreLogger from source_collectors.common_crawler.CommonCrawlerCollector import CommonCrawlerCollector from source_collectors.common_crawler.DTOs import CommonCrawlerInputDTO @@ -23,20 +23,21 @@ def mock_get_common_crawl_search_results(): mock_get_common_crawl_search_results.return_value = mock_results yield mock_get_common_crawl_search_results - -def test_common_crawl_collector(mock_get_common_crawl_search_results): +@pytest.mark.asyncio +async def test_common_crawl_collector(mock_get_common_crawl_search_results): collector = CommonCrawlerCollector( batch_id=1, dto=CommonCrawlerInputDTO( search_term="keyword", ), - logger=mock.MagicMock(spec=CoreLogger), - db_client=mock.MagicMock(spec=DatabaseClient) + logger=mock.AsyncMock(spec=AsyncCoreLogger), + adb_client=mock.AsyncMock(spec=AsyncDatabaseClient), + raise_error=True ) - collector.run() + await collector.run() mock_get_common_crawl_search_results.assert_called_once() - collector.db_client.insert_urls.assert_called_once_with( + collector.adb_client.insert_urls.assert_called_once_with( url_infos=[ URLInfo(url="http://keyword.com"), URLInfo(url="http://keyword.com/page3") diff --git a/tests/test_automated/unit/source_collectors/test_example_collector.py b/tests/test_automated/unit/source_collectors/test_example_collector.py index a0cf0c6f..b770d952 100644 --- a/tests/test_automated/unit/source_collectors/test_example_collector.py +++ b/tests/test_automated/unit/source_collectors/test_example_collector.py @@ -1,9 +1,9 @@ -from unittest.mock import MagicMock +from unittest.mock import AsyncMock from collector_db.DatabaseClient import DatabaseClient from collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO from collector_manager.ExampleCollector import ExampleCollector -from core.CoreLogger import CoreLogger +from core.AsyncCoreLogger import AsyncCoreLogger def test_example_collector(): @@ -12,8 +12,8 @@ def test_example_collector(): dto=ExampleInputDTO( sleep_time=1 ), - logger=MagicMock(spec=CoreLogger), - db_client=MagicMock(spec=DatabaseClient), + logger=AsyncMock(spec=AsyncCoreLogger), + adb_client=AsyncMock(spec=DatabaseClient), raise_error=True ) collector.run() \ No newline at end of file diff --git a/tests/test_automated/unit/source_collectors/test_muckrock_collectors.py b/tests/test_automated/unit/source_collectors/test_muckrock_collectors.py index 7dbb92c5..100fbb6e 100644 --- a/tests/test_automated/unit/source_collectors/test_muckrock_collectors.py +++ 
b/tests/test_automated/unit/source_collectors/test_muckrock_collectors.py @@ -1,11 +1,11 @@ from unittest import mock -from unittest.mock import MagicMock, call +from unittest.mock import MagicMock, call, AsyncMock import pytest +from collector_db.AsyncDatabaseClient import AsyncDatabaseClient from collector_db.DTOs.URLInfo import URLInfo -from collector_db.DatabaseClient import DatabaseClient -from core.CoreLogger import CoreLogger +from core.AsyncCoreLogger import AsyncCoreLogger from source_collectors.muckrock.DTOs import MuckrockSimpleSearchCollectorInputDTO, \ MuckrockCountySearchCollectorInputDTO, MuckrockAllFOIARequestsCollectorInputDTO from source_collectors.muckrock.classes.MuckrockCollector import MuckrockSimpleSearchCollector, \ @@ -24,32 +24,32 @@ def patch_muckrock_fetcher(monkeypatch): test_data = { "results": inner_test_data } - mock = MagicMock() + mock = AsyncMock() mock.return_value = test_data monkeypatch.setattr(patch_path, mock) return mock - -def test_muckrock_simple_collector(patch_muckrock_fetcher): +@pytest.mark.asyncio +async def test_muckrock_simple_collector(patch_muckrock_fetcher): collector = MuckrockSimpleSearchCollector( batch_id=1, dto=MuckrockSimpleSearchCollectorInputDTO( search_string="keyword", max_results=2 ), - logger=mock.MagicMock(spec=CoreLogger), - db_client=mock.MagicMock(spec=DatabaseClient), + logger=mock.AsyncMock(spec=AsyncCoreLogger), + adb_client=mock.AsyncMock(spec=AsyncDatabaseClient), raise_error=True ) - collector.run() + await collector.run() patch_muckrock_fetcher.assert_has_calls( [ call(FOIAFetchRequest(page=1, page_size=100)), ] ) - collector.db_client.insert_urls.assert_called_once_with( + collector.adb_client.insert_urls.assert_called_once_with( url_infos=[ URLInfo( url='https://include.com/1', @@ -80,13 +80,14 @@ def patch_muckrock_county_level_search_collector_methods(monkeypatch): {"absolute_url": "https://include.com/3", "title": "lemon"}, ] mock = MagicMock() - mock.get_jurisdiction_ids = MagicMock(return_value=get_jurisdiction_ids_data) - mock.get_foia_records = MagicMock(return_value=get_foia_records_data) + mock.get_jurisdiction_ids = AsyncMock(return_value=get_jurisdiction_ids_data) + mock.get_foia_records = AsyncMock(return_value=get_foia_records_data) monkeypatch.setattr(patch_path_get_jurisdiction_ids, mock.get_jurisdiction_ids) monkeypatch.setattr(patch_path_get_foia_records, mock.get_foia_records) return mock -def test_muckrock_county_search_collector(patch_muckrock_county_level_search_collector_methods): +@pytest.mark.asyncio +async def test_muckrock_county_search_collector(patch_muckrock_county_level_search_collector_methods): mock_methods = patch_muckrock_county_level_search_collector_methods collector = MuckrockCountyLevelSearchCollector( @@ -95,16 +96,16 @@ def test_muckrock_county_search_collector(patch_muckrock_county_level_search_col parent_jurisdiction_id=1, town_names=["test"] ), - logger=MagicMock(spec=CoreLogger), - db_client=MagicMock(spec=DatabaseClient), + logger=AsyncMock(spec=AsyncCoreLogger), + adb_client=AsyncMock(spec=AsyncDatabaseClient), raise_error=True ) - collector.run() + await collector.run() mock_methods.get_jurisdiction_ids.assert_called_once() mock_methods.get_foia_records.assert_called_once_with({"Alpha": 1, "Beta": 2}) - collector.db_client.insert_urls.assert_called_once_with( + collector.adb_client.insert_urls.assert_called_once_with( url_infos=[ URLInfo( url='https://include.com/1', @@ -142,9 +143,9 @@ def patch_muckrock_full_search_collector(monkeypatch): } ] }] - mock = 
MagicMock() + mock = AsyncMock() mock.return_value = test_data - mock.get_page_data = MagicMock(return_value=test_data) + mock.get_page_data = AsyncMock(return_value=test_data) monkeypatch.setattr(patch_path, mock.get_page_data) patch_path = ("source_collectors.muckrock.classes.MuckrockCollector." @@ -155,7 +156,8 @@ def patch_muckrock_full_search_collector(monkeypatch): return mock -def test_muckrock_all_foia_requests_collector(patch_muckrock_full_search_collector): +@pytest.mark.asyncio +async def test_muckrock_all_foia_requests_collector(patch_muckrock_full_search_collector): mock = patch_muckrock_full_search_collector collector = MuckrockAllFOIARequestsCollector( batch_id=1, @@ -163,15 +165,15 @@ def test_muckrock_all_foia_requests_collector(patch_muckrock_full_search_collect start_page=1, total_pages=2 ), - logger=MagicMock(spec=CoreLogger), - db_client=MagicMock(spec=DatabaseClient), + logger=AsyncMock(spec=AsyncCoreLogger), + adb_client=AsyncMock(spec=AsyncDatabaseClient), raise_error=True ) - collector.run() + await collector.run() mock.get_page_data.assert_called_once_with(mock.foia_fetcher.return_value, 1, 2) - collector.db_client.insert_urls.assert_called_once_with( + collector.adb_client.insert_urls.assert_called_once_with( url_infos=[ URLInfo( url='https://include.com/1', diff --git a/tests/test_automated/unit/test_function_trigger.py b/tests/test_automated/unit/test_function_trigger.py new file mode 100644 index 00000000..37b3c948 --- /dev/null +++ b/tests/test_automated/unit/test_function_trigger.py @@ -0,0 +1,67 @@ +import asyncio +from collections import deque + +import pytest + +from core.FunctionTrigger import FunctionTrigger + + +@pytest.mark.asyncio +async def test_single_run(): + calls = [] + + async def task_fn(): + calls.append("run") + await asyncio.sleep(0.01) + + trigger = FunctionTrigger(task_fn) + + await trigger.trigger_or_rerun() + + assert calls == ["run"] + +@pytest.mark.asyncio +async def test_rerun_requested(): + call_log = deque() + + async def task_fn(): + call_log.append("start") + await asyncio.sleep(0.01) + call_log.append("end") + + trigger = FunctionTrigger(task_fn) + + # Start first run + task = asyncio.create_task(trigger.trigger_or_rerun()) + + await asyncio.sleep(0.005) # Ensure it's in the middle of first run + await trigger.trigger_or_rerun() # This should request a rerun + + await task + + # One full loop with rerun should call twice + assert list(call_log) == ["start", "end", "start", "end"] + +@pytest.mark.asyncio +async def test_multiple_quick_triggers_only_rerun_once(): + calls = [] + + async def task_fn(): + calls.append("run") + await asyncio.sleep(0.01) + + trigger = FunctionTrigger(task_fn) + + first = asyncio.create_task(trigger.trigger_or_rerun()) + await asyncio.sleep(0.002) + + # These three should all coalesce into one rerun, not three more + await asyncio.gather( + trigger.trigger_or_rerun(), + trigger.trigger_or_rerun(), + trigger.trigger_or_rerun() + ) + + await first + + assert calls == ["run", "run"] \ No newline at end of file diff --git a/util/DiscordNotifier.py b/util/DiscordNotifier.py new file mode 100644 index 00000000..6df1aa90 --- /dev/null +++ b/util/DiscordNotifier.py @@ -0,0 +1,19 @@ +import logging + +import requests + + +class DiscordPoster: + def __init__(self, webhook_url: str): + if not webhook_url: + logging.error("WEBHOOK_URL environment variable not set") + raise ValueError("WEBHOOK_URL environment variable not set") + self.webhook_url = webhook_url + def post_to_discord(self, message): + try: + 
requests.post(self.webhook_url, json={"content": message}) + except Exception as e: + logging.error( + f"Error posting message to Discord: {e}." + f"\n\nMessage: {message}" + ) diff --git a/util/alembic_helpers.py b/util/alembic_helpers.py new file mode 100644 index 00000000..84cdbfa7 --- /dev/null +++ b/util/alembic_helpers.py @@ -0,0 +1,49 @@ +from alembic import op +import sqlalchemy as sa + +def switch_enum_type( + table_name, + column_name, + enum_name, + new_enum_values, + drop_old_enum=True, + cast_dict: dict = None +): + """ + Switches an ENUM type in a PostgreSQL column by: + 1. Renaming the old enum type. + 2. Creating the new enum type with the same name. + 3. Updating the column to use the new enum type. + 4. Dropping the old enum type. + + :param table_name: Name of the table containing the ENUM column. + :param column_name: Name of the column using the ENUM type. + :param enum_name: Name of the ENUM type in PostgreSQL. + :param new_enum_values: List of new ENUM values. + :param drop_old_enum: Whether to drop the old ENUM type. + """ + + # Rename old enum type + old_enum_temp_name = f"{enum_name}_old" + op.execute(f'ALTER TYPE "{enum_name}" RENAME TO "{old_enum_temp_name}"') + + # Create new enum type with the updated values + new_enum_type = sa.Enum(*new_enum_values, name=enum_name) + new_enum_type.create(op.get_bind()) + + # Alter the column type to use the new enum type + op.execute(f'ALTER TABLE "{table_name}" ALTER COLUMN "{column_name}" TYPE "{enum_name}" USING "{column_name}"::text::{enum_name}') + + # Drop the old enum type + if drop_old_enum: + op.execute(f'DROP TYPE "{old_enum_temp_name}"') + +def alter_enum_value( + enum_name, + old_value, + new_value +): + """ + Changes one value of an enum type + """ + op.execute(f"ALTER TYPE {enum_name} RENAME VALUE '{old_value}' TO '{new_value}'") \ No newline at end of file diff --git a/util/helper_functions.py b/util/helper_functions.py index ccc7d96e..deb6830b 100644 --- a/util/helper_functions.py +++ b/util/helper_functions.py @@ -1,21 +1,44 @@ import os from enum import Enum +from pathlib import Path from typing import Type from dotenv import load_dotenv from pydantic import BaseModel +def get_project_root(marker_files=(".project-root",)) -> Path: + current = Path(__file__).resolve() + for parent in [current] + list(current.parents): + if any((parent / marker).exists() for marker in marker_files): + return parent + raise FileNotFoundError("No project root found (missing marker files)") + +def project_path(*parts: str) -> Path: + return get_project_root().joinpath(*parts) def get_enum_values(enum: Type[Enum]): return [item.value for item in enum] -def get_from_env(key: str): +def get_from_env(key: str, allow_none: bool = False): load_dotenv() val = os.getenv(key) - if val is None: + if val is None and not allow_none: raise ValueError(f"Environment variable {key} is not set") return val +def load_from_environment(keys: list[str]) -> dict[str, str]: + """ + Load selected keys from environment, returning a dictionary + """ + original_environment = os.environ.copy() + try: + load_dotenv() + return {key: os.getenv(key) for key in keys} + finally: + # Restore the original environment + os.environ.clear() + os.environ.update(original_environment) + def base_model_list_dump(model_list: list[BaseModel]) -> list[dict]: return [model.model_dump() for model in model_list] diff --git a/util/miscellaneous_functions.py b/util/miscellaneous_functions.py index d27793ff..4b0bc88b 100644 --- a/util/miscellaneous_functions.py +++ 
b/util/miscellaneous_functions.py @@ -32,7 +32,7 @@ def get_project_root() -> Path: """ # Define the root markers that signify the root directory of the project - root_markers = ['.git'] # Add more markers as needed + root_markers = ['execute.sh'] # Add more markers as needed # Start from the current file's directory current_dir = Path(__file__).resolve().parent while current_dir != current_dir.parent: # Check if we've reached the root of the filesystem diff --git a/uv.lock b/uv.lock new file mode 100644 index 00000000..773fee9e --- /dev/null +++ b/uv.lock @@ -0,0 +1,2958 @@ +version = 1 +revision = 2 +requires-python = ">=3.11" +resolution-markers = [ + "python_full_version >= '3.13'", + "python_full_version == '3.12.*'", + "python_full_version < '3.12'", +] + +[[package]] +name = "absl-py" +version = "2.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/f0/e6342091061ed3a46aadc116b13edd7bb5249c3ab1b3ef07f24b0c248fc3/absl_py-2.2.2.tar.gz", hash = "sha256:bf25b2c2eed013ca456918c453d687eab4e8309fba81ee2f4c1a6aa2494175eb", size = 119982, upload_time = "2025-04-03T12:41:04.55Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f6/d4/349f7f4bd5ea92dab34f5bb0fe31775ef6c311427a14d5a5b31ecb442341/absl_py-2.2.2-py3-none-any.whl", hash = "sha256:e5797bc6abe45f64fd95dc06394ca3f2bedf3b5d895e9da691c9ee3397d70092", size = 135565, upload_time = "2025-04-03T12:41:03.172Z" }, +] + +[[package]] +name = "aiohappyeyeballs" +version = "2.6.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/26/30/f84a107a9c4331c14b2b586036f40965c128aa4fee4dda5d3d51cb14ad54/aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558", size = 22760, upload_time = "2025-03-12T01:42:48.764Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0f/15/5bf3b99495fb160b63f95972b81750f18f7f4e02ad051373b669d17d44f2/aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8", size = 15265, upload_time = "2025-03-12T01:42:47.083Z" }, +] + +[[package]] +name = "aiohttp" +version = "3.11.18" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohappyeyeballs" }, + { name = "aiosignal" }, + { name = "attrs" }, + { name = "frozenlist" }, + { name = "multidict" }, + { name = "propcache" }, + { name = "yarl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/63/e7/fa1a8c00e2c54b05dc8cb5d1439f627f7c267874e3f7bb047146116020f9/aiohttp-3.11.18.tar.gz", hash = "sha256:ae856e1138612b7e412db63b7708735cff4d38d0399f6a5435d3dac2669f558a", size = 7678653, upload_time = "2025-04-21T09:43:09.191Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2f/10/fd9ee4f9e042818c3c2390054c08ccd34556a3cb209d83285616434cf93e/aiohttp-3.11.18-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:427fdc56ccb6901ff8088544bde47084845ea81591deb16f957897f0f0ba1be9", size = 712088, upload_time = "2025-04-21T09:40:55.776Z" }, + { url = "https://files.pythonhosted.org/packages/22/eb/6a77f055ca56f7aae2cd2a5607a3c9e7b9554f1497a069dcfcb52bfc9540/aiohttp-3.11.18-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2c828b6d23b984255b85b9b04a5b963a74278b7356a7de84fda5e3b76866597b", size = 471450, upload_time = "2025-04-21T09:40:57.301Z" }, + { url = 
"https://files.pythonhosted.org/packages/78/dc/5f3c0d27c91abf0bb5d103e9c9b0ff059f60cf6031a5f06f456c90731f42/aiohttp-3.11.18-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5c2eaa145bb36b33af1ff2860820ba0589e165be4ab63a49aebfd0981c173b66", size = 457836, upload_time = "2025-04-21T09:40:59.322Z" }, + { url = "https://files.pythonhosted.org/packages/49/7b/55b65af9ef48b9b811c91ff8b5b9de9650c71147f10523e278d297750bc8/aiohttp-3.11.18-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3d518ce32179f7e2096bf4e3e8438cf445f05fedd597f252de9f54c728574756", size = 1690978, upload_time = "2025-04-21T09:41:00.795Z" }, + { url = "https://files.pythonhosted.org/packages/a2/5a/3f8938c4f68ae400152b42742653477fc625d6bfe02e764f3521321c8442/aiohttp-3.11.18-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0700055a6e05c2f4711011a44364020d7a10fbbcd02fbf3e30e8f7e7fddc8717", size = 1745307, upload_time = "2025-04-21T09:41:02.89Z" }, + { url = "https://files.pythonhosted.org/packages/b4/42/89b694a293333ef6f771c62da022163bcf44fb03d4824372d88e3dc12530/aiohttp-3.11.18-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8bd1cde83e4684324e6ee19adfc25fd649d04078179890be7b29f76b501de8e4", size = 1780692, upload_time = "2025-04-21T09:41:04.461Z" }, + { url = "https://files.pythonhosted.org/packages/e2/ce/1a75384e01dd1bf546898b6062b1b5f7a59b6692ef802e4dd6db64fed264/aiohttp-3.11.18-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:73b8870fe1c9a201b8c0d12c94fe781b918664766728783241a79e0468427e4f", size = 1676934, upload_time = "2025-04-21T09:41:06.728Z" }, + { url = "https://files.pythonhosted.org/packages/a5/31/442483276e6c368ab5169797d9873b5875213cbcf7e74b95ad1c5003098a/aiohttp-3.11.18-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:25557982dd36b9e32c0a3357f30804e80790ec2c4d20ac6bcc598533e04c6361", size = 1621190, upload_time = "2025-04-21T09:41:08.293Z" }, + { url = "https://files.pythonhosted.org/packages/7b/83/90274bf12c079457966008a58831a99675265b6a34b505243e004b408934/aiohttp-3.11.18-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7e889c9df381a2433802991288a61e5a19ceb4f61bd14f5c9fa165655dcb1fd1", size = 1658947, upload_time = "2025-04-21T09:41:11.054Z" }, + { url = "https://files.pythonhosted.org/packages/91/c1/da9cee47a0350b78fdc93670ebe7ad74103011d7778ab4c382ca4883098d/aiohttp-3.11.18-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:9ea345fda05bae217b6cce2acf3682ce3b13d0d16dd47d0de7080e5e21362421", size = 1654443, upload_time = "2025-04-21T09:41:13.213Z" }, + { url = "https://files.pythonhosted.org/packages/c9/f2/73cbe18dc25d624f79a09448adfc4972f82ed6088759ddcf783cd201956c/aiohttp-3.11.18-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:9f26545b9940c4b46f0a9388fd04ee3ad7064c4017b5a334dd450f616396590e", size = 1644169, upload_time = "2025-04-21T09:41:14.827Z" }, + { url = "https://files.pythonhosted.org/packages/5b/32/970b0a196c4dccb1b0cfa5b4dc3b20f63d76f1c608f41001a84b2fd23c3d/aiohttp-3.11.18-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:3a621d85e85dccabd700294494d7179ed1590b6d07a35709bb9bd608c7f5dd1d", size = 1728532, upload_time = "2025-04-21T09:41:17.168Z" }, + { url = "https://files.pythonhosted.org/packages/0b/50/b1dc810a41918d2ea9574e74125eb053063bc5e14aba2d98966f7d734da0/aiohttp-3.11.18-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:9c23fd8d08eb9c2af3faeedc8c56e134acdaf36e2117ee059d7defa655130e5f", size = 1750310, 
upload_time = "2025-04-21T09:41:19.353Z" }, + { url = "https://files.pythonhosted.org/packages/95/24/39271f5990b35ff32179cc95537e92499d3791ae82af7dcf562be785cd15/aiohttp-3.11.18-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d9e6b0e519067caa4fd7fb72e3e8002d16a68e84e62e7291092a5433763dc0dd", size = 1691580, upload_time = "2025-04-21T09:41:21.868Z" }, + { url = "https://files.pythonhosted.org/packages/6b/78/75d0353feb77f041460564f12fe58e456436bbc00cbbf5d676dbf0038cc2/aiohttp-3.11.18-cp311-cp311-win32.whl", hash = "sha256:122f3e739f6607e5e4c6a2f8562a6f476192a682a52bda8b4c6d4254e1138f4d", size = 417565, upload_time = "2025-04-21T09:41:24.78Z" }, + { url = "https://files.pythonhosted.org/packages/ed/97/b912dcb654634a813f8518de359364dfc45976f822116e725dc80a688eee/aiohttp-3.11.18-cp311-cp311-win_amd64.whl", hash = "sha256:e6f3c0a3a1e73e88af384b2e8a0b9f4fb73245afd47589df2afcab6b638fa0e6", size = 443652, upload_time = "2025-04-21T09:41:26.48Z" }, + { url = "https://files.pythonhosted.org/packages/b5/d2/5bc436f42bf4745c55f33e1e6a2d69e77075d3e768e3d1a34f96ee5298aa/aiohttp-3.11.18-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:63d71eceb9cad35d47d71f78edac41fcd01ff10cacaa64e473d1aec13fa02df2", size = 706671, upload_time = "2025-04-21T09:41:28.021Z" }, + { url = "https://files.pythonhosted.org/packages/fe/d0/2dbabecc4e078c0474abb40536bbde717fb2e39962f41c5fc7a216b18ea7/aiohttp-3.11.18-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d1929da615840969929e8878d7951b31afe0bac883d84418f92e5755d7b49508", size = 466169, upload_time = "2025-04-21T09:41:29.783Z" }, + { url = "https://files.pythonhosted.org/packages/70/84/19edcf0b22933932faa6e0be0d933a27bd173da02dc125b7354dff4d8da4/aiohttp-3.11.18-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7d0aebeb2392f19b184e3fdd9e651b0e39cd0f195cdb93328bd124a1d455cd0e", size = 457554, upload_time = "2025-04-21T09:41:31.327Z" }, + { url = "https://files.pythonhosted.org/packages/32/d0/e8d1f034ae5624a0f21e4fb3feff79342ce631f3a4d26bd3e58b31ef033b/aiohttp-3.11.18-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3849ead845e8444f7331c284132ab314b4dac43bfae1e3cf350906d4fff4620f", size = 1690154, upload_time = "2025-04-21T09:41:33.541Z" }, + { url = "https://files.pythonhosted.org/packages/16/de/2f9dbe2ac6f38f8495562077131888e0d2897e3798a0ff3adda766b04a34/aiohttp-3.11.18-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5e8452ad6b2863709f8b3d615955aa0807bc093c34b8e25b3b52097fe421cb7f", size = 1733402, upload_time = "2025-04-21T09:41:35.634Z" }, + { url = "https://files.pythonhosted.org/packages/e0/04/bd2870e1e9aef990d14b6df2a695f17807baf5c85a4c187a492bda569571/aiohttp-3.11.18-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b8d2b42073611c860a37f718b3d61ae8b4c2b124b2e776e2c10619d920350ec", size = 1783958, upload_time = "2025-04-21T09:41:37.456Z" }, + { url = "https://files.pythonhosted.org/packages/23/06/4203ffa2beb5bedb07f0da0f79b7d9039d1c33f522e0d1a2d5b6218e6f2e/aiohttp-3.11.18-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40fbf91f6a0ac317c0a07eb328a1384941872f6761f2e6f7208b63c4cc0a7ff6", size = 1695288, upload_time = "2025-04-21T09:41:39.756Z" }, + { url = "https://files.pythonhosted.org/packages/30/b2/e2285dda065d9f29ab4b23d8bcc81eb881db512afb38a3f5247b191be36c/aiohttp-3.11.18-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:44ff5625413fec55216da5eaa011cf6b0a2ed67a565914a212a51aa3755b0009", size = 1618871, upload_time = "2025-04-21T09:41:41.972Z" }, + { url = "https://files.pythonhosted.org/packages/57/e0/88f2987885d4b646de2036f7296ebea9268fdbf27476da551c1a7c158bc0/aiohttp-3.11.18-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7f33a92a2fde08e8c6b0c61815521324fc1612f397abf96eed86b8e31618fdb4", size = 1646262, upload_time = "2025-04-21T09:41:44.192Z" }, + { url = "https://files.pythonhosted.org/packages/e0/19/4d2da508b4c587e7472a032290b2981f7caeca82b4354e19ab3df2f51d56/aiohttp-3.11.18-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:11d5391946605f445ddafda5eab11caf310f90cdda1fd99865564e3164f5cff9", size = 1677431, upload_time = "2025-04-21T09:41:46.049Z" }, + { url = "https://files.pythonhosted.org/packages/eb/ae/047473ea50150a41440f3265f53db1738870b5a1e5406ece561ca61a3bf4/aiohttp-3.11.18-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3cc314245deb311364884e44242e00c18b5896e4fe6d5f942e7ad7e4cb640adb", size = 1637430, upload_time = "2025-04-21T09:41:47.973Z" }, + { url = "https://files.pythonhosted.org/packages/11/32/c6d1e3748077ce7ee13745fae33e5cb1dac3e3b8f8787bf738a93c94a7d2/aiohttp-3.11.18-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:0f421843b0f70740772228b9e8093289924359d306530bcd3926f39acbe1adda", size = 1703342, upload_time = "2025-04-21T09:41:50.323Z" }, + { url = "https://files.pythonhosted.org/packages/c5/1d/a3b57bfdbe285f0d45572d6d8f534fd58761da3e9cbc3098372565005606/aiohttp-3.11.18-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:e220e7562467dc8d589e31c1acd13438d82c03d7f385c9cd41a3f6d1d15807c1", size = 1740600, upload_time = "2025-04-21T09:41:52.111Z" }, + { url = "https://files.pythonhosted.org/packages/a5/71/f9cd2fed33fa2b7ce4d412fb7876547abb821d5b5520787d159d0748321d/aiohttp-3.11.18-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ab2ef72f8605046115bc9aa8e9d14fd49086d405855f40b79ed9e5c1f9f4faea", size = 1695131, upload_time = "2025-04-21T09:41:53.94Z" }, + { url = "https://files.pythonhosted.org/packages/97/97/d1248cd6d02b9de6aa514793d0dcb20099f0ec47ae71a933290116c070c5/aiohttp-3.11.18-cp312-cp312-win32.whl", hash = "sha256:12a62691eb5aac58d65200c7ae94d73e8a65c331c3a86a2e9670927e94339ee8", size = 412442, upload_time = "2025-04-21T09:41:55.689Z" }, + { url = "https://files.pythonhosted.org/packages/33/9a/e34e65506e06427b111e19218a99abf627638a9703f4b8bcc3e3021277ed/aiohttp-3.11.18-cp312-cp312-win_amd64.whl", hash = "sha256:364329f319c499128fd5cd2d1c31c44f234c58f9b96cc57f743d16ec4f3238c8", size = 439444, upload_time = "2025-04-21T09:41:57.977Z" }, + { url = "https://files.pythonhosted.org/packages/0a/18/be8b5dd6b9cf1b2172301dbed28e8e5e878ee687c21947a6c81d6ceaa15d/aiohttp-3.11.18-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:474215ec618974054cf5dc465497ae9708543cbfc312c65212325d4212525811", size = 699833, upload_time = "2025-04-21T09:42:00.298Z" }, + { url = "https://files.pythonhosted.org/packages/0d/84/ecdc68e293110e6f6f6d7b57786a77555a85f70edd2b180fb1fafaff361a/aiohttp-3.11.18-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6ced70adf03920d4e67c373fd692123e34d3ac81dfa1c27e45904a628567d804", size = 462774, upload_time = "2025-04-21T09:42:02.015Z" }, + { url = "https://files.pythonhosted.org/packages/d7/85/f07718cca55884dad83cc2433746384d267ee970e91f0dcc75c6d5544079/aiohttp-3.11.18-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2d9f6c0152f8d71361905aaf9ed979259537981f47ad099c8b3d81e0319814bd", size = 454429, upload_time = 
"2025-04-21T09:42:03.728Z" }, + { url = "https://files.pythonhosted.org/packages/82/02/7f669c3d4d39810db8842c4e572ce4fe3b3a9b82945fdd64affea4c6947e/aiohttp-3.11.18-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a35197013ed929c0aed5c9096de1fc5a9d336914d73ab3f9df14741668c0616c", size = 1670283, upload_time = "2025-04-21T09:42:06.053Z" }, + { url = "https://files.pythonhosted.org/packages/ec/79/b82a12f67009b377b6c07a26bdd1b81dab7409fc2902d669dbfa79e5ac02/aiohttp-3.11.18-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:540b8a1f3a424f1af63e0af2d2853a759242a1769f9f1ab053996a392bd70118", size = 1717231, upload_time = "2025-04-21T09:42:07.953Z" }, + { url = "https://files.pythonhosted.org/packages/a6/38/d5a1f28c3904a840642b9a12c286ff41fc66dfa28b87e204b1f242dbd5e6/aiohttp-3.11.18-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f9e6710ebebfce2ba21cee6d91e7452d1125100f41b906fb5af3da8c78b764c1", size = 1769621, upload_time = "2025-04-21T09:42:09.855Z" }, + { url = "https://files.pythonhosted.org/packages/53/2d/deb3749ba293e716b5714dda06e257f123c5b8679072346b1eb28b766a0b/aiohttp-3.11.18-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8af2ef3b4b652ff109f98087242e2ab974b2b2b496304063585e3d78de0b000", size = 1678667, upload_time = "2025-04-21T09:42:11.741Z" }, + { url = "https://files.pythonhosted.org/packages/b8/a8/04b6e11683a54e104b984bd19a9790eb1ae5f50968b601bb202d0406f0ff/aiohttp-3.11.18-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:28c3f975e5ae3dbcbe95b7e3dcd30e51da561a0a0f2cfbcdea30fc1308d72137", size = 1601592, upload_time = "2025-04-21T09:42:14.137Z" }, + { url = "https://files.pythonhosted.org/packages/5e/9d/c33305ae8370b789423623f0e073d09ac775cd9c831ac0f11338b81c16e0/aiohttp-3.11.18-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c28875e316c7b4c3e745172d882d8a5c835b11018e33432d281211af35794a93", size = 1621679, upload_time = "2025-04-21T09:42:16.056Z" }, + { url = "https://files.pythonhosted.org/packages/56/45/8e9a27fff0538173d47ba60362823358f7a5f1653c6c30c613469f94150e/aiohttp-3.11.18-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:13cd38515568ae230e1ef6919e2e33da5d0f46862943fcda74e7e915096815f3", size = 1656878, upload_time = "2025-04-21T09:42:18.368Z" }, + { url = "https://files.pythonhosted.org/packages/84/5b/8c5378f10d7a5a46b10cb9161a3aac3eeae6dba54ec0f627fc4ddc4f2e72/aiohttp-3.11.18-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:0e2a92101efb9f4c2942252c69c63ddb26d20f46f540c239ccfa5af865197bb8", size = 1620509, upload_time = "2025-04-21T09:42:20.141Z" }, + { url = "https://files.pythonhosted.org/packages/9e/2f/99dee7bd91c62c5ff0aa3c55f4ae7e1bc99c6affef780d7777c60c5b3735/aiohttp-3.11.18-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:e6d3e32b8753c8d45ac550b11a1090dd66d110d4ef805ffe60fa61495360b3b2", size = 1680263, upload_time = "2025-04-21T09:42:21.993Z" }, + { url = "https://files.pythonhosted.org/packages/03/0a/378745e4ff88acb83e2d5c884a4fe993a6e9f04600a4560ce0e9b19936e3/aiohttp-3.11.18-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:ea4cf2488156e0f281f93cc2fd365025efcba3e2d217cbe3df2840f8c73db261", size = 1715014, upload_time = "2025-04-21T09:42:23.87Z" }, + { url = "https://files.pythonhosted.org/packages/f6/0b/b5524b3bb4b01e91bc4323aad0c2fcaebdf2f1b4d2eb22743948ba364958/aiohttp-3.11.18-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:9d4df95ad522c53f2b9ebc07f12ccd2cb15550941e11a5bbc5ddca2ca56316d7", size = 1666614, upload_time = "2025-04-21T09:42:25.764Z" }, + { url = "https://files.pythonhosted.org/packages/c7/b7/3d7b036d5a4ed5a4c704e0754afe2eef24a824dfab08e6efbffb0f6dd36a/aiohttp-3.11.18-cp313-cp313-win32.whl", hash = "sha256:cdd1bbaf1e61f0d94aced116d6e95fe25942f7a5f42382195fd9501089db5d78", size = 411358, upload_time = "2025-04-21T09:42:27.558Z" }, + { url = "https://files.pythonhosted.org/packages/1e/3c/143831b32cd23b5263a995b2a1794e10aa42f8a895aae5074c20fda36c07/aiohttp-3.11.18-cp313-cp313-win_amd64.whl", hash = "sha256:bdd619c27e44382cf642223f11cfd4d795161362a5a1fc1fa3940397bc89db01", size = 437658, upload_time = "2025-04-21T09:42:29.209Z" }, +] + +[[package]] +name = "aiosignal" +version = "1.3.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "frozenlist" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ba/b5/6d55e80f6d8a08ce22b982eafa278d823b541c925f11ee774b0b9c43473d/aiosignal-1.3.2.tar.gz", hash = "sha256:a8c255c66fafb1e499c9351d0bf32ff2d8a0321595ebac3b93713656d2436f54", size = 19424, upload_time = "2024-12-13T17:10:40.86Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/6a/bc7e17a3e87a2985d3e8f4da4cd0f481060eb78fb08596c42be62c90a4d9/aiosignal-1.3.2-py2.py3-none-any.whl", hash = "sha256:45cde58e409a301715980c2b01d0c28bdde3770d8290b5eb2173759d9acb31a5", size = 7597, upload_time = "2024-12-13T17:10:38.469Z" }, +] + +[[package]] +name = "alembic" +version = "1.14.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mako" }, + { name = "sqlalchemy" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/99/09/f844822e4e847a3f0bd41797f93c4674cd4d2462a3f6c459aa528cdf786e/alembic-1.14.1.tar.gz", hash = "sha256:496e888245a53adf1498fcab31713a469c65836f8de76e01399aa1c3e90dd213", size = 1918219, upload_time = "2025-01-19T23:15:30.12Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/7e/ac0991d1745f7d755fc1cd381b3990a45b404b4d008fc75e2a983516fbfe/alembic-1.14.1-py3-none-any.whl", hash = "sha256:1acdd7a3a478e208b0503cd73614d5e4c6efafa4e73518bb60e4f2846a37b1c5", size = 233565, upload_time = "2025-01-19T23:15:32.523Z" }, +] + +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload_time = "2024-05-20T21:33:25.928Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload_time = "2024-05-20T21:33:24.1Z" }, +] + +[[package]] +name = "anyio" +version = "4.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "idna" }, + { name = "sniffio" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/95/7d/4c1bd541d4dffa1b52bd83fb8527089e097a106fc90b467a7313b105f840/anyio-4.9.0.tar.gz", hash = "sha256:673c0c244e15788651a4ff38710fea9675823028a6f08a5eda409e0c9840a028", size = 190949, upload_time = "2025-03-17T00:02:54.77Z" } +wheels = [ + 
{ url = "https://files.pythonhosted.org/packages/a1/ee/48ca1a7c89ffec8b6a0c5d02b89c305671d5ffd8d3c94acf8b8c408575bb/anyio-4.9.0-py3-none-any.whl", hash = "sha256:9f76d541cad6e36af7beb62e978876f3b41e3e04f2c1fbf0884604c0a9c4d93c", size = 100916, upload_time = "2025-03-17T00:02:52.713Z" }, +] + +[[package]] +name = "apscheduler" +version = "3.11.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "tzlocal" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4e/00/6d6814ddc19be2df62c8c898c4df6b5b1914f3bd024b780028caa392d186/apscheduler-3.11.0.tar.gz", hash = "sha256:4c622d250b0955a65d5d0eb91c33e6d43fd879834bf541e0a18661ae60460133", size = 107347, upload_time = "2024-11-24T19:39:26.463Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d0/ae/9a053dd9229c0fde6b1f1f33f609ccff1ee79ddda364c756a924c6d8563b/APScheduler-3.11.0-py3-none-any.whl", hash = "sha256:fc134ca32e50f5eadcc4938e3a4545ab19131435e851abb40b34d63d5141c6da", size = 64004, upload_time = "2024-11-24T19:39:24.442Z" }, +] + +[[package]] +name = "astunparse" +version = "1.6.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, + { name = "wheel" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f3/af/4182184d3c338792894f34a62672919db7ca008c89abee9b564dd34d8029/astunparse-1.6.3.tar.gz", hash = "sha256:5ad93a8456f0d084c3456d059fd9a92cce667963232cbf763eac3bc5b7940872", size = 18290, upload_time = "2019-12-22T18:12:13.129Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2b/03/13dde6512ad7b4557eb792fbcf0c653af6076b81e5941d36ec61f7ce6028/astunparse-1.6.3-py2.py3-none-any.whl", hash = "sha256:c2652417f2c8b5bb325c885ae329bdf3f86424075c4fd1a128674bc6fba4b8e8", size = 12732, upload_time = "2019-12-22T18:12:11.297Z" }, +] + +[[package]] +name = "asyncpg" +version = "0.30.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2f/4c/7c991e080e106d854809030d8584e15b2e996e26f16aee6d757e387bc17d/asyncpg-0.30.0.tar.gz", hash = "sha256:c551e9928ab6707602f44811817f82ba3c446e018bfe1d3abecc8ba5f3eac851", size = 957746, upload_time = "2024-10-20T00:30:41.127Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4c/0e/f5d708add0d0b97446c402db7e8dd4c4183c13edaabe8a8500b411e7b495/asyncpg-0.30.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5e0511ad3dec5f6b4f7a9e063591d407eee66b88c14e2ea636f187da1dcfff6a", size = 674506, upload_time = "2024-10-20T00:29:27.988Z" }, + { url = "https://files.pythonhosted.org/packages/6a/a0/67ec9a75cb24a1d99f97b8437c8d56da40e6f6bd23b04e2f4ea5d5ad82ac/asyncpg-0.30.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:915aeb9f79316b43c3207363af12d0e6fd10776641a7de8a01212afd95bdf0ed", size = 645922, upload_time = "2024-10-20T00:29:29.391Z" }, + { url = "https://files.pythonhosted.org/packages/5c/d9/a7584f24174bd86ff1053b14bb841f9e714380c672f61c906eb01d8ec433/asyncpg-0.30.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c198a00cce9506fcd0bf219a799f38ac7a237745e1d27f0e1f66d3707c84a5a", size = 3079565, upload_time = "2024-10-20T00:29:30.832Z" }, + { url = "https://files.pythonhosted.org/packages/a0/d7/a4c0f9660e333114bdb04d1a9ac70db690dd4ae003f34f691139a5cbdae3/asyncpg-0.30.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3326e6d7381799e9735ca2ec9fd7be4d5fef5dcbc3cb555d8a463d8460607956", size = 3109962, upload_time = "2024-10-20T00:29:33.114Z" }, + { url = 
"https://files.pythonhosted.org/packages/3c/21/199fd16b5a981b1575923cbb5d9cf916fdc936b377e0423099f209e7e73d/asyncpg-0.30.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:51da377487e249e35bd0859661f6ee2b81db11ad1f4fc036194bc9cb2ead5056", size = 3064791, upload_time = "2024-10-20T00:29:34.677Z" }, + { url = "https://files.pythonhosted.org/packages/77/52/0004809b3427534a0c9139c08c87b515f1c77a8376a50ae29f001e53962f/asyncpg-0.30.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bc6d84136f9c4d24d358f3b02be4b6ba358abd09f80737d1ac7c444f36108454", size = 3188696, upload_time = "2024-10-20T00:29:36.389Z" }, + { url = "https://files.pythonhosted.org/packages/52/cb/fbad941cd466117be58b774a3f1cc9ecc659af625f028b163b1e646a55fe/asyncpg-0.30.0-cp311-cp311-win32.whl", hash = "sha256:574156480df14f64c2d76450a3f3aaaf26105869cad3865041156b38459e935d", size = 567358, upload_time = "2024-10-20T00:29:37.915Z" }, + { url = "https://files.pythonhosted.org/packages/3c/0a/0a32307cf166d50e1ad120d9b81a33a948a1a5463ebfa5a96cc5606c0863/asyncpg-0.30.0-cp311-cp311-win_amd64.whl", hash = "sha256:3356637f0bd830407b5597317b3cb3571387ae52ddc3bca6233682be88bbbc1f", size = 629375, upload_time = "2024-10-20T00:29:39.987Z" }, + { url = "https://files.pythonhosted.org/packages/4b/64/9d3e887bb7b01535fdbc45fbd5f0a8447539833b97ee69ecdbb7a79d0cb4/asyncpg-0.30.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c902a60b52e506d38d7e80e0dd5399f657220f24635fee368117b8b5fce1142e", size = 673162, upload_time = "2024-10-20T00:29:41.88Z" }, + { url = "https://files.pythonhosted.org/packages/6e/eb/8b236663f06984f212a087b3e849731f917ab80f84450e943900e8ca4052/asyncpg-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aca1548e43bbb9f0f627a04666fedaca23db0a31a84136ad1f868cb15deb6e3a", size = 637025, upload_time = "2024-10-20T00:29:43.352Z" }, + { url = "https://files.pythonhosted.org/packages/cc/57/2dc240bb263d58786cfaa60920779af6e8d32da63ab9ffc09f8312bd7a14/asyncpg-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c2a2ef565400234a633da0eafdce27e843836256d40705d83ab7ec42074efb3", size = 3496243, upload_time = "2024-10-20T00:29:44.922Z" }, + { url = "https://files.pythonhosted.org/packages/f4/40/0ae9d061d278b10713ea9021ef6b703ec44698fe32178715a501ac696c6b/asyncpg-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1292b84ee06ac8a2ad8e51c7475aa309245874b61333d97411aab835c4a2f737", size = 3575059, upload_time = "2024-10-20T00:29:46.891Z" }, + { url = "https://files.pythonhosted.org/packages/c3/75/d6b895a35a2c6506952247640178e5f768eeb28b2e20299b6a6f1d743ba0/asyncpg-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0f5712350388d0cd0615caec629ad53c81e506b1abaaf8d14c93f54b35e3595a", size = 3473596, upload_time = "2024-10-20T00:29:49.201Z" }, + { url = "https://files.pythonhosted.org/packages/c8/e7/3693392d3e168ab0aebb2d361431375bd22ffc7b4a586a0fc060d519fae7/asyncpg-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:db9891e2d76e6f425746c5d2da01921e9a16b5a71a1c905b13f30e12a257c4af", size = 3641632, upload_time = "2024-10-20T00:29:50.768Z" }, + { url = "https://files.pythonhosted.org/packages/32/ea/15670cea95745bba3f0352341db55f506a820b21c619ee66b7d12ea7867d/asyncpg-0.30.0-cp312-cp312-win32.whl", hash = "sha256:68d71a1be3d83d0570049cd1654a9bdfe506e794ecc98ad0873304a9f35e411e", size = 560186, upload_time = "2024-10-20T00:29:52.394Z" }, + { url = 
"https://files.pythonhosted.org/packages/7e/6b/fe1fad5cee79ca5f5c27aed7bd95baee529c1bf8a387435c8ba4fe53d5c1/asyncpg-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:9a0292c6af5c500523949155ec17b7fe01a00ace33b68a476d6b5059f9630305", size = 621064, upload_time = "2024-10-20T00:29:53.757Z" }, + { url = "https://files.pythonhosted.org/packages/3a/22/e20602e1218dc07692acf70d5b902be820168d6282e69ef0d3cb920dc36f/asyncpg-0.30.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:05b185ebb8083c8568ea8a40e896d5f7af4b8554b64d7719c0eaa1eb5a5c3a70", size = 670373, upload_time = "2024-10-20T00:29:55.165Z" }, + { url = "https://files.pythonhosted.org/packages/3d/b3/0cf269a9d647852a95c06eb00b815d0b95a4eb4b55aa2d6ba680971733b9/asyncpg-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c47806b1a8cbb0a0db896f4cd34d89942effe353a5035c62734ab13b9f938da3", size = 634745, upload_time = "2024-10-20T00:29:57.14Z" }, + { url = "https://files.pythonhosted.org/packages/8e/6d/a4f31bf358ce8491d2a31bfe0d7bcf25269e80481e49de4d8616c4295a34/asyncpg-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b6fde867a74e8c76c71e2f64f80c64c0f3163e687f1763cfaf21633ec24ec33", size = 3512103, upload_time = "2024-10-20T00:29:58.499Z" }, + { url = "https://files.pythonhosted.org/packages/96/19/139227a6e67f407b9c386cb594d9628c6c78c9024f26df87c912fabd4368/asyncpg-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46973045b567972128a27d40001124fbc821c87a6cade040cfcd4fa8a30bcdc4", size = 3592471, upload_time = "2024-10-20T00:30:00.354Z" }, + { url = "https://files.pythonhosted.org/packages/67/e4/ab3ca38f628f53f0fd28d3ff20edff1c975dd1cb22482e0061916b4b9a74/asyncpg-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9110df111cabc2ed81aad2f35394a00cadf4f2e0635603db6ebbd0fc896f46a4", size = 3496253, upload_time = "2024-10-20T00:30:02.794Z" }, + { url = "https://files.pythonhosted.org/packages/ef/5f/0bf65511d4eeac3a1f41c54034a492515a707c6edbc642174ae79034d3ba/asyncpg-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:04ff0785ae7eed6cc138e73fc67b8e51d54ee7a3ce9b63666ce55a0bf095f7ba", size = 3662720, upload_time = "2024-10-20T00:30:04.501Z" }, + { url = "https://files.pythonhosted.org/packages/e7/31/1513d5a6412b98052c3ed9158d783b1e09d0910f51fbe0e05f56cc370bc4/asyncpg-0.30.0-cp313-cp313-win32.whl", hash = "sha256:ae374585f51c2b444510cdf3595b97ece4f233fde739aa14b50e0d64e8a7a590", size = 560404, upload_time = "2024-10-20T00:30:06.537Z" }, + { url = "https://files.pythonhosted.org/packages/c8/a4/cec76b3389c4c5ff66301cd100fe88c318563ec8a520e0b2e792b5b84972/asyncpg-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:f59b430b8e27557c3fb9869222559f7417ced18688375825f8f12302c34e915e", size = 621623, upload_time = "2024-10-20T00:30:09.024Z" }, +] + +[[package]] +name = "attrs" +version = "25.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5a/b0/1367933a8532ee6ff8d63537de4f1177af4bff9f3e829baf7331f595bb24/attrs-25.3.0.tar.gz", hash = "sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b", size = 812032, upload_time = "2025-03-13T11:10:22.779Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815, upload_time = "2025-03-13T11:10:21.14Z" }, +] + +[[package]] +name = "beautifulsoup4" 
+version = "4.13.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "soupsieve" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d8/e4/0c4c39e18fd76d6a628d4dd8da40543d136ce2d1752bd6eeeab0791f4d6b/beautifulsoup4-4.13.4.tar.gz", hash = "sha256:dbb3c4e1ceae6aefebdaf2423247260cd062430a410e38c66f2baa50a8437195", size = 621067, upload_time = "2025-04-15T17:05:13.836Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/50/cd/30110dc0ffcf3b131156077b90e9f60ed75711223f306da4db08eff8403b/beautifulsoup4-4.13.4-py3-none-any.whl", hash = "sha256:9bbbb14bfde9d79f38b8cd5f8c7c85f4b8f2523190ebed90e950a8dea4cb1c4b", size = 187285, upload_time = "2025-04-15T17:05:12.221Z" }, +] + +[[package]] +name = "boltons" +version = "25.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/63/54/71a94d8e02da9a865587fb3fff100cb0fc7aa9f4d5ed9ed3a591216ddcc7/boltons-25.0.0.tar.gz", hash = "sha256:e110fbdc30b7b9868cb604e3f71d4722dd8f4dcb4a5ddd06028ba8f1ab0b5ace", size = 246294, upload_time = "2025-02-03T05:57:59.129Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/45/7f/0e961cf3908bc4c1c3e027de2794f867c6c89fb4916fc7dba295a0e80a2d/boltons-25.0.0-py3-none-any.whl", hash = "sha256:dc9fb38bf28985715497d1b54d00b62ea866eca3938938ea9043e254a3a6ca62", size = 194210, upload_time = "2025-02-03T05:57:56.705Z" }, +] + +[[package]] +name = "bs4" +version = "0.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "beautifulsoup4" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c9/aa/4acaf814ff901145da37332e05bb510452ebed97bc9602695059dd46ef39/bs4-0.0.2.tar.gz", hash = "sha256:a48685c58f50fe127722417bae83fe6badf500d54b55f7e39ffe43b798653925", size = 698, upload_time = "2024-01-17T18:15:47.371Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/51/bb/bf7aab772a159614954d84aa832c129624ba6c32faa559dfb200a534e50b/bs4-0.0.2-py2.py3-none-any.whl", hash = "sha256:abf8742c0805ef7f662dce4b51cca104cffe52b835238afc169142ab9b3fbccc", size = 1189, upload_time = "2024-01-17T18:15:48.613Z" }, +] + +[[package]] +name = "cachetools" +version = "5.5.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6c/81/3747dad6b14fa2cf53fcf10548cf5aea6913e96fab41a3c198676f8948a5/cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4", size = 28380, upload_time = "2025-02-20T21:01:19.524Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/72/76/20fa66124dbe6be5cafeb312ece67de6b61dd91a0247d1ea13db4ebb33c2/cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a", size = 10080, upload_time = "2025-02-20T21:01:16.647Z" }, +] + +[[package]] +name = "certifi" +version = "2025.4.26" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e8/9e/c05b3920a3b7d20d3d3310465f50348e5b3694f4f88c6daf736eef3024c4/certifi-2025.4.26.tar.gz", hash = "sha256:0a816057ea3cdefcef70270d2c515e4506bbc954f417fa5ade2021213bb8f0c6", size = 160705, upload_time = "2025-04-26T02:12:29.51Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4a/7e/3db2bd1b1f9e95f7cddca6d6e75e2f2bd9f51b1246e546d88addca0106bd/certifi-2025.4.26-py3-none-any.whl", hash = "sha256:30350364dfe371162649852c63336a15c70c6510c2ad5015b21c2345311805f3", 
size = 159618, upload_time = "2025-04-26T02:12:27.662Z" }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e4/33/89c2ced2b67d1c2a61c19c6751aa8902d46ce3dacb23600a283619f5a12d/charset_normalizer-3.4.2.tar.gz", hash = "sha256:5baececa9ecba31eff645232d59845c07aa030f0c81ee70184a90d35099a0e63", size = 126367, upload_time = "2025-05-02T08:34:42.01Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/05/85/4c40d00dcc6284a1c1ad5de5e0996b06f39d8232f1031cd23c2f5c07ee86/charset_normalizer-3.4.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:be1e352acbe3c78727a16a455126d9ff83ea2dfdcbc83148d2982305a04714c2", size = 198794, upload_time = "2025-05-02T08:32:11.945Z" }, + { url = "https://files.pythonhosted.org/packages/41/d9/7a6c0b9db952598e97e93cbdfcb91bacd89b9b88c7c983250a77c008703c/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa88ca0b1932e93f2d961bf3addbb2db902198dca337d88c89e1559e066e7645", size = 142846, upload_time = "2025-05-02T08:32:13.946Z" }, + { url = "https://files.pythonhosted.org/packages/66/82/a37989cda2ace7e37f36c1a8ed16c58cf48965a79c2142713244bf945c89/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d524ba3f1581b35c03cb42beebab4a13e6cdad7b36246bd22541fa585a56cccd", size = 153350, upload_time = "2025-05-02T08:32:15.873Z" }, + { url = "https://files.pythonhosted.org/packages/df/68/a576b31b694d07b53807269d05ec3f6f1093e9545e8607121995ba7a8313/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28a1005facc94196e1fb3e82a3d442a9d9110b8434fc1ded7a24a2983c9888d8", size = 145657, upload_time = "2025-05-02T08:32:17.283Z" }, + { url = "https://files.pythonhosted.org/packages/92/9b/ad67f03d74554bed3aefd56fe836e1623a50780f7c998d00ca128924a499/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fdb20a30fe1175ecabed17cbf7812f7b804b8a315a25f24678bcdf120a90077f", size = 147260, upload_time = "2025-05-02T08:32:18.807Z" }, + { url = "https://files.pythonhosted.org/packages/a6/e6/8aebae25e328160b20e31a7e9929b1578bbdc7f42e66f46595a432f8539e/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0f5d9ed7f254402c9e7d35d2f5972c9bbea9040e99cd2861bd77dc68263277c7", size = 149164, upload_time = "2025-05-02T08:32:20.333Z" }, + { url = "https://files.pythonhosted.org/packages/8b/f2/b3c2f07dbcc248805f10e67a0262c93308cfa149a4cd3d1fe01f593e5fd2/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:efd387a49825780ff861998cd959767800d54f8308936b21025326de4b5a42b9", size = 144571, upload_time = "2025-05-02T08:32:21.86Z" }, + { url = "https://files.pythonhosted.org/packages/60/5b/c3f3a94bc345bc211622ea59b4bed9ae63c00920e2e8f11824aa5708e8b7/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f0aa37f3c979cf2546b73e8222bbfa3dc07a641585340179d768068e3455e544", size = 151952, upload_time = "2025-05-02T08:32:23.434Z" }, + { url = "https://files.pythonhosted.org/packages/e2/4d/ff460c8b474122334c2fa394a3f99a04cf11c646da895f81402ae54f5c42/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:e70e990b2137b29dc5564715de1e12701815dacc1d056308e2b17e9095372a82", size = 155959, upload_time = "2025-05-02T08:32:24.993Z" }, + { url = 
"https://files.pythonhosted.org/packages/a2/2b/b964c6a2fda88611a1fe3d4c400d39c66a42d6c169c924818c848f922415/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:0c8c57f84ccfc871a48a47321cfa49ae1df56cd1d965a09abe84066f6853b9c0", size = 153030, upload_time = "2025-05-02T08:32:26.435Z" }, + { url = "https://files.pythonhosted.org/packages/59/2e/d3b9811db26a5ebf444bc0fa4f4be5aa6d76fc6e1c0fd537b16c14e849b6/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6b66f92b17849b85cad91259efc341dce9c1af48e2173bf38a85c6329f1033e5", size = 148015, upload_time = "2025-05-02T08:32:28.376Z" }, + { url = "https://files.pythonhosted.org/packages/90/07/c5fd7c11eafd561bb51220d600a788f1c8d77c5eef37ee49454cc5c35575/charset_normalizer-3.4.2-cp311-cp311-win32.whl", hash = "sha256:daac4765328a919a805fa5e2720f3e94767abd632ae410a9062dff5412bae65a", size = 98106, upload_time = "2025-05-02T08:32:30.281Z" }, + { url = "https://files.pythonhosted.org/packages/a8/05/5e33dbef7e2f773d672b6d79f10ec633d4a71cd96db6673625838a4fd532/charset_normalizer-3.4.2-cp311-cp311-win_amd64.whl", hash = "sha256:e53efc7c7cee4c1e70661e2e112ca46a575f90ed9ae3fef200f2a25e954f4b28", size = 105402, upload_time = "2025-05-02T08:32:32.191Z" }, + { url = "https://files.pythonhosted.org/packages/d7/a4/37f4d6035c89cac7930395a35cc0f1b872e652eaafb76a6075943754f095/charset_normalizer-3.4.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0c29de6a1a95f24b9a1aa7aefd27d2487263f00dfd55a77719b530788f75cff7", size = 199936, upload_time = "2025-05-02T08:32:33.712Z" }, + { url = "https://files.pythonhosted.org/packages/ee/8a/1a5e33b73e0d9287274f899d967907cd0bf9c343e651755d9307e0dbf2b3/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cddf7bd982eaa998934a91f69d182aec997c6c468898efe6679af88283b498d3", size = 143790, upload_time = "2025-05-02T08:32:35.768Z" }, + { url = "https://files.pythonhosted.org/packages/66/52/59521f1d8e6ab1482164fa21409c5ef44da3e9f653c13ba71becdd98dec3/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcbe676a55d7445b22c10967bceaaf0ee69407fbe0ece4d032b6eb8d4565982a", size = 153924, upload_time = "2025-05-02T08:32:37.284Z" }, + { url = "https://files.pythonhosted.org/packages/86/2d/fb55fdf41964ec782febbf33cb64be480a6b8f16ded2dbe8db27a405c09f/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d41c4d287cfc69060fa91cae9683eacffad989f1a10811995fa309df656ec214", size = 146626, upload_time = "2025-05-02T08:32:38.803Z" }, + { url = "https://files.pythonhosted.org/packages/8c/73/6ede2ec59bce19b3edf4209d70004253ec5f4e319f9a2e3f2f15601ed5f7/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e594135de17ab3866138f496755f302b72157d115086d100c3f19370839dd3a", size = 148567, upload_time = "2025-05-02T08:32:40.251Z" }, + { url = "https://files.pythonhosted.org/packages/09/14/957d03c6dc343c04904530b6bef4e5efae5ec7d7990a7cbb868e4595ee30/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cf713fe9a71ef6fd5adf7a79670135081cd4431c2943864757f0fa3a65b1fafd", size = 150957, upload_time = "2025-05-02T08:32:41.705Z" }, + { url = "https://files.pythonhosted.org/packages/0d/c8/8174d0e5c10ccebdcb1b53cc959591c4c722a3ad92461a273e86b9f5a302/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:a370b3e078e418187da8c3674eddb9d983ec09445c99a3a263c2011993522981", size = 145408, upload_time = "2025-05-02T08:32:43.709Z" }, + { url = "https://files.pythonhosted.org/packages/58/aa/8904b84bc8084ac19dc52feb4f5952c6df03ffb460a887b42615ee1382e8/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a955b438e62efdf7e0b7b52a64dc5c3396e2634baa62471768a64bc2adb73d5c", size = 153399, upload_time = "2025-05-02T08:32:46.197Z" }, + { url = "https://files.pythonhosted.org/packages/c2/26/89ee1f0e264d201cb65cf054aca6038c03b1a0c6b4ae998070392a3ce605/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:7222ffd5e4de8e57e03ce2cef95a4c43c98fcb72ad86909abdfc2c17d227fc1b", size = 156815, upload_time = "2025-05-02T08:32:48.105Z" }, + { url = "https://files.pythonhosted.org/packages/fd/07/68e95b4b345bad3dbbd3a8681737b4338ff2c9df29856a6d6d23ac4c73cb/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:bee093bf902e1d8fc0ac143c88902c3dfc8941f7ea1d6a8dd2bcb786d33db03d", size = 154537, upload_time = "2025-05-02T08:32:49.719Z" }, + { url = "https://files.pythonhosted.org/packages/77/1a/5eefc0ce04affb98af07bc05f3bac9094513c0e23b0562d64af46a06aae4/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dedb8adb91d11846ee08bec4c8236c8549ac721c245678282dcb06b221aab59f", size = 149565, upload_time = "2025-05-02T08:32:51.404Z" }, + { url = "https://files.pythonhosted.org/packages/37/a0/2410e5e6032a174c95e0806b1a6585eb21e12f445ebe239fac441995226a/charset_normalizer-3.4.2-cp312-cp312-win32.whl", hash = "sha256:db4c7bf0e07fc3b7d89ac2a5880a6a8062056801b83ff56d8464b70f65482b6c", size = 98357, upload_time = "2025-05-02T08:32:53.079Z" }, + { url = "https://files.pythonhosted.org/packages/6c/4f/c02d5c493967af3eda9c771ad4d2bbc8df6f99ddbeb37ceea6e8716a32bc/charset_normalizer-3.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:5a9979887252a82fefd3d3ed2a8e3b937a7a809f65dcb1e068b090e165bbe99e", size = 105776, upload_time = "2025-05-02T08:32:54.573Z" }, + { url = "https://files.pythonhosted.org/packages/ea/12/a93df3366ed32db1d907d7593a94f1fe6293903e3e92967bebd6950ed12c/charset_normalizer-3.4.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:926ca93accd5d36ccdabd803392ddc3e03e6d4cd1cf17deff3b989ab8e9dbcf0", size = 199622, upload_time = "2025-05-02T08:32:56.363Z" }, + { url = "https://files.pythonhosted.org/packages/04/93/bf204e6f344c39d9937d3c13c8cd5bbfc266472e51fc8c07cb7f64fcd2de/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eba9904b0f38a143592d9fc0e19e2df0fa2e41c3c3745554761c5f6447eedabf", size = 143435, upload_time = "2025-05-02T08:32:58.551Z" }, + { url = "https://files.pythonhosted.org/packages/22/2a/ea8a2095b0bafa6c5b5a55ffdc2f924455233ee7b91c69b7edfcc9e02284/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3fddb7e2c84ac87ac3a947cb4e66d143ca5863ef48e4a5ecb83bd48619e4634e", size = 153653, upload_time = "2025-05-02T08:33:00.342Z" }, + { url = "https://files.pythonhosted.org/packages/b6/57/1b090ff183d13cef485dfbe272e2fe57622a76694061353c59da52c9a659/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98f862da73774290f251b9df8d11161b6cf25b599a66baf087c1ffe340e9bfd1", size = 146231, upload_time = "2025-05-02T08:33:02.081Z" }, + { url = 
"https://files.pythonhosted.org/packages/e2/28/ffc026b26f441fc67bd21ab7f03b313ab3fe46714a14b516f931abe1a2d8/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c9379d65defcab82d07b2a9dfbfc2e95bc8fe0ebb1b176a3190230a3ef0e07c", size = 148243, upload_time = "2025-05-02T08:33:04.063Z" }, + { url = "https://files.pythonhosted.org/packages/c0/0f/9abe9bd191629c33e69e47c6ef45ef99773320e9ad8e9cb08b8ab4a8d4cb/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e635b87f01ebc977342e2697d05b56632f5f879a4f15955dfe8cef2448b51691", size = 150442, upload_time = "2025-05-02T08:33:06.418Z" }, + { url = "https://files.pythonhosted.org/packages/67/7c/a123bbcedca91d5916c056407f89a7f5e8fdfce12ba825d7d6b9954a1a3c/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1c95a1e2902a8b722868587c0e1184ad5c55631de5afc0eb96bc4b0d738092c0", size = 145147, upload_time = "2025-05-02T08:33:08.183Z" }, + { url = "https://files.pythonhosted.org/packages/ec/fe/1ac556fa4899d967b83e9893788e86b6af4d83e4726511eaaad035e36595/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ef8de666d6179b009dce7bcb2ad4c4a779f113f12caf8dc77f0162c29d20490b", size = 153057, upload_time = "2025-05-02T08:33:09.986Z" }, + { url = "https://files.pythonhosted.org/packages/2b/ff/acfc0b0a70b19e3e54febdd5301a98b72fa07635e56f24f60502e954c461/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:32fc0341d72e0f73f80acb0a2c94216bd704f4f0bce10aedea38f30502b271ff", size = 156454, upload_time = "2025-05-02T08:33:11.814Z" }, + { url = "https://files.pythonhosted.org/packages/92/08/95b458ce9c740d0645feb0e96cea1f5ec946ea9c580a94adfe0b617f3573/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:289200a18fa698949d2b39c671c2cc7a24d44096784e76614899a7ccf2574b7b", size = 154174, upload_time = "2025-05-02T08:33:13.707Z" }, + { url = "https://files.pythonhosted.org/packages/78/be/8392efc43487ac051eee6c36d5fbd63032d78f7728cb37aebcc98191f1ff/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4a476b06fbcf359ad25d34a057b7219281286ae2477cc5ff5e3f70a246971148", size = 149166, upload_time = "2025-05-02T08:33:15.458Z" }, + { url = "https://files.pythonhosted.org/packages/44/96/392abd49b094d30b91d9fbda6a69519e95802250b777841cf3bda8fe136c/charset_normalizer-3.4.2-cp313-cp313-win32.whl", hash = "sha256:aaeeb6a479c7667fbe1099af9617c83aaca22182d6cf8c53966491a0f1b7ffb7", size = 98064, upload_time = "2025-05-02T08:33:17.06Z" }, + { url = "https://files.pythonhosted.org/packages/e9/b0/0200da600134e001d91851ddc797809e2fe0ea72de90e09bec5a2fbdaccb/charset_normalizer-3.4.2-cp313-cp313-win_amd64.whl", hash = "sha256:aa6af9e7d59f9c12b33ae4e9450619cf2488e2bbe9b44030905877f0b2324980", size = 105641, upload_time = "2025-05-02T08:33:18.753Z" }, + { url = "https://files.pythonhosted.org/packages/20/94/c5790835a017658cbfabd07f3bfb549140c3ac458cfc196323996b10095a/charset_normalizer-3.4.2-py3-none-any.whl", hash = "sha256:7f56930ab0abd1c45cd15be65cc741c28b1c9a34876ce8c17a2fa107810c0af0", size = 52626, upload_time = "2025-05-02T08:34:40.053Z" }, +] + +[[package]] +name = "ckanapi" +version = "4.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "docopt" }, + { name = "python-slugify" }, + { name = "requests" }, + { name = "setuptools" }, + { name = "simplejson" }, + { name = "six" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/36/31/c0131cfe3cdae242699c2889d20016fbe2444dcaf86070ee03863d1035ba/ckanapi-4.8.tar.gz", hash = "sha256:3a98d81e6cb7480883eb1d031740205d3e94176376e9d284d218829d81d0afed", size = 37633, upload_time = "2024-04-04T15:46:09.451Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0d/ac/626837e55aeb17f8e3982128a25fbf5f7880a397039eb7a1b5cebaca7fa4/ckanapi-4.8-py3-none-any.whl", hash = "sha256:a6ac36b55321368cf39d70f701542276fe098484517e339adf18595f30c076b8", size = 46316, upload_time = "2024-04-04T15:46:07.725Z" }, +] + +[[package]] +name = "click" +version = "8.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cd/0f/62ca20172d4f87d93cf89665fbaedcd560ac48b465bd1d92bfc7ea6b0a41/click-8.2.0.tar.gz", hash = "sha256:f5452aeddd9988eefa20f90f05ab66f17fce1ee2a36907fd30b05bbb5953814d", size = 235857, upload_time = "2025-05-10T22:21:03.111Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/58/1f37bf81e3c689cc74ffa42102fa8915b59085f54a6e4a80bc6265c0f6bf/click-8.2.0-py3-none-any.whl", hash = "sha256:6b303f0b2aa85f1cb4e5303078fadcbcd4e476f114fab9b5007005711839325c", size = 102156, upload_time = "2025-05-10T22:21:01.352Z" }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload_time = "2022-10-25T02:36:22.414Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload_time = "2022-10-25T02:36:20.889Z" }, +] + +[[package]] +name = "data-source-identification" +version = "0.1.0" +source = { virtual = "." 
} +dependencies = [ + { name = "aiohttp" }, + { name = "alembic" }, + { name = "apscheduler" }, + { name = "asyncpg" }, + { name = "beautifulsoup4" }, + { name = "bs4" }, + { name = "ckanapi" }, + { name = "datasets" }, + { name = "docker" }, + { name = "environs" }, + { name = "fastapi", extra = ["standard"] }, + { name = "from-root" }, + { name = "google-api-python-client" }, + { name = "httpx" }, + { name = "huggingface-hub" }, + { name = "keras" }, + { name = "lxml" }, + { name = "marshmallow" }, + { name = "numpy" }, + { name = "openai" }, + { name = "pandas" }, + { name = "pdap-access-manager" }, + { name = "playwright" }, + { name = "psycopg", extra = ["binary"] }, + { name = "psycopg2-binary" }, + { name = "pydantic" }, + { name = "pyjwt" }, + { name = "python-dotenv" }, + { name = "requests" }, + { name = "sqlalchemy" }, + { name = "starlette" }, + { name = "tensorflow-cpu" }, + { name = "tensorflow-io-gcs-filesystem" }, + { name = "tqdm" }, + { name = "transformers" }, + { name = "urllib3" }, + { name = "uvicorn" }, +] + +[package.dev-dependencies] +dev = [ + { name = "deepdiff" }, + { name = "docker" }, + { name = "pendulum" }, + { name = "pytest" }, + { name = "pytest-asyncio" }, + { name = "pytest-mock" }, + { name = "pytest-timeout" }, +] + +[package.metadata] +requires-dist = [ + { name = "aiohttp", specifier = "~=3.11.11" }, + { name = "alembic", specifier = "~=1.14.0" }, + { name = "apscheduler", specifier = "~=3.11.0" }, + { name = "asyncpg", specifier = "~=0.30.0" }, + { name = "beautifulsoup4", specifier = ">=4.12.3" }, + { name = "bs4", specifier = "~=0.0.2" }, + { name = "ckanapi", specifier = "~=4.8" }, + { name = "datasets", specifier = "~=2.19.1" }, + { name = "docker", specifier = "~=7.1.0" }, + { name = "environs", specifier = ">=14.1.1" }, + { name = "fastapi", extras = ["standard"], specifier = "~=0.115.6" }, + { name = "from-root", specifier = "~=1.3.0" }, + { name = "google-api-python-client", specifier = ">=2.156.0" }, + { name = "httpx", specifier = "~=0.28.1" }, + { name = "huggingface-hub", specifier = "~=0.28.1" }, + { name = "keras", specifier = "~=2.15.0" }, + { name = "lxml", specifier = "~=5.1.0" }, + { name = "marshmallow", specifier = "~=3.23.2" }, + { name = "numpy", specifier = "~=1.26.4" }, + { name = "openai", specifier = "~=1.60.1" }, + { name = "pandas", specifier = "~=2.2.3" }, + { name = "pdap-access-manager", specifier = "==0.3.5" }, + { name = "playwright", specifier = "~=1.49.1" }, + { name = "psycopg", extras = ["binary"], specifier = "~=3.1.20" }, + { name = "psycopg2-binary", specifier = "~=2.9.6" }, + { name = "pydantic", specifier = "~=2.11.3" }, + { name = "pyjwt", specifier = "~=2.10.1" }, + { name = "python-dotenv", specifier = "~=1.0.1" }, + { name = "requests", specifier = "~=2.32.3" }, + { name = "sqlalchemy", specifier = "~=2.0.36" }, + { name = "starlette", specifier = "~=0.45.3" }, + { name = "tensorflow-cpu", specifier = "~=2.15.1" }, + { name = "tensorflow-io-gcs-filesystem", specifier = "==0.31.0" }, + { name = "tqdm", specifier = ">=4.64.1" }, + { name = "transformers", specifier = "~=4.40.2" }, + { name = "urllib3", specifier = "~=1.26.18" }, + { name = "uvicorn", specifier = "~=0.34.0" }, +] + +[package.metadata.requires-dev] +dev = [ + { name = "deepdiff", specifier = ">=8.5.0" }, + { name = "docker", specifier = ">=7.1.0" }, + { name = "pendulum", specifier = ">=3.1.0" }, + { name = "pytest", specifier = ">=7.2.2" }, + { name = "pytest-asyncio", specifier = "~=0.25.2" }, + { name = "pytest-mock", specifier = 
"==3.12.0" }, + { name = "pytest-timeout", specifier = "~=2.3.1" }, +] + +[[package]] +name = "datasets" +version = "2.19.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohttp" }, + { name = "dill" }, + { name = "filelock" }, + { name = "fsspec", extra = ["http"] }, + { name = "huggingface-hub" }, + { name = "multiprocess" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "pandas" }, + { name = "pyarrow" }, + { name = "pyarrow-hotfix" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "tqdm" }, + { name = "xxhash" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/52/e7/6ee66732f74e4fb1c8915e58b3c253aded777ad0fa457f3f831dd0cd09b4/datasets-2.19.2.tar.gz", hash = "sha256:eccb82fb3bb5ee26ccc6d7a15b7f1f834e2cc4e59b7cff7733a003552bad51ef", size = 2215337, upload_time = "2024-06-03T05:11:44.756Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3f/59/46818ebeb708234a60e42ccf409d20709e482519d2aa450b501ddbba4594/datasets-2.19.2-py3-none-any.whl", hash = "sha256:e07ff15d75b1af75c87dd96323ba2a361128d495136652f37fd62f918d17bb4e", size = 542113, upload_time = "2024-06-03T05:11:41.151Z" }, +] + +[[package]] +name = "deepdiff" +version = "8.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "orderly-set" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0a/0f/9cd2624f7dcd755cbf1fa21fb7234541f19a1be96a56f387ec9053ebe220/deepdiff-8.5.0.tar.gz", hash = "sha256:a4dd3529fa8d4cd5b9cbb6e3ea9c95997eaa919ba37dac3966c1b8f872dc1cd1", size = 538517, upload_time = "2025-05-09T18:44:10.035Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4a/3b/2e0797200c51531a6d8c97a8e4c9fa6fb56de7e6e2a15c1c067b6b10a0b0/deepdiff-8.5.0-py3-none-any.whl", hash = "sha256:d4599db637f36a1c285f5fdfc2cd8d38bde8d8be8636b65ab5e425b67c54df26", size = 85112, upload_time = "2025-05-09T18:44:07.784Z" }, +] + +[[package]] +name = "dill" +version = "0.3.8" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/17/4d/ac7ffa80c69ea1df30a8aa11b3578692a5118e7cd1aa157e3ef73b092d15/dill-0.3.8.tar.gz", hash = "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca", size = 184847, upload_time = "2024-01-27T23:42:16.145Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c9/7a/cef76fd8438a42f96db64ddaa85280485a9c395e7df3db8158cfec1eee34/dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7", size = 116252, upload_time = "2024-01-27T23:42:14.239Z" }, +] + +[[package]] +name = "distro" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722, upload_time = "2023-12-24T09:54:32.31Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload_time = "2023-12-24T09:54:30.421Z" }, +] + +[[package]] +name = "dnspython" +version = "2.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/b5/4a/263763cb2ba3816dd94b08ad3a33d5fdae34ecb856678773cc40a3605829/dnspython-2.7.0.tar.gz", hash = "sha256:ce9c432eda0dc91cf618a5cedf1a4e142651196bbcd2c80e89ed5a907e5cfaf1", size = 345197, upload_time = "2024-10-05T20:14:59.362Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/68/1b/e0a87d256e40e8c888847551b20a017a6b98139178505dc7ffb96f04e954/dnspython-2.7.0-py3-none-any.whl", hash = "sha256:b4c34b7d10b51bcc3a5071e7b8dee77939f1e878477eeecc965e9835f63c6c86", size = 313632, upload_time = "2024-10-05T20:14:57.687Z" }, +] + +[[package]] +name = "docker" +version = "7.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pywin32", marker = "sys_platform == 'win32'" }, + { name = "requests" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/91/9b/4a2ea29aeba62471211598dac5d96825bb49348fa07e906ea930394a83ce/docker-7.1.0.tar.gz", hash = "sha256:ad8c70e6e3f8926cb8a92619b832b4ea5299e2831c14284663184e200546fa6c", size = 117834, upload_time = "2024-05-23T11:13:57.216Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e3/26/57c6fb270950d476074c087527a558ccb6f4436657314bfb6cdf484114c4/docker-7.1.0-py3-none-any.whl", hash = "sha256:c96b93b7f0a746f9e77d325bcfb87422a3d8bd4f03136ae8a85b37f1898d5fc0", size = 147774, upload_time = "2024-05-23T11:13:55.01Z" }, +] + +[[package]] +name = "docopt" +version = "0.6.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/55/8f8cab2afd404cf578136ef2cc5dfb50baa1761b68c9da1fb1e4eed343c9/docopt-0.6.2.tar.gz", hash = "sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491", size = 25901, upload_time = "2014-06-16T11:18:57.406Z" } + +[[package]] +name = "email-validator" +version = "2.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "dnspython" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/48/ce/13508a1ec3f8bb981ae4ca79ea40384becc868bfae97fd1c942bb3a001b1/email_validator-2.2.0.tar.gz", hash = "sha256:cb690f344c617a714f22e66ae771445a1ceb46821152df8e165c5f9a364582b7", size = 48967, upload_time = "2024-06-20T11:30:30.034Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d7/ee/bf0adb559ad3c786f12bcbc9296b3f5675f529199bef03e2df281fa1fadb/email_validator-2.2.0-py3-none-any.whl", hash = "sha256:561977c2d73ce3611850a06fa56b414621e0c8faa9d66f2611407d87465da631", size = 33521, upload_time = "2024-06-20T11:30:28.248Z" }, +] + +[[package]] +name = "environs" +version = "14.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "marshmallow" }, + { name = "python-dotenv" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/31/d3/e82bdbb8cc332e751f67a3f668c5d134d57f983497d9f3a59a375b6e8fd8/environs-14.1.1.tar.gz", hash = "sha256:03db7ee2d50ec697b68814cd175a3a05a7c7954804e4e419ca8b570dc5a835cf", size = 32050, upload_time = "2025-02-10T20:24:26.437Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/1c/ab9752f02d32d981d647c05822be9ff93809be8953dacea2da2bec9a9de9/environs-14.1.1-py3-none-any.whl", hash = "sha256:45bc56f1d53bbc59d8dd69bba97377dd88ec28b8229d81cedbd455b21789445b", size = 15566, upload_time = "2025-02-10T20:24:22.116Z" }, +] + +[[package]] +name = "fastapi" +version = "0.115.12" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "starlette" }, + { name = 
"typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f4/55/ae499352d82338331ca1e28c7f4a63bfd09479b16395dce38cf50a39e2c2/fastapi-0.115.12.tar.gz", hash = "sha256:1e2c2a2646905f9e83d32f04a3f86aff4a286669c6c950ca95b5fd68c2602681", size = 295236, upload_time = "2025-03-23T22:55:43.822Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/50/b3/b51f09c2ba432a576fe63758bddc81f78f0c6309d9e5c10d194313bf021e/fastapi-0.115.12-py3-none-any.whl", hash = "sha256:e94613d6c05e27be7ffebdd6ea5f388112e5e430c8f7d6494a9d1d88d43e814d", size = 95164, upload_time = "2025-03-23T22:55:42.101Z" }, +] + +[package.optional-dependencies] +standard = [ + { name = "email-validator" }, + { name = "fastapi-cli", extra = ["standard"] }, + { name = "httpx" }, + { name = "jinja2" }, + { name = "python-multipart" }, + { name = "uvicorn", extra = ["standard"] }, +] + +[[package]] +name = "fastapi-cli" +version = "0.0.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "rich-toolkit" }, + { name = "typer" }, + { name = "uvicorn", extra = ["standard"] }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fe/73/82a5831fbbf8ed75905bacf5b2d9d3dfd6f04d6968b29fe6f72a5ae9ceb1/fastapi_cli-0.0.7.tar.gz", hash = "sha256:02b3b65956f526412515907a0793c9094abd4bfb5457b389f645b0ea6ba3605e", size = 16753, upload_time = "2024-12-15T14:28:10.028Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a1/e6/5daefc851b514ce2287d8f5d358ae4341089185f78f3217a69d0ce3a390c/fastapi_cli-0.0.7-py3-none-any.whl", hash = "sha256:d549368ff584b2804336c61f192d86ddea080c11255f375959627911944804f4", size = 10705, upload_time = "2024-12-15T14:28:06.18Z" }, +] + +[package.optional-dependencies] +standard = [ + { name = "uvicorn", extra = ["standard"] }, +] + +[[package]] +name = "filelock" +version = "3.18.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0a/10/c23352565a6544bdc5353e0b15fc1c563352101f30e24bf500207a54df9a/filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2", size = 18075, upload_time = "2025-03-14T07:11:40.47Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4d/36/2a115987e2d8c300a974597416d9de88f2444426de9571f4b59b2cca3acc/filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de", size = 16215, upload_time = "2025-03-14T07:11:39.145Z" }, +] + +[[package]] +name = "flatbuffers" +version = "25.2.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e4/30/eb5dce7994fc71a2f685d98ec33cc660c0a5887db5610137e60d8cbc4489/flatbuffers-25.2.10.tar.gz", hash = "sha256:97e451377a41262f8d9bd4295cc836133415cc03d8cb966410a4af92eb00d26e", size = 22170, upload_time = "2025-02-11T04:26:46.257Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b8/25/155f9f080d5e4bc0082edfda032ea2bc2b8fab3f4d25d46c1e9dd22a1a89/flatbuffers-25.2.10-py2.py3-none-any.whl", hash = "sha256:ebba5f4d5ea615af3f7fd70fc310636fbb2bbd1f566ac0a23d98dd412de50051", size = 30953, upload_time = "2025-02-11T04:26:44.484Z" }, +] + +[[package]] +name = "from-root" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ae/30/5259cfafc8372df008a5605ca19aba9d560285471ee043f39cbc5a7b7fa2/from_root-1.3.0.tar.gz", hash = 
"sha256:da1359f5faabca367f685cac927cb2f307bb35c488fdd0361f963d6f1cd2674f", size = 4858, upload_time = "2022-12-27T12:41:25.78Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/33/a8/451d0294d5d9ead3d26c25837df0588d1bcdd9235abf91e0ded629369921/from_root-1.3.0-py3-none-any.whl", hash = "sha256:7446a9b6481e668329cc11ad0a234fe4c83c63468c652e037d02846a75c726f8", size = 5489, upload_time = "2022-12-27T12:41:23.989Z" }, +] + +[[package]] +name = "frozenlist" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/f4/d744cba2da59b5c1d88823cf9e8a6c74e4659e2b27604ed973be2a0bf5ab/frozenlist-1.6.0.tar.gz", hash = "sha256:b99655c32c1c8e06d111e7f41c06c29a5318cb1835df23a45518e02a47c63b68", size = 42831, upload_time = "2025-04-17T22:38:53.099Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/53/b5/bc883b5296ec902115c00be161da93bf661199c465ec4c483feec6ea4c32/frozenlist-1.6.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ae8337990e7a45683548ffb2fee1af2f1ed08169284cd829cdd9a7fa7470530d", size = 160912, upload_time = "2025-04-17T22:36:17.235Z" }, + { url = "https://files.pythonhosted.org/packages/6f/93/51b058b563d0704b39c56baa222828043aafcac17fd3734bec5dbeb619b1/frozenlist-1.6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8c952f69dd524558694818a461855f35d36cc7f5c0adddce37e962c85d06eac0", size = 124315, upload_time = "2025-04-17T22:36:18.735Z" }, + { url = "https://files.pythonhosted.org/packages/c9/e0/46cd35219428d350558b874d595e132d1c17a9471a1bd0d01d518a261e7c/frozenlist-1.6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8f5fef13136c4e2dee91bfb9a44e236fff78fc2cd9f838eddfc470c3d7d90afe", size = 122230, upload_time = "2025-04-17T22:36:20.6Z" }, + { url = "https://files.pythonhosted.org/packages/d1/0f/7ad2ce928ad06d6dd26a61812b959ded573d3e9d0ee6109d96c2be7172e9/frozenlist-1.6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:716bbba09611b4663ecbb7cd022f640759af8259e12a6ca939c0a6acd49eedba", size = 314842, upload_time = "2025-04-17T22:36:22.088Z" }, + { url = "https://files.pythonhosted.org/packages/34/76/98cbbd8a20a5c3359a2004ae5e5b216af84a150ccbad67c8f8f30fb2ea91/frozenlist-1.6.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:7b8c4dc422c1a3ffc550b465090e53b0bf4839047f3e436a34172ac67c45d595", size = 304919, upload_time = "2025-04-17T22:36:24.247Z" }, + { url = "https://files.pythonhosted.org/packages/9a/fa/258e771ce3a44348c05e6b01dffc2bc67603fba95761458c238cd09a2c77/frozenlist-1.6.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b11534872256e1666116f6587a1592ef395a98b54476addb5e8d352925cb5d4a", size = 324074, upload_time = "2025-04-17T22:36:26.291Z" }, + { url = "https://files.pythonhosted.org/packages/d5/a4/047d861fd8c538210e12b208c0479912273f991356b6bdee7ea8356b07c9/frozenlist-1.6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c6eceb88aaf7221f75be6ab498dc622a151f5f88d536661af3ffc486245a626", size = 321292, upload_time = "2025-04-17T22:36:27.909Z" }, + { url = "https://files.pythonhosted.org/packages/c0/25/cfec8af758b4525676cabd36efcaf7102c1348a776c0d1ad046b8a7cdc65/frozenlist-1.6.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62c828a5b195570eb4b37369fcbbd58e96c905768d53a44d13044355647838ff", size = 301569, upload_time = "2025-04-17T22:36:29.448Z" }, + { url = 
"https://files.pythonhosted.org/packages/87/2f/0c819372fa9f0c07b153124bf58683b8d0ca7bb73ea5ccde9b9ef1745beb/frozenlist-1.6.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1c6bd2c6399920c9622362ce95a7d74e7f9af9bfec05fff91b8ce4b9647845a", size = 313625, upload_time = "2025-04-17T22:36:31.55Z" }, + { url = "https://files.pythonhosted.org/packages/50/5f/f0cf8b0fdedffdb76b3745aa13d5dbe404d63493cc211ce8250f2025307f/frozenlist-1.6.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:49ba23817781e22fcbd45fd9ff2b9b8cdb7b16a42a4851ab8025cae7b22e96d0", size = 312523, upload_time = "2025-04-17T22:36:33.078Z" }, + { url = "https://files.pythonhosted.org/packages/e1/6c/38c49108491272d3e84125bbabf2c2d0b304899b52f49f0539deb26ad18d/frozenlist-1.6.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:431ef6937ae0f853143e2ca67d6da76c083e8b1fe3df0e96f3802fd37626e606", size = 322657, upload_time = "2025-04-17T22:36:34.688Z" }, + { url = "https://files.pythonhosted.org/packages/bd/4b/3bd3bad5be06a9d1b04b1c22be80b5fe65b502992d62fab4bdb25d9366ee/frozenlist-1.6.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:9d124b38b3c299ca68433597ee26b7819209cb8a3a9ea761dfe9db3a04bba584", size = 303414, upload_time = "2025-04-17T22:36:36.363Z" }, + { url = "https://files.pythonhosted.org/packages/5b/89/7e225a30bef6e85dbfe22622c24afe932e9444de3b40d58b1ea589a14ef8/frozenlist-1.6.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:118e97556306402e2b010da1ef21ea70cb6d6122e580da64c056b96f524fbd6a", size = 320321, upload_time = "2025-04-17T22:36:38.16Z" }, + { url = "https://files.pythonhosted.org/packages/22/72/7e3acef4dd9e86366cb8f4d8f28e852c2b7e116927e9722b31a6f71ea4b0/frozenlist-1.6.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:fb3b309f1d4086b5533cf7bbcf3f956f0ae6469664522f1bde4feed26fba60f1", size = 323975, upload_time = "2025-04-17T22:36:40.289Z" }, + { url = "https://files.pythonhosted.org/packages/d8/85/e5da03d20507e13c66ce612c9792b76811b7a43e3320cce42d95b85ac755/frozenlist-1.6.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:54dece0d21dce4fdb188a1ffc555926adf1d1c516e493c2914d7c370e454bc9e", size = 316553, upload_time = "2025-04-17T22:36:42.045Z" }, + { url = "https://files.pythonhosted.org/packages/ac/8e/6c609cbd0580ae8a0661c408149f196aade7d325b1ae7adc930501b81acb/frozenlist-1.6.0-cp311-cp311-win32.whl", hash = "sha256:654e4ba1d0b2154ca2f096bed27461cf6160bc7f504a7f9a9ef447c293caf860", size = 115511, upload_time = "2025-04-17T22:36:44.067Z" }, + { url = "https://files.pythonhosted.org/packages/f2/13/a84804cfde6de12d44ed48ecbf777ba62b12ff09e761f76cdd1ff9e14bb1/frozenlist-1.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:3e911391bffdb806001002c1f860787542f45916c3baf764264a52765d5a5603", size = 120863, upload_time = "2025-04-17T22:36:45.465Z" }, + { url = "https://files.pythonhosted.org/packages/9c/8a/289b7d0de2fbac832ea80944d809759976f661557a38bb8e77db5d9f79b7/frozenlist-1.6.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:c5b9e42ace7d95bf41e19b87cec8f262c41d3510d8ad7514ab3862ea2197bfb1", size = 160193, upload_time = "2025-04-17T22:36:47.382Z" }, + { url = "https://files.pythonhosted.org/packages/19/80/2fd17d322aec7f430549f0669f599997174f93ee17929ea5b92781ec902c/frozenlist-1.6.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ca9973735ce9f770d24d5484dcb42f68f135351c2fc81a7a9369e48cf2998a29", size = 123831, upload_time = "2025-04-17T22:36:49.401Z" }, + { url = 
"https://files.pythonhosted.org/packages/99/06/f5812da431273f78c6543e0b2f7de67dfd65eb0a433978b2c9c63d2205e4/frozenlist-1.6.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6ac40ec76041c67b928ca8aaffba15c2b2ee3f5ae8d0cb0617b5e63ec119ca25", size = 121862, upload_time = "2025-04-17T22:36:51.899Z" }, + { url = "https://files.pythonhosted.org/packages/d0/31/9e61c6b5fc493cf24d54881731204d27105234d09878be1a5983182cc4a5/frozenlist-1.6.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:95b7a8a3180dfb280eb044fdec562f9b461614c0ef21669aea6f1d3dac6ee576", size = 316361, upload_time = "2025-04-17T22:36:53.402Z" }, + { url = "https://files.pythonhosted.org/packages/9d/55/22ca9362d4f0222324981470fd50192be200154d51509ee6eb9baa148e96/frozenlist-1.6.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:c444d824e22da6c9291886d80c7d00c444981a72686e2b59d38b285617cb52c8", size = 307115, upload_time = "2025-04-17T22:36:55.016Z" }, + { url = "https://files.pythonhosted.org/packages/ae/39/4fff42920a57794881e7bb3898dc7f5f539261711ea411b43bba3cde8b79/frozenlist-1.6.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb52c8166499a8150bfd38478248572c924c003cbb45fe3bcd348e5ac7c000f9", size = 322505, upload_time = "2025-04-17T22:36:57.12Z" }, + { url = "https://files.pythonhosted.org/packages/55/f2/88c41f374c1e4cf0092a5459e5f3d6a1e17ed274c98087a76487783df90c/frozenlist-1.6.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b35298b2db9c2468106278537ee529719228950a5fdda686582f68f247d1dc6e", size = 322666, upload_time = "2025-04-17T22:36:58.735Z" }, + { url = "https://files.pythonhosted.org/packages/75/51/034eeb75afdf3fd03997856195b500722c0b1a50716664cde64e28299c4b/frozenlist-1.6.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d108e2d070034f9d57210f22fefd22ea0d04609fc97c5f7f5a686b3471028590", size = 302119, upload_time = "2025-04-17T22:37:00.512Z" }, + { url = "https://files.pythonhosted.org/packages/2b/a6/564ecde55ee633270a793999ef4fd1d2c2b32b5a7eec903b1012cb7c5143/frozenlist-1.6.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e1be9111cb6756868ac242b3c2bd1f09d9aea09846e4f5c23715e7afb647103", size = 316226, upload_time = "2025-04-17T22:37:02.102Z" }, + { url = "https://files.pythonhosted.org/packages/f1/c8/6c0682c32377f402b8a6174fb16378b683cf6379ab4d2827c580892ab3c7/frozenlist-1.6.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:94bb451c664415f02f07eef4ece976a2c65dcbab9c2f1705b7031a3a75349d8c", size = 312788, upload_time = "2025-04-17T22:37:03.578Z" }, + { url = "https://files.pythonhosted.org/packages/b6/b8/10fbec38f82c5d163ca1750bfff4ede69713badf236a016781cf1f10a0f0/frozenlist-1.6.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:d1a686d0b0949182b8faddea596f3fc11f44768d1f74d4cad70213b2e139d821", size = 325914, upload_time = "2025-04-17T22:37:05.213Z" }, + { url = "https://files.pythonhosted.org/packages/62/ca/2bf4f3a1bd40cdedd301e6ecfdbb291080d5afc5f9ce350c0739f773d6b9/frozenlist-1.6.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:ea8e59105d802c5a38bdbe7362822c522230b3faba2aa35c0fa1765239b7dd70", size = 305283, upload_time = "2025-04-17T22:37:06.985Z" }, + { url = "https://files.pythonhosted.org/packages/09/64/20cc13ccf94abc2a1f482f74ad210703dc78a590d0b805af1c9aa67f76f9/frozenlist-1.6.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = 
"sha256:abc4e880a9b920bc5020bf6a431a6bb40589d9bca3975c980495f63632e8382f", size = 319264, upload_time = "2025-04-17T22:37:08.618Z" }, + { url = "https://files.pythonhosted.org/packages/20/ff/86c6a2bbe98cfc231519f5e6d712a0898488ceac804a917ce014f32e68f6/frozenlist-1.6.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9a79713adfe28830f27a3c62f6b5406c37376c892b05ae070906f07ae4487046", size = 326482, upload_time = "2025-04-17T22:37:10.196Z" }, + { url = "https://files.pythonhosted.org/packages/2f/da/8e381f66367d79adca245d1d71527aac774e30e291d41ef161ce2d80c38e/frozenlist-1.6.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9a0318c2068e217a8f5e3b85e35899f5a19e97141a45bb925bb357cfe1daf770", size = 318248, upload_time = "2025-04-17T22:37:12.284Z" }, + { url = "https://files.pythonhosted.org/packages/39/24/1a1976563fb476ab6f0fa9fefaac7616a4361dbe0461324f9fd7bf425dbe/frozenlist-1.6.0-cp312-cp312-win32.whl", hash = "sha256:853ac025092a24bb3bf09ae87f9127de9fe6e0c345614ac92536577cf956dfcc", size = 115161, upload_time = "2025-04-17T22:37:13.902Z" }, + { url = "https://files.pythonhosted.org/packages/80/2e/fb4ed62a65f8cd66044706b1013f0010930d8cbb0729a2219561ea075434/frozenlist-1.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:2bdfe2d7e6c9281c6e55523acd6c2bf77963cb422fdc7d142fb0cb6621b66878", size = 120548, upload_time = "2025-04-17T22:37:15.326Z" }, + { url = "https://files.pythonhosted.org/packages/6f/e5/04c7090c514d96ca00887932417f04343ab94904a56ab7f57861bf63652d/frozenlist-1.6.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:1d7fb014fe0fbfee3efd6a94fc635aeaa68e5e1720fe9e57357f2e2c6e1a647e", size = 158182, upload_time = "2025-04-17T22:37:16.837Z" }, + { url = "https://files.pythonhosted.org/packages/e9/8f/60d0555c61eec855783a6356268314d204137f5e0c53b59ae2fc28938c99/frozenlist-1.6.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:01bcaa305a0fdad12745502bfd16a1c75b14558dabae226852f9159364573117", size = 122838, upload_time = "2025-04-17T22:37:18.352Z" }, + { url = "https://files.pythonhosted.org/packages/5a/a7/d0ec890e3665b4b3b7c05dc80e477ed8dc2e2e77719368e78e2cd9fec9c8/frozenlist-1.6.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8b314faa3051a6d45da196a2c495e922f987dc848e967d8cfeaee8a0328b1cd4", size = 120980, upload_time = "2025-04-17T22:37:19.857Z" }, + { url = "https://files.pythonhosted.org/packages/cc/19/9b355a5e7a8eba903a008579964192c3e427444752f20b2144b10bb336df/frozenlist-1.6.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da62fecac21a3ee10463d153549d8db87549a5e77eefb8c91ac84bb42bb1e4e3", size = 305463, upload_time = "2025-04-17T22:37:21.328Z" }, + { url = "https://files.pythonhosted.org/packages/9c/8d/5b4c758c2550131d66935ef2fa700ada2461c08866aef4229ae1554b93ca/frozenlist-1.6.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d1eb89bf3454e2132e046f9599fbcf0a4483ed43b40f545551a39316d0201cd1", size = 297985, upload_time = "2025-04-17T22:37:23.55Z" }, + { url = "https://files.pythonhosted.org/packages/48/2c/537ec09e032b5865715726b2d1d9813e6589b571d34d01550c7aeaad7e53/frozenlist-1.6.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d18689b40cb3936acd971f663ccb8e2589c45db5e2c5f07e0ec6207664029a9c", size = 311188, upload_time = "2025-04-17T22:37:25.221Z" }, + { url = "https://files.pythonhosted.org/packages/31/2f/1aa74b33f74d54817055de9a4961eff798f066cdc6f67591905d4fc82a84/frozenlist-1.6.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:e67ddb0749ed066b1a03fba812e2dcae791dd50e5da03be50b6a14d0c1a9ee45", size = 311874, upload_time = "2025-04-17T22:37:26.791Z" }, + { url = "https://files.pythonhosted.org/packages/bf/f0/cfec18838f13ebf4b37cfebc8649db5ea71a1b25dacd691444a10729776c/frozenlist-1.6.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fc5e64626e6682638d6e44398c9baf1d6ce6bc236d40b4b57255c9d3f9761f1f", size = 291897, upload_time = "2025-04-17T22:37:28.958Z" }, + { url = "https://files.pythonhosted.org/packages/ea/a5/deb39325cbbea6cd0a46db8ccd76150ae2fcbe60d63243d9df4a0b8c3205/frozenlist-1.6.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:437cfd39564744ae32ad5929e55b18ebd88817f9180e4cc05e7d53b75f79ce85", size = 305799, upload_time = "2025-04-17T22:37:30.889Z" }, + { url = "https://files.pythonhosted.org/packages/78/22/6ddec55c5243a59f605e4280f10cee8c95a449f81e40117163383829c241/frozenlist-1.6.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:62dd7df78e74d924952e2feb7357d826af8d2f307557a779d14ddf94d7311be8", size = 302804, upload_time = "2025-04-17T22:37:32.489Z" }, + { url = "https://files.pythonhosted.org/packages/5d/b7/d9ca9bab87f28855063c4d202936800219e39db9e46f9fb004d521152623/frozenlist-1.6.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:a66781d7e4cddcbbcfd64de3d41a61d6bdde370fc2e38623f30b2bd539e84a9f", size = 316404, upload_time = "2025-04-17T22:37:34.59Z" }, + { url = "https://files.pythonhosted.org/packages/a6/3a/1255305db7874d0b9eddb4fe4a27469e1fb63720f1fc6d325a5118492d18/frozenlist-1.6.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:482fe06e9a3fffbcd41950f9d890034b4a54395c60b5e61fae875d37a699813f", size = 295572, upload_time = "2025-04-17T22:37:36.337Z" }, + { url = "https://files.pythonhosted.org/packages/2a/f2/8d38eeee39a0e3a91b75867cc102159ecccf441deb6ddf67be96d3410b84/frozenlist-1.6.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:e4f9373c500dfc02feea39f7a56e4f543e670212102cc2eeb51d3a99c7ffbde6", size = 307601, upload_time = "2025-04-17T22:37:37.923Z" }, + { url = "https://files.pythonhosted.org/packages/38/04/80ec8e6b92f61ef085422d7b196822820404f940950dde5b2e367bede8bc/frozenlist-1.6.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:e69bb81de06827147b7bfbaeb284d85219fa92d9f097e32cc73675f279d70188", size = 314232, upload_time = "2025-04-17T22:37:39.669Z" }, + { url = "https://files.pythonhosted.org/packages/3a/58/93b41fb23e75f38f453ae92a2f987274c64637c450285577bd81c599b715/frozenlist-1.6.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7613d9977d2ab4a9141dde4a149f4357e4065949674c5649f920fec86ecb393e", size = 308187, upload_time = "2025-04-17T22:37:41.662Z" }, + { url = "https://files.pythonhosted.org/packages/6a/a2/e64df5c5aa36ab3dee5a40d254f3e471bb0603c225f81664267281c46a2d/frozenlist-1.6.0-cp313-cp313-win32.whl", hash = "sha256:4def87ef6d90429f777c9d9de3961679abf938cb6b7b63d4a7eb8a268babfce4", size = 114772, upload_time = "2025-04-17T22:37:43.132Z" }, + { url = "https://files.pythonhosted.org/packages/a0/77/fead27441e749b2d574bb73d693530d59d520d4b9e9679b8e3cb779d37f2/frozenlist-1.6.0-cp313-cp313-win_amd64.whl", hash = "sha256:37a8a52c3dfff01515e9bbbee0e6063181362f9de3db2ccf9bc96189b557cbfd", size = 119847, upload_time = "2025-04-17T22:37:45.118Z" }, + { url = "https://files.pythonhosted.org/packages/df/bd/cc6d934991c1e5d9cafda83dfdc52f987c7b28343686aef2e58a9cf89f20/frozenlist-1.6.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = 
"sha256:46138f5a0773d064ff663d273b309b696293d7a7c00a0994c5c13a5078134b64", size = 174937, upload_time = "2025-04-17T22:37:46.635Z" }, + { url = "https://files.pythonhosted.org/packages/f2/a2/daf945f335abdbfdd5993e9dc348ef4507436936ab3c26d7cfe72f4843bf/frozenlist-1.6.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:f88bc0a2b9c2a835cb888b32246c27cdab5740059fb3688852bf91e915399b91", size = 136029, upload_time = "2025-04-17T22:37:48.192Z" }, + { url = "https://files.pythonhosted.org/packages/51/65/4c3145f237a31247c3429e1c94c384d053f69b52110a0d04bfc8afc55fb2/frozenlist-1.6.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:777704c1d7655b802c7850255639672e90e81ad6fa42b99ce5ed3fbf45e338dd", size = 134831, upload_time = "2025-04-17T22:37:50.485Z" }, + { url = "https://files.pythonhosted.org/packages/77/38/03d316507d8dea84dfb99bdd515ea245628af964b2bf57759e3c9205cc5e/frozenlist-1.6.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:85ef8d41764c7de0dcdaf64f733a27352248493a85a80661f3c678acd27e31f2", size = 392981, upload_time = "2025-04-17T22:37:52.558Z" }, + { url = "https://files.pythonhosted.org/packages/37/02/46285ef9828f318ba400a51d5bb616ded38db8466836a9cfa39f3903260b/frozenlist-1.6.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:da5cb36623f2b846fb25009d9d9215322318ff1c63403075f812b3b2876c8506", size = 371999, upload_time = "2025-04-17T22:37:54.092Z" }, + { url = "https://files.pythonhosted.org/packages/0d/64/1212fea37a112c3c5c05bfb5f0a81af4836ce349e69be75af93f99644da9/frozenlist-1.6.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cbb56587a16cf0fb8acd19e90ff9924979ac1431baea8681712716a8337577b0", size = 392200, upload_time = "2025-04-17T22:37:55.951Z" }, + { url = "https://files.pythonhosted.org/packages/81/ce/9a6ea1763e3366e44a5208f76bf37c76c5da570772375e4d0be85180e588/frozenlist-1.6.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c6154c3ba59cda3f954c6333025369e42c3acd0c6e8b6ce31eb5c5b8116c07e0", size = 390134, upload_time = "2025-04-17T22:37:57.633Z" }, + { url = "https://files.pythonhosted.org/packages/bc/36/939738b0b495b2c6d0c39ba51563e453232813042a8d908b8f9544296c29/frozenlist-1.6.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e8246877afa3f1ae5c979fe85f567d220f86a50dc6c493b9b7d8191181ae01e", size = 365208, upload_time = "2025-04-17T22:37:59.742Z" }, + { url = "https://files.pythonhosted.org/packages/b4/8b/939e62e93c63409949c25220d1ba8e88e3960f8ef6a8d9ede8f94b459d27/frozenlist-1.6.0-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b0f6cce16306d2e117cf9db71ab3a9e8878a28176aeaf0dbe35248d97b28d0c", size = 385548, upload_time = "2025-04-17T22:38:01.416Z" }, + { url = "https://files.pythonhosted.org/packages/62/38/22d2873c90102e06a7c5a3a5b82ca47e393c6079413e8a75c72bff067fa8/frozenlist-1.6.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:1b8e8cd8032ba266f91136d7105706ad57770f3522eac4a111d77ac126a25a9b", size = 391123, upload_time = "2025-04-17T22:38:03.049Z" }, + { url = "https://files.pythonhosted.org/packages/44/78/63aaaf533ee0701549500f6d819be092c6065cb5c577edb70c09df74d5d0/frozenlist-1.6.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:e2ada1d8515d3ea5378c018a5f6d14b4994d4036591a52ceaf1a1549dec8e1ad", size = 394199, upload_time = "2025-04-17T22:38:04.776Z" }, + { url = 
"https://files.pythonhosted.org/packages/54/45/71a6b48981d429e8fbcc08454dc99c4c2639865a646d549812883e9c9dd3/frozenlist-1.6.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:cdb2c7f071e4026c19a3e32b93a09e59b12000751fc9b0b7758da899e657d215", size = 373854, upload_time = "2025-04-17T22:38:06.576Z" }, + { url = "https://files.pythonhosted.org/packages/3f/f3/dbf2a5e11736ea81a66e37288bf9f881143a7822b288a992579ba1b4204d/frozenlist-1.6.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:03572933a1969a6d6ab509d509e5af82ef80d4a5d4e1e9f2e1cdd22c77a3f4d2", size = 395412, upload_time = "2025-04-17T22:38:08.197Z" }, + { url = "https://files.pythonhosted.org/packages/b3/f1/c63166806b331f05104d8ea385c4acd511598568b1f3e4e8297ca54f2676/frozenlist-1.6.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:77effc978947548b676c54bbd6a08992759ea6f410d4987d69feea9cd0919911", size = 394936, upload_time = "2025-04-17T22:38:10.056Z" }, + { url = "https://files.pythonhosted.org/packages/ef/ea/4f3e69e179a430473eaa1a75ff986526571215fefc6b9281cdc1f09a4eb8/frozenlist-1.6.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:a2bda8be77660ad4089caf2223fdbd6db1858462c4b85b67fbfa22102021e497", size = 391459, upload_time = "2025-04-17T22:38:11.826Z" }, + { url = "https://files.pythonhosted.org/packages/d3/c3/0fc2c97dea550df9afd072a37c1e95421652e3206bbeaa02378b24c2b480/frozenlist-1.6.0-cp313-cp313t-win32.whl", hash = "sha256:a4d96dc5bcdbd834ec6b0f91027817214216b5b30316494d2b1aebffb87c534f", size = 128797, upload_time = "2025-04-17T22:38:14.013Z" }, + { url = "https://files.pythonhosted.org/packages/ae/f5/79c9320c5656b1965634fe4be9c82b12a3305bdbc58ad9cb941131107b20/frozenlist-1.6.0-cp313-cp313t-win_amd64.whl", hash = "sha256:e18036cb4caa17ea151fd5f3d70be9d354c99eb8cf817a3ccde8a7873b074348", size = 134709, upload_time = "2025-04-17T22:38:15.551Z" }, + { url = "https://files.pythonhosted.org/packages/71/3e/b04a0adda73bd52b390d730071c0d577073d3d26740ee1bad25c3ad0f37b/frozenlist-1.6.0-py3-none-any.whl", hash = "sha256:535eec9987adb04701266b92745d6cdcef2e77669299359c3009c3404dd5d191", size = 12404, upload_time = "2025-04-17T22:38:51.668Z" }, +] + +[[package]] +name = "fsspec" +version = "2024.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8b/b8/e3ba21f03c00c27adc9a8cd1cab8adfb37b6024757133924a9a4eab63a83/fsspec-2024.3.1.tar.gz", hash = "sha256:f39780e282d7d117ffb42bb96992f8a90795e4d0fb0f661a70ca39fe9c43ded9", size = 170742, upload_time = "2024-03-18T19:35:13.995Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/93/6d/66d48b03460768f523da62a57a7e14e5e95fdf339d79e996ce3cecda2cdb/fsspec-2024.3.1-py3-none-any.whl", hash = "sha256:918d18d41bf73f0e2b261824baeb1b124bcf771767e3a26425cd7dec3332f512", size = 171991, upload_time = "2024-03-18T19:35:11.259Z" }, +] + +[package.optional-dependencies] +http = [ + { name = "aiohttp" }, +] + +[[package]] +name = "gast" +version = "0.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3c/14/c566f5ca00c115db7725263408ff952b8ae6d6a4e792ef9c84e77d9af7a1/gast-0.6.0.tar.gz", hash = "sha256:88fc5300d32c7ac6ca7b515310862f71e6fdf2c029bbec7c66c0f5dd47b6b1fb", size = 27708, upload_time = "2024-06-27T20:31:49.527Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a3/61/8001b38461d751cd1a0c3a6ae84346796a5758123f3ed97a1b121dfbf4f3/gast-0.6.0-py3-none-any.whl", hash = 
"sha256:52b182313f7330389f72b069ba00f174cfe2a06411099547288839c6cbafbd54", size = 21173, upload_time = "2024-07-09T13:15:15.615Z" }, +] + +[[package]] +name = "google-api-core" +version = "2.24.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-auth" }, + { name = "googleapis-common-protos" }, + { name = "proto-plus" }, + { name = "protobuf" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/09/5c/085bcb872556934bb119e5e09de54daa07873f6866b8f0303c49e72287f7/google_api_core-2.24.2.tar.gz", hash = "sha256:81718493daf06d96d6bc76a91c23874dbf2fac0adbbf542831b805ee6e974696", size = 163516, upload_time = "2025-03-10T15:55:26.201Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/46/95/f472d85adab6e538da2025dfca9e976a0d125cc0af2301f190e77b76e51c/google_api_core-2.24.2-py3-none-any.whl", hash = "sha256:810a63ac95f3c441b7c0e43d344e372887f62ce9071ba972eacf32672e072de9", size = 160061, upload_time = "2025-03-10T15:55:24.386Z" }, +] + +[[package]] +name = "google-api-python-client" +version = "2.169.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-api-core" }, + { name = "google-auth" }, + { name = "google-auth-httplib2" }, + { name = "httplib2" }, + { name = "uritemplate" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4f/e6/787c24738fc7c99de9289abe60bd64591800ae1cdf60db7b87e0e6ef9cdd/google_api_python_client-2.169.0.tar.gz", hash = "sha256:0585bb97bd5f5bf3ed8d4bf624593e4c5a14d06c811d1952b07a1f94b4d12c51", size = 12811341, upload_time = "2025-04-29T15:46:05.603Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2d/bd/6aa93c38756cc9fc63262e0dc3d3f1ff7241ce6f413a25ad6e4a9c98b473/google_api_python_client-2.169.0-py3-none-any.whl", hash = "sha256:dae3e882dc0e6f28e60cf09c1f13fedfd881db84f824dd418aa9e44def2fe00d", size = 13323742, upload_time = "2025-04-29T15:46:02.521Z" }, +] + +[[package]] +name = "google-auth" +version = "2.40.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cachetools" }, + { name = "pyasn1-modules" }, + { name = "rsa" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/94/a5/38c21d0e731bb716cffcf987bd9a3555cb95877ab4b616cfb96939933f20/google_auth-2.40.1.tar.gz", hash = "sha256:58f0e8416a9814c1d86c9b7f6acf6816b51aba167b2c76821965271bac275540", size = 280975, upload_time = "2025-05-07T01:04:55.3Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a1/b1/1272c6e80847ba5349f5ccb7574596393d1e222543f5003cb810865c3575/google_auth-2.40.1-py2.py3-none-any.whl", hash = "sha256:ed4cae4f5c46b41bae1d19c036e06f6c371926e97b19e816fc854eff811974ee", size = 216101, upload_time = "2025-05-07T01:04:53.612Z" }, +] + +[[package]] +name = "google-auth-httplib2" +version = "0.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-auth" }, + { name = "httplib2" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/56/be/217a598a818567b28e859ff087f347475c807a5649296fb5a817c58dacef/google-auth-httplib2-0.2.0.tar.gz", hash = "sha256:38aa7badf48f974f1eb9861794e9c0cb2a0511a4ec0679b1f886d108f5640e05", size = 10842, upload_time = "2023-12-12T17:40:30.722Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/be/8a/fe34d2f3f9470a27b01c9e76226965863f153d5fbe276f83608562e49c04/google_auth_httplib2-0.2.0-py2.py3-none-any.whl", hash = "sha256:b65a0a2123300dd71281a7bf6e64d65a0759287df52729bdd1ae2e47dc311a3d", size = 9253, upload_time = 
"2023-12-12T17:40:13.055Z" }, +] + +[[package]] +name = "google-auth-oauthlib" +version = "1.2.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-auth" }, + { name = "requests-oauthlib" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fb/87/e10bf24f7bcffc1421b84d6f9c3377c30ec305d082cd737ddaa6d8f77f7c/google_auth_oauthlib-1.2.2.tar.gz", hash = "sha256:11046fb8d3348b296302dd939ace8af0a724042e8029c1b872d87fabc9f41684", size = 20955, upload_time = "2025-04-22T16:40:29.172Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ac/84/40ee070be95771acd2f4418981edb834979424565c3eec3cd88b6aa09d24/google_auth_oauthlib-1.2.2-py3-none-any.whl", hash = "sha256:fd619506f4b3908b5df17b65f39ca8d66ea56986e5472eb5978fd8f3786f00a2", size = 19072, upload_time = "2025-04-22T16:40:28.174Z" }, +] + +[[package]] +name = "google-pasta" +version = "0.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/35/4a/0bd53b36ff0323d10d5f24ebd67af2de10a1117f5cf4d7add90df92756f1/google-pasta-0.2.0.tar.gz", hash = "sha256:c9f2c8dfc8f96d0d5808299920721be30c9eec37f2389f28904f454565c8a16e", size = 40430, upload_time = "2020-03-13T18:57:50.34Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a3/de/c648ef6835192e6e2cc03f40b19eeda4382c49b5bafb43d88b931c4c74ac/google_pasta-0.2.0-py3-none-any.whl", hash = "sha256:b32482794a366b5366a32c92a9a9201b107821889935a02b3e51f6b432ea84ed", size = 57471, upload_time = "2020-03-13T18:57:48.872Z" }, +] + +[[package]] +name = "googleapis-common-protos" +version = "1.70.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/39/24/33db22342cf4a2ea27c9955e6713140fedd51e8b141b5ce5260897020f1a/googleapis_common_protos-1.70.0.tar.gz", hash = "sha256:0e1b44e0ea153e6594f9f394fef15193a68aaaea2d843f83e2742717ca753257", size = 145903, upload_time = "2025-04-14T10:17:02.924Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/86/f1/62a193f0227cf15a920390abe675f386dec35f7ae3ffe6da582d3ade42c7/googleapis_common_protos-1.70.0-py3-none-any.whl", hash = "sha256:b8bfcca8c25a2bb253e0e0b0adaf8c00773e5e6af6fd92397576680b807e0fd8", size = 294530, upload_time = "2025-04-14T10:17:01.271Z" }, +] + +[[package]] +name = "greenlet" +version = "3.1.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2f/ff/df5fede753cc10f6a5be0931204ea30c35fa2f2ea7a35b25bdaf4fe40e46/greenlet-3.1.1.tar.gz", hash = "sha256:4ce3ac6cdb6adf7946475d7ef31777c26d94bccc377e070a7986bd2d5c515467", size = 186022, upload_time = "2024-09-20T18:21:04.506Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/28/62/1c2665558618553c42922ed47a4e6d6527e2fa3516a8256c2f431c5d0441/greenlet-3.1.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:e4d333e558953648ca09d64f13e6d8f0523fa705f51cae3f03b5983489958c70", size = 272479, upload_time = "2024-09-20T17:07:22.332Z" }, + { url = "https://files.pythonhosted.org/packages/76/9d/421e2d5f07285b6e4e3a676b016ca781f63cfe4a0cd8eaecf3fd6f7a71ae/greenlet-3.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09fc016b73c94e98e29af67ab7b9a879c307c6731a2c9da0db5a7d9b7edd1159", size = 640404, upload_time = "2024-09-20T17:36:45.588Z" }, + { url = 
"https://files.pythonhosted.org/packages/e5/de/6e05f5c59262a584e502dd3d261bbdd2c97ab5416cc9c0b91ea38932a901/greenlet-3.1.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d5e975ca70269d66d17dd995dafc06f1b06e8cb1ec1e9ed54c1d1e4a7c4cf26e", size = 652813, upload_time = "2024-09-20T17:39:19.052Z" }, + { url = "https://files.pythonhosted.org/packages/49/93/d5f93c84241acdea15a8fd329362c2c71c79e1a507c3f142a5d67ea435ae/greenlet-3.1.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b2813dc3de8c1ee3f924e4d4227999285fd335d1bcc0d2be6dc3f1f6a318ec1", size = 648517, upload_time = "2024-09-20T17:44:24.101Z" }, + { url = "https://files.pythonhosted.org/packages/15/85/72f77fc02d00470c86a5c982b8daafdf65d38aefbbe441cebff3bf7037fc/greenlet-3.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e347b3bfcf985a05e8c0b7d462ba6f15b1ee1c909e2dcad795e49e91b152c383", size = 647831, upload_time = "2024-09-20T17:08:40.577Z" }, + { url = "https://files.pythonhosted.org/packages/f7/4b/1c9695aa24f808e156c8f4813f685d975ca73c000c2a5056c514c64980f6/greenlet-3.1.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9e8f8c9cb53cdac7ba9793c276acd90168f416b9ce36799b9b885790f8ad6c0a", size = 602413, upload_time = "2024-09-20T17:08:31.728Z" }, + { url = "https://files.pythonhosted.org/packages/76/70/ad6e5b31ef330f03b12559d19fda2606a522d3849cde46b24f223d6d1619/greenlet-3.1.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:62ee94988d6b4722ce0028644418d93a52429e977d742ca2ccbe1c4f4a792511", size = 1129619, upload_time = "2024-09-20T17:44:14.222Z" }, + { url = "https://files.pythonhosted.org/packages/f4/fb/201e1b932e584066e0f0658b538e73c459b34d44b4bd4034f682423bc801/greenlet-3.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1776fd7f989fc6b8d8c8cb8da1f6b82c5814957264d1f6cf818d475ec2bf6395", size = 1155198, upload_time = "2024-09-20T17:09:23.903Z" }, + { url = "https://files.pythonhosted.org/packages/12/da/b9ed5e310bb8b89661b80cbcd4db5a067903bbcd7fc854923f5ebb4144f0/greenlet-3.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:48ca08c771c268a768087b408658e216133aecd835c0ded47ce955381105ba39", size = 298930, upload_time = "2024-09-20T17:25:18.656Z" }, + { url = "https://files.pythonhosted.org/packages/7d/ec/bad1ac26764d26aa1353216fcbfa4670050f66d445448aafa227f8b16e80/greenlet-3.1.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:4afe7ea89de619adc868e087b4d2359282058479d7cfb94970adf4b55284574d", size = 274260, upload_time = "2024-09-20T17:08:07.301Z" }, + { url = "https://files.pythonhosted.org/packages/66/d4/c8c04958870f482459ab5956c2942c4ec35cac7fe245527f1039837c17a9/greenlet-3.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f406b22b7c9a9b4f8aa9d2ab13d6ae0ac3e85c9a809bd590ad53fed2bf70dc79", size = 649064, upload_time = "2024-09-20T17:36:47.628Z" }, + { url = "https://files.pythonhosted.org/packages/51/41/467b12a8c7c1303d20abcca145db2be4e6cd50a951fa30af48b6ec607581/greenlet-3.1.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c3a701fe5a9695b238503ce5bbe8218e03c3bcccf7e204e455e7462d770268aa", size = 663420, upload_time = "2024-09-20T17:39:21.258Z" }, + { url = "https://files.pythonhosted.org/packages/27/8f/2a93cd9b1e7107d5c7b3b7816eeadcac2ebcaf6d6513df9abaf0334777f6/greenlet-3.1.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2846930c65b47d70b9d178e89c7e1a69c95c1f68ea5aa0a58646b7a96df12441", size = 658035, upload_time = 
"2024-09-20T17:44:26.501Z" }, + { url = "https://files.pythonhosted.org/packages/57/5c/7c6f50cb12be092e1dccb2599be5a942c3416dbcfb76efcf54b3f8be4d8d/greenlet-3.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99cfaa2110534e2cf3ba31a7abcac9d328d1d9f1b95beede58294a60348fba36", size = 660105, upload_time = "2024-09-20T17:08:42.048Z" }, + { url = "https://files.pythonhosted.org/packages/f1/66/033e58a50fd9ec9df00a8671c74f1f3a320564c6415a4ed82a1c651654ba/greenlet-3.1.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1443279c19fca463fc33e65ef2a935a5b09bb90f978beab37729e1c3c6c25fe9", size = 613077, upload_time = "2024-09-20T17:08:33.707Z" }, + { url = "https://files.pythonhosted.org/packages/19/c5/36384a06f748044d06bdd8776e231fadf92fc896bd12cb1c9f5a1bda9578/greenlet-3.1.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b7cede291382a78f7bb5f04a529cb18e068dd29e0fb27376074b6d0317bf4dd0", size = 1135975, upload_time = "2024-09-20T17:44:15.989Z" }, + { url = "https://files.pythonhosted.org/packages/38/f9/c0a0eb61bdf808d23266ecf1d63309f0e1471f284300ce6dac0ae1231881/greenlet-3.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:23f20bb60ae298d7d8656c6ec6db134bca379ecefadb0b19ce6f19d1f232a942", size = 1163955, upload_time = "2024-09-20T17:09:25.539Z" }, + { url = "https://files.pythonhosted.org/packages/43/21/a5d9df1d21514883333fc86584c07c2b49ba7c602e670b174bd73cfc9c7f/greenlet-3.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:7124e16b4c55d417577c2077be379514321916d5790fa287c9ed6f23bd2ffd01", size = 299655, upload_time = "2024-09-20T17:21:22.427Z" }, + { url = "https://files.pythonhosted.org/packages/f3/57/0db4940cd7bb461365ca8d6fd53e68254c9dbbcc2b452e69d0d41f10a85e/greenlet-3.1.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:05175c27cb459dcfc05d026c4232f9de8913ed006d42713cb8a5137bd49375f1", size = 272990, upload_time = "2024-09-20T17:08:26.312Z" }, + { url = "https://files.pythonhosted.org/packages/1c/ec/423d113c9f74e5e402e175b157203e9102feeb7088cee844d735b28ef963/greenlet-3.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:935e943ec47c4afab8965954bf49bfa639c05d4ccf9ef6e924188f762145c0ff", size = 649175, upload_time = "2024-09-20T17:36:48.983Z" }, + { url = "https://files.pythonhosted.org/packages/a9/46/ddbd2db9ff209186b7b7c621d1432e2f21714adc988703dbdd0e65155c77/greenlet-3.1.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:667a9706c970cb552ede35aee17339a18e8f2a87a51fba2ed39ceeeb1004798a", size = 663425, upload_time = "2024-09-20T17:39:22.705Z" }, + { url = "https://files.pythonhosted.org/packages/bc/f9/9c82d6b2b04aa37e38e74f0c429aece5eeb02bab6e3b98e7db89b23d94c6/greenlet-3.1.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b8a678974d1f3aa55f6cc34dc480169d58f2e6d8958895d68845fa4ab566509e", size = 657736, upload_time = "2024-09-20T17:44:28.544Z" }, + { url = "https://files.pythonhosted.org/packages/d9/42/b87bc2a81e3a62c3de2b0d550bf91a86939442b7ff85abb94eec3fc0e6aa/greenlet-3.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:efc0f674aa41b92da8c49e0346318c6075d734994c3c4e4430b1c3f853e498e4", size = 660347, upload_time = "2024-09-20T17:08:45.56Z" }, + { url = "https://files.pythonhosted.org/packages/37/fa/71599c3fd06336cdc3eac52e6871cfebab4d9d70674a9a9e7a482c318e99/greenlet-3.1.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:0153404a4bb921f0ff1abeb5ce8a5131da56b953eda6e14b88dc6bbc04d2049e", size = 615583, upload_time = "2024-09-20T17:08:36.85Z" }, + { url = "https://files.pythonhosted.org/packages/4e/96/e9ef85de031703ee7a4483489b40cf307f93c1824a02e903106f2ea315fe/greenlet-3.1.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:275f72decf9932639c1c6dd1013a1bc266438eb32710016a1c742df5da6e60a1", size = 1133039, upload_time = "2024-09-20T17:44:18.287Z" }, + { url = "https://files.pythonhosted.org/packages/87/76/b2b6362accd69f2d1889db61a18c94bc743e961e3cab344c2effaa4b4a25/greenlet-3.1.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:c4aab7f6381f38a4b42f269057aee279ab0fc7bf2e929e3d4abfae97b682a12c", size = 1160716, upload_time = "2024-09-20T17:09:27.112Z" }, + { url = "https://files.pythonhosted.org/packages/1f/1b/54336d876186920e185066d8c3024ad55f21d7cc3683c856127ddb7b13ce/greenlet-3.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:b42703b1cf69f2aa1df7d1030b9d77d3e584a70755674d60e710f0af570f3761", size = 299490, upload_time = "2024-09-20T17:17:09.501Z" }, + { url = "https://files.pythonhosted.org/packages/5f/17/bea55bf36990e1638a2af5ba10c1640273ef20f627962cf97107f1e5d637/greenlet-3.1.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f1695e76146579f8c06c1509c7ce4dfe0706f49c6831a817ac04eebb2fd02011", size = 643731, upload_time = "2024-09-20T17:36:50.376Z" }, + { url = "https://files.pythonhosted.org/packages/78/d2/aa3d2157f9ab742a08e0fd8f77d4699f37c22adfbfeb0c610a186b5f75e0/greenlet-3.1.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7876452af029456b3f3549b696bb36a06db7c90747740c5302f74a9e9fa14b13", size = 649304, upload_time = "2024-09-20T17:39:24.55Z" }, + { url = "https://files.pythonhosted.org/packages/f1/8e/d0aeffe69e53ccff5a28fa86f07ad1d2d2d6537a9506229431a2a02e2f15/greenlet-3.1.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4ead44c85f8ab905852d3de8d86f6f8baf77109f9da589cb4fa142bd3b57b475", size = 646537, upload_time = "2024-09-20T17:44:31.102Z" }, + { url = "https://files.pythonhosted.org/packages/05/79/e15408220bbb989469c8871062c97c6c9136770657ba779711b90870d867/greenlet-3.1.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8320f64b777d00dd7ccdade271eaf0cad6636343293a25074cc5566160e4de7b", size = 642506, upload_time = "2024-09-20T17:08:47.852Z" }, + { url = "https://files.pythonhosted.org/packages/18/87/470e01a940307796f1d25f8167b551a968540fbe0551c0ebb853cb527dd6/greenlet-3.1.1-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6510bf84a6b643dabba74d3049ead221257603a253d0a9873f55f6a59a65f822", size = 602753, upload_time = "2024-09-20T17:08:38.079Z" }, + { url = "https://files.pythonhosted.org/packages/e2/72/576815ba674eddc3c25028238f74d7b8068902b3968cbe456771b166455e/greenlet-3.1.1-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:04b013dc07c96f83134b1e99888e7a79979f1a247e2a9f59697fa14b5862ed01", size = 1122731, upload_time = "2024-09-20T17:44:20.556Z" }, + { url = "https://files.pythonhosted.org/packages/ac/38/08cc303ddddc4b3d7c628c3039a61a3aae36c241ed01393d00c2fd663473/greenlet-3.1.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:411f015496fec93c1c8cd4e5238da364e1da7a124bcb293f085bf2860c32c6f6", size = 1142112, upload_time = "2024-09-20T17:09:28.753Z" }, +] + +[[package]] +name = "grpcio" +version = "1.71.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/1c/95/aa11fc09a85d91fbc7dd405dcb2a1e0256989d67bf89fa65ae24b3ba105a/grpcio-1.71.0.tar.gz", hash = "sha256:2b85f7820475ad3edec209d3d89a7909ada16caab05d3f2e08a7e8ae3200a55c", size = 12549828, upload_time = "2025-03-10T19:28:49.203Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/63/04/a085f3ad4133426f6da8c1becf0749872a49feb625a407a2e864ded3fb12/grpcio-1.71.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:d6aa986318c36508dc1d5001a3ff169a15b99b9f96ef5e98e13522c506b37eef", size = 5210453, upload_time = "2025-03-10T19:24:33.342Z" }, + { url = "https://files.pythonhosted.org/packages/b4/d5/0bc53ed33ba458de95020970e2c22aa8027b26cc84f98bea7fcad5d695d1/grpcio-1.71.0-cp311-cp311-macosx_10_14_universal2.whl", hash = "sha256:d2c170247315f2d7e5798a22358e982ad6eeb68fa20cf7a820bb74c11f0736e7", size = 11347567, upload_time = "2025-03-10T19:24:35.215Z" }, + { url = "https://files.pythonhosted.org/packages/e3/6d/ce334f7e7a58572335ccd61154d808fe681a4c5e951f8a1ff68f5a6e47ce/grpcio-1.71.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:e6f83a583ed0a5b08c5bc7a3fe860bb3c2eac1f03f1f63e0bc2091325605d2b7", size = 5696067, upload_time = "2025-03-10T19:24:37.988Z" }, + { url = "https://files.pythonhosted.org/packages/05/4a/80befd0b8b1dc2b9ac5337e57473354d81be938f87132e147c4a24a581bd/grpcio-1.71.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4be74ddeeb92cc87190e0e376dbc8fc7736dbb6d3d454f2fa1f5be1dee26b9d7", size = 6348377, upload_time = "2025-03-10T19:24:40.361Z" }, + { url = "https://files.pythonhosted.org/packages/c7/67/cbd63c485051eb78663355d9efd1b896cfb50d4a220581ec2cb9a15cd750/grpcio-1.71.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4dd0dfbe4d5eb1fcfec9490ca13f82b089a309dc3678e2edabc144051270a66e", size = 5940407, upload_time = "2025-03-10T19:24:42.685Z" }, + { url = "https://files.pythonhosted.org/packages/98/4b/7a11aa4326d7faa499f764eaf8a9b5a0eb054ce0988ee7ca34897c2b02ae/grpcio-1.71.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a2242d6950dc892afdf9e951ed7ff89473aaf744b7d5727ad56bdaace363722b", size = 6030915, upload_time = "2025-03-10T19:24:44.463Z" }, + { url = "https://files.pythonhosted.org/packages/eb/a2/cdae2d0e458b475213a011078b0090f7a1d87f9a68c678b76f6af7c6ac8c/grpcio-1.71.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:0fa05ee31a20456b13ae49ad2e5d585265f71dd19fbd9ef983c28f926d45d0a7", size = 6648324, upload_time = "2025-03-10T19:24:46.287Z" }, + { url = "https://files.pythonhosted.org/packages/27/df/f345c8daaa8d8574ce9869f9b36ca220c8845923eb3087e8f317eabfc2a8/grpcio-1.71.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3d081e859fb1ebe176de33fc3adb26c7d46b8812f906042705346b314bde32c3", size = 6197839, upload_time = "2025-03-10T19:24:48.565Z" }, + { url = "https://files.pythonhosted.org/packages/f2/2c/cd488dc52a1d0ae1bad88b0d203bc302efbb88b82691039a6d85241c5781/grpcio-1.71.0-cp311-cp311-win32.whl", hash = "sha256:d6de81c9c00c8a23047136b11794b3584cdc1460ed7cbc10eada50614baa1444", size = 3619978, upload_time = "2025-03-10T19:24:50.518Z" }, + { url = "https://files.pythonhosted.org/packages/ee/3f/cf92e7e62ccb8dbdf977499547dfc27133124d6467d3a7d23775bcecb0f9/grpcio-1.71.0-cp311-cp311-win_amd64.whl", hash = "sha256:24e867651fc67717b6f896d5f0cac0ec863a8b5fb7d6441c2ab428f52c651c6b", size = 4282279, upload_time = "2025-03-10T19:24:52.313Z" }, + { url = 
"https://files.pythonhosted.org/packages/4c/83/bd4b6a9ba07825bd19c711d8b25874cd5de72c2a3fbf635c3c344ae65bd2/grpcio-1.71.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:0ff35c8d807c1c7531d3002be03221ff9ae15712b53ab46e2a0b4bb271f38537", size = 5184101, upload_time = "2025-03-10T19:24:54.11Z" }, + { url = "https://files.pythonhosted.org/packages/31/ea/2e0d90c0853568bf714693447f5c73272ea95ee8dad107807fde740e595d/grpcio-1.71.0-cp312-cp312-macosx_10_14_universal2.whl", hash = "sha256:b78a99cd1ece4be92ab7c07765a0b038194ded2e0a26fd654591ee136088d8d7", size = 11310927, upload_time = "2025-03-10T19:24:56.1Z" }, + { url = "https://files.pythonhosted.org/packages/ac/bc/07a3fd8af80467390af491d7dc66882db43884128cdb3cc8524915e0023c/grpcio-1.71.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:dc1a1231ed23caac1de9f943d031f1bc38d0f69d2a3b243ea0d664fc1fbd7fec", size = 5654280, upload_time = "2025-03-10T19:24:58.55Z" }, + { url = "https://files.pythonhosted.org/packages/16/af/21f22ea3eed3d0538b6ef7889fce1878a8ba4164497f9e07385733391e2b/grpcio-1.71.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e6beeea5566092c5e3c4896c6d1d307fb46b1d4bdf3e70c8340b190a69198594", size = 6312051, upload_time = "2025-03-10T19:25:00.682Z" }, + { url = "https://files.pythonhosted.org/packages/49/9d/e12ddc726dc8bd1aa6cba67c85ce42a12ba5b9dd75d5042214a59ccf28ce/grpcio-1.71.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5170929109450a2c031cfe87d6716f2fae39695ad5335d9106ae88cc32dc84c", size = 5910666, upload_time = "2025-03-10T19:25:03.01Z" }, + { url = "https://files.pythonhosted.org/packages/d9/e9/38713d6d67aedef738b815763c25f092e0454dc58e77b1d2a51c9d5b3325/grpcio-1.71.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:5b08d03ace7aca7b2fadd4baf291139b4a5f058805a8327bfe9aece7253b6d67", size = 6012019, upload_time = "2025-03-10T19:25:05.174Z" }, + { url = "https://files.pythonhosted.org/packages/80/da/4813cd7adbae6467724fa46c952d7aeac5e82e550b1c62ed2aeb78d444ae/grpcio-1.71.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:f903017db76bf9cc2b2d8bdd37bf04b505bbccad6be8a81e1542206875d0e9db", size = 6637043, upload_time = "2025-03-10T19:25:06.987Z" }, + { url = "https://files.pythonhosted.org/packages/52/ca/c0d767082e39dccb7985c73ab4cf1d23ce8613387149e9978c70c3bf3b07/grpcio-1.71.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:469f42a0b410883185eab4689060a20488a1a0a00f8bbb3cbc1061197b4c5a79", size = 6186143, upload_time = "2025-03-10T19:25:08.877Z" }, + { url = "https://files.pythonhosted.org/packages/00/61/7b2c8ec13303f8fe36832c13d91ad4d4ba57204b1c723ada709c346b2271/grpcio-1.71.0-cp312-cp312-win32.whl", hash = "sha256:ad9f30838550695b5eb302add33f21f7301b882937460dd24f24b3cc5a95067a", size = 3604083, upload_time = "2025-03-10T19:25:10.736Z" }, + { url = "https://files.pythonhosted.org/packages/fd/7c/1e429c5fb26122055d10ff9a1d754790fb067d83c633ff69eddcf8e3614b/grpcio-1.71.0-cp312-cp312-win_amd64.whl", hash = "sha256:652350609332de6dac4ece254e5d7e1ff834e203d6afb769601f286886f6f3a8", size = 4272191, upload_time = "2025-03-10T19:25:13.12Z" }, + { url = "https://files.pythonhosted.org/packages/04/dd/b00cbb45400d06b26126dcfdbdb34bb6c4f28c3ebbd7aea8228679103ef6/grpcio-1.71.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:cebc1b34ba40a312ab480ccdb396ff3c529377a2fce72c45a741f7215bfe8379", size = 5184138, upload_time = "2025-03-10T19:25:15.101Z" }, + { url = 
"https://files.pythonhosted.org/packages/ed/0a/4651215983d590ef53aac40ba0e29dda941a02b097892c44fa3357e706e5/grpcio-1.71.0-cp313-cp313-macosx_10_14_universal2.whl", hash = "sha256:85da336e3649a3d2171e82f696b5cad2c6231fdd5bad52616476235681bee5b3", size = 11310747, upload_time = "2025-03-10T19:25:17.201Z" }, + { url = "https://files.pythonhosted.org/packages/57/a3/149615b247f321e13f60aa512d3509d4215173bdb982c9098d78484de216/grpcio-1.71.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:f9a412f55bb6e8f3bb000e020dbc1e709627dcb3a56f6431fa7076b4c1aab0db", size = 5653991, upload_time = "2025-03-10T19:25:20.39Z" }, + { url = "https://files.pythonhosted.org/packages/ca/56/29432a3e8d951b5e4e520a40cd93bebaa824a14033ea8e65b0ece1da6167/grpcio-1.71.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:47be9584729534660416f6d2a3108aaeac1122f6b5bdbf9fd823e11fe6fbaa29", size = 6312781, upload_time = "2025-03-10T19:25:22.823Z" }, + { url = "https://files.pythonhosted.org/packages/a3/f8/286e81a62964ceb6ac10b10925261d4871a762d2a763fbf354115f9afc98/grpcio-1.71.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c9c80ac6091c916db81131d50926a93ab162a7e97e4428ffc186b6e80d6dda4", size = 5910479, upload_time = "2025-03-10T19:25:24.828Z" }, + { url = "https://files.pythonhosted.org/packages/35/67/d1febb49ec0f599b9e6d4d0d44c2d4afdbed9c3e80deb7587ec788fcf252/grpcio-1.71.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:789d5e2a3a15419374b7b45cd680b1e83bbc1e52b9086e49308e2c0b5bbae6e3", size = 6013262, upload_time = "2025-03-10T19:25:26.987Z" }, + { url = "https://files.pythonhosted.org/packages/a1/04/f9ceda11755f0104a075ad7163fc0d96e2e3a9fe25ef38adfc74c5790daf/grpcio-1.71.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:1be857615e26a86d7363e8a163fade914595c81fec962b3d514a4b1e8760467b", size = 6643356, upload_time = "2025-03-10T19:25:29.606Z" }, + { url = "https://files.pythonhosted.org/packages/fb/ce/236dbc3dc77cf9a9242adcf1f62538734ad64727fabf39e1346ad4bd5c75/grpcio-1.71.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:a76d39b5fafd79ed604c4be0a869ec3581a172a707e2a8d7a4858cb05a5a7637", size = 6186564, upload_time = "2025-03-10T19:25:31.537Z" }, + { url = "https://files.pythonhosted.org/packages/10/fd/b3348fce9dd4280e221f513dd54024e765b21c348bc475516672da4218e9/grpcio-1.71.0-cp313-cp313-win32.whl", hash = "sha256:74258dce215cb1995083daa17b379a1a5a87d275387b7ffe137f1d5131e2cfbb", size = 3601890, upload_time = "2025-03-10T19:25:33.421Z" }, + { url = "https://files.pythonhosted.org/packages/be/f8/db5d5f3fc7e296166286c2a397836b8b042f7ad1e11028d82b061701f0f7/grpcio-1.71.0-cp313-cp313-win_amd64.whl", hash = "sha256:22c3bc8d488c039a199f7a003a38cb7635db6656fa96437a8accde8322ce2366", size = 4273308, upload_time = "2025-03-10T19:25:35.79Z" }, +] + +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload_time = "2025-04-24T03:35:25.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload_time = "2025-04-24T03:35:24.344Z" }, +] + +[[package]] +name = "h5py" +version 
= "3.13.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/03/2e/a22d6a8bfa6f8be33e7febd985680fba531562795f0a9077ed1eb047bfb0/h5py-3.13.0.tar.gz", hash = "sha256:1870e46518720023da85d0895a1960ff2ce398c5671eac3b1a41ec696b7105c3", size = 414876, upload_time = "2025-02-18T16:04:01.824Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/86/2b/50b15fdefb577d073b49699e6ea6a0a77a3a1016c2b67e2149fc50124a10/h5py-3.13.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8a8e38ef4ceb969f832cc230c0cf808c613cc47e31e768fd7b1106c55afa1cb8", size = 3422922, upload_time = "2025-02-18T16:02:36.376Z" }, + { url = "https://files.pythonhosted.org/packages/94/59/36d87a559cab9c59b59088d52e86008d27a9602ce3afc9d3b51823014bf3/h5py-3.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f35640e81b03c02a88b8bf99fb6a9d3023cc52f7c627694db2f379e0028f2868", size = 2921619, upload_time = "2025-02-18T16:02:40.722Z" }, + { url = "https://files.pythonhosted.org/packages/37/ef/6f80b19682c0b0835bbee7b253bec9c16af9004f2fd6427b1dd858100273/h5py-3.13.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:337af114616f3656da0c83b68fcf53ecd9ce9989a700b0883a6e7c483c3235d4", size = 4259366, upload_time = "2025-02-18T16:02:44.544Z" }, + { url = "https://files.pythonhosted.org/packages/03/71/c99f662d4832c8835453cf3476f95daa28372023bda4aa1fca9e97c24f09/h5py-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:782ff0ac39f455f21fd1c8ebc007328f65f43d56718a89327eec76677ebf238a", size = 4509058, upload_time = "2025-02-18T16:02:49.035Z" }, + { url = "https://files.pythonhosted.org/packages/56/89/e3ff23e07131ff73a72a349be9639e4de84e163af89c1c218b939459a98a/h5py-3.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:22ffe2a25770a2d67213a1b94f58006c14dce06933a42d2aaa0318c5868d1508", size = 2966428, upload_time = "2025-02-18T16:02:52.061Z" }, + { url = "https://files.pythonhosted.org/packages/d8/20/438f6366ba4ded80eadb38f8927f5e2cd6d2e087179552f20ae3dbcd5d5b/h5py-3.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:477c58307b6b9a2509c59c57811afb9f598aedede24a67da808262dfa0ee37b4", size = 3384442, upload_time = "2025-02-18T16:02:56.545Z" }, + { url = "https://files.pythonhosted.org/packages/10/13/cc1cb7231399617d9951233eb12fddd396ff5d4f7f057ee5d2b1ca0ee7e7/h5py-3.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:57c4c74f627c616f02b7aec608a8c706fe08cb5b0ba7c08555a4eb1dde20805a", size = 2917567, upload_time = "2025-02-18T16:03:00.079Z" }, + { url = "https://files.pythonhosted.org/packages/9e/d9/aed99e1c858dc698489f916eeb7c07513bc864885d28ab3689d572ba0ea0/h5py-3.13.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:357e6dc20b101a805ccfd0024731fbaf6e8718c18c09baf3b5e4e9d198d13fca", size = 4669544, upload_time = "2025-02-18T16:03:05.675Z" }, + { url = "https://files.pythonhosted.org/packages/a7/da/3c137006ff5f0433f0fb076b1ebe4a7bf7b5ee1e8811b5486af98b500dd5/h5py-3.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d6f13f9b5ce549448c01e4dfe08ea8d1772e6078799af2c1c8d09e941230a90d", size = 4932139, upload_time = "2025-02-18T16:03:10.129Z" }, + { url = "https://files.pythonhosted.org/packages/25/61/d897952629cae131c19d4c41b2521e7dd6382f2d7177c87615c2e6dced1a/h5py-3.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:21daf38171753899b5905f3d82c99b0b1ec2cbbe282a037cad431feb620e62ec", size = 2954179, upload_time = 
"2025-02-18T16:03:13.716Z" }, + { url = "https://files.pythonhosted.org/packages/60/43/f276f27921919a9144074320ce4ca40882fc67b3cfee81c3f5c7df083e97/h5py-3.13.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e520ec76de00943dd017c8ea3f354fa1d2f542eac994811943a8faedf2a7d5cb", size = 3358040, upload_time = "2025-02-18T16:03:20.579Z" }, + { url = "https://files.pythonhosted.org/packages/1b/86/ad4a4cf781b08d4572be8bbdd8f108bb97b266a14835c640dc43dafc0729/h5py-3.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e79d8368cd9295045956bfb436656bea3f915beaa11d342e9f79f129f5178763", size = 2892766, upload_time = "2025-02-18T16:03:26.831Z" }, + { url = "https://files.pythonhosted.org/packages/69/84/4c6367d6b58deaf0fa84999ec819e7578eee96cea6cbd613640d0625ed5e/h5py-3.13.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:56dd172d862e850823c4af02dc4ddbc308f042b85472ffdaca67f1598dff4a57", size = 4664255, upload_time = "2025-02-18T16:03:31.903Z" }, + { url = "https://files.pythonhosted.org/packages/fd/41/bc2df86b72965775f6d621e0ee269a5f3ac23e8f870abf519de9c7d93b4d/h5py-3.13.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be949b46b7388074c5acae017fbbe3e5ba303fd9daaa52157fdfef30bbdacadd", size = 4927580, upload_time = "2025-02-18T16:03:36.429Z" }, + { url = "https://files.pythonhosted.org/packages/97/34/165b87ea55184770a0c1fcdb7e017199974ad2e271451fd045cfe35f3add/h5py-3.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:4f97ecde7ac6513b21cd95efdfc38dc6d19f96f6ca6f2a30550e94e551458e0a", size = 2940890, upload_time = "2025-02-18T16:03:41.037Z" }, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload_time = "2025-04-24T22:06:22.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload_time = "2025-04-24T22:06:20.566Z" }, +] + +[[package]] +name = "httplib2" +version = "0.22.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyparsing" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3d/ad/2371116b22d616c194aa25ec410c9c6c37f23599dcd590502b74db197584/httplib2-0.22.0.tar.gz", hash = "sha256:d7a10bc5ef5ab08322488bde8c726eeee5c8618723fdb399597ec58f3d82df81", size = 351116, upload_time = "2023-03-21T22:29:37.214Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a8/6c/d2fbdaaa5959339d53ba38e94c123e4e84b8fbc4b84beb0e70d7c1608486/httplib2-0.22.0-py3-none-any.whl", hash = "sha256:14ae0a53c1ba8f3d37e9e27cf37eabb0fb9980f435ba405d546948b009dd64dc", size = 96854, upload_time = "2023-03-21T22:29:35.683Z" }, +] + +[[package]] +name = "httptools" +version = "0.6.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a7/9a/ce5e1f7e131522e6d3426e8e7a490b3a01f39a6696602e1c4f33f9e94277/httptools-0.6.4.tar.gz", hash = "sha256:4e93eee4add6493b59a5c514da98c939b244fce4a0d8879cd3f466562f4b7d5c", size = 240639, upload_time = "2024-10-16T19:45:08.902Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/7b/26/bb526d4d14c2774fe07113ca1db7255737ffbb119315839af2065abfdac3/httptools-0.6.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f47f8ed67cc0ff862b84a1189831d1d33c963fb3ce1ee0c65d3b0cbe7b711069", size = 199029, upload_time = "2024-10-16T19:44:18.427Z" }, + { url = "https://files.pythonhosted.org/packages/a6/17/3e0d3e9b901c732987a45f4f94d4e2c62b89a041d93db89eafb262afd8d5/httptools-0.6.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0614154d5454c21b6410fdf5262b4a3ddb0f53f1e1721cfd59d55f32138c578a", size = 103492, upload_time = "2024-10-16T19:44:19.515Z" }, + { url = "https://files.pythonhosted.org/packages/b7/24/0fe235d7b69c42423c7698d086d4db96475f9b50b6ad26a718ef27a0bce6/httptools-0.6.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8787367fbdfccae38e35abf7641dafc5310310a5987b689f4c32cc8cc3ee975", size = 462891, upload_time = "2024-10-16T19:44:21.067Z" }, + { url = "https://files.pythonhosted.org/packages/b1/2f/205d1f2a190b72da6ffb5f41a3736c26d6fa7871101212b15e9b5cd8f61d/httptools-0.6.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40b0f7fe4fd38e6a507bdb751db0379df1e99120c65fbdc8ee6c1d044897a636", size = 459788, upload_time = "2024-10-16T19:44:22.958Z" }, + { url = "https://files.pythonhosted.org/packages/6e/4c/d09ce0eff09057a206a74575ae8f1e1e2f0364d20e2442224f9e6612c8b9/httptools-0.6.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:40a5ec98d3f49904b9fe36827dcf1aadfef3b89e2bd05b0e35e94f97c2b14721", size = 433214, upload_time = "2024-10-16T19:44:24.513Z" }, + { url = "https://files.pythonhosted.org/packages/3e/d2/84c9e23edbccc4a4c6f96a1b8d99dfd2350289e94f00e9ccc7aadde26fb5/httptools-0.6.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:dacdd3d10ea1b4ca9df97a0a303cbacafc04b5cd375fa98732678151643d4988", size = 434120, upload_time = "2024-10-16T19:44:26.295Z" }, + { url = "https://files.pythonhosted.org/packages/d0/46/4d8e7ba9581416de1c425b8264e2cadd201eb709ec1584c381f3e98f51c1/httptools-0.6.4-cp311-cp311-win_amd64.whl", hash = "sha256:288cd628406cc53f9a541cfaf06041b4c71d751856bab45e3702191f931ccd17", size = 88565, upload_time = "2024-10-16T19:44:29.188Z" }, + { url = "https://files.pythonhosted.org/packages/bb/0e/d0b71465c66b9185f90a091ab36389a7352985fe857e352801c39d6127c8/httptools-0.6.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:df017d6c780287d5c80601dafa31f17bddb170232d85c066604d8558683711a2", size = 200683, upload_time = "2024-10-16T19:44:30.175Z" }, + { url = "https://files.pythonhosted.org/packages/e2/b8/412a9bb28d0a8988de3296e01efa0bd62068b33856cdda47fe1b5e890954/httptools-0.6.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:85071a1e8c2d051b507161f6c3e26155b5c790e4e28d7f236422dbacc2a9cc44", size = 104337, upload_time = "2024-10-16T19:44:31.786Z" }, + { url = "https://files.pythonhosted.org/packages/9b/01/6fb20be3196ffdc8eeec4e653bc2a275eca7f36634c86302242c4fbb2760/httptools-0.6.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69422b7f458c5af875922cdb5bd586cc1f1033295aa9ff63ee196a87519ac8e1", size = 508796, upload_time = "2024-10-16T19:44:32.825Z" }, + { url = "https://files.pythonhosted.org/packages/f7/d8/b644c44acc1368938317d76ac991c9bba1166311880bcc0ac297cb9d6bd7/httptools-0.6.4-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:16e603a3bff50db08cd578d54f07032ca1631450ceb972c2f834c2b860c28ea2", size = 
510837, upload_time = "2024-10-16T19:44:33.974Z" }, + { url = "https://files.pythonhosted.org/packages/52/d8/254d16a31d543073a0e57f1c329ca7378d8924e7e292eda72d0064987486/httptools-0.6.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ec4f178901fa1834d4a060320d2f3abc5c9e39766953d038f1458cb885f47e81", size = 485289, upload_time = "2024-10-16T19:44:35.111Z" }, + { url = "https://files.pythonhosted.org/packages/5f/3c/4aee161b4b7a971660b8be71a92c24d6c64372c1ab3ae7f366b3680df20f/httptools-0.6.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f9eb89ecf8b290f2e293325c646a211ff1c2493222798bb80a530c5e7502494f", size = 489779, upload_time = "2024-10-16T19:44:36.253Z" }, + { url = "https://files.pythonhosted.org/packages/12/b7/5cae71a8868e555f3f67a50ee7f673ce36eac970f029c0c5e9d584352961/httptools-0.6.4-cp312-cp312-win_amd64.whl", hash = "sha256:db78cb9ca56b59b016e64b6031eda5653be0589dba2b1b43453f6e8b405a0970", size = 88634, upload_time = "2024-10-16T19:44:37.357Z" }, + { url = "https://files.pythonhosted.org/packages/94/a3/9fe9ad23fd35f7de6b91eeb60848986058bd8b5a5c1e256f5860a160cc3e/httptools-0.6.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ade273d7e767d5fae13fa637f4d53b6e961fb7fd93c7797562663f0171c26660", size = 197214, upload_time = "2024-10-16T19:44:38.738Z" }, + { url = "https://files.pythonhosted.org/packages/ea/d9/82d5e68bab783b632023f2fa31db20bebb4e89dfc4d2293945fd68484ee4/httptools-0.6.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:856f4bc0478ae143bad54a4242fccb1f3f86a6e1be5548fecfd4102061b3a083", size = 102431, upload_time = "2024-10-16T19:44:39.818Z" }, + { url = "https://files.pythonhosted.org/packages/96/c1/cb499655cbdbfb57b577734fde02f6fa0bbc3fe9fb4d87b742b512908dff/httptools-0.6.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:322d20ea9cdd1fa98bd6a74b77e2ec5b818abdc3d36695ab402a0de8ef2865a3", size = 473121, upload_time = "2024-10-16T19:44:41.189Z" }, + { url = "https://files.pythonhosted.org/packages/af/71/ee32fd358f8a3bb199b03261f10921716990808a675d8160b5383487a317/httptools-0.6.4-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4d87b29bd4486c0093fc64dea80231f7c7f7eb4dc70ae394d70a495ab8436071", size = 473805, upload_time = "2024-10-16T19:44:42.384Z" }, + { url = "https://files.pythonhosted.org/packages/8a/0a/0d4df132bfca1507114198b766f1737d57580c9ad1cf93c1ff673e3387be/httptools-0.6.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:342dd6946aa6bda4b8f18c734576106b8a31f2fe31492881a9a160ec84ff4bd5", size = 448858, upload_time = "2024-10-16T19:44:43.959Z" }, + { url = "https://files.pythonhosted.org/packages/1e/6a/787004fdef2cabea27bad1073bf6a33f2437b4dbd3b6fb4a9d71172b1c7c/httptools-0.6.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4b36913ba52008249223042dca46e69967985fb4051951f94357ea681e1f5dc0", size = 452042, upload_time = "2024-10-16T19:44:45.071Z" }, + { url = "https://files.pythonhosted.org/packages/4d/dc/7decab5c404d1d2cdc1bb330b1bf70e83d6af0396fd4fc76fc60c0d522bf/httptools-0.6.4-cp313-cp313-win_amd64.whl", hash = "sha256:28908df1b9bb8187393d5b5db91435ccc9c8e891657f9cbb42a2541b44c82fc8", size = 87682, upload_time = "2024-10-16T19:44:46.46Z" }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload_time = "2024-12-06T15:37:23.222Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload_time = "2024-12-06T15:37:21.509Z" }, +] + +[[package]] +name = "huggingface-hub" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "fsspec" }, + { name = "packaging" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "tqdm" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e7/ce/a734204aaae6c35a22f9956ebcd8d8708ae5b842e15d6f42bd6f49e634a4/huggingface_hub-0.28.1.tar.gz", hash = "sha256:893471090c98e3b6efbdfdacafe4052b20b84d59866fb6f54c33d9af18c303ae", size = 387074, upload_time = "2025-01-30T13:45:41.519Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ea/da/6c2bea5327b640920267d3bf2c9fc114cfbd0a5de234d81cda80cc9e33c8/huggingface_hub-0.28.1-py3-none-any.whl", hash = "sha256:aa6b9a3ffdae939b72c464dbb0d7f99f56e649b55c3d52406f49e0a5a620c0a7", size = 464068, upload_time = "2025-01-30T13:45:39.514Z" }, +] + +[[package]] +name = "idna" +version = "3.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490, upload_time = "2024-09-15T18:07:39.745Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload_time = "2024-09-15T18:07:37.964Z" }, +] + +[[package]] +name = "iniconfig" +version = "2.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793, upload_time = "2025-03-19T20:09:59.721Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload_time = "2025-03-19T20:10:01.071Z" }, +] + +[[package]] +name = "jinja2" +version = "3.1.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload_time = "2025-03-05T20:05:02.478Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = 
"sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload_time = "2025-03-05T20:05:00.369Z" }, +] + +[[package]] +name = "jiter" +version = "0.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1e/c2/e4562507f52f0af7036da125bb699602ead37a2332af0788f8e0a3417f36/jiter-0.9.0.tar.gz", hash = "sha256:aadba0964deb424daa24492abc3d229c60c4a31bfee205aedbf1acc7639d7893", size = 162604, upload_time = "2025-03-10T21:37:03.278Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/23/44/e241a043f114299254e44d7e777ead311da400517f179665e59611ab0ee4/jiter-0.9.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:6c4d99c71508912a7e556d631768dcdef43648a93660670986916b297f1c54af", size = 314654, upload_time = "2025-03-10T21:35:23.939Z" }, + { url = "https://files.pythonhosted.org/packages/fb/1b/a7e5e42db9fa262baaa9489d8d14ca93f8663e7f164ed5e9acc9f467fc00/jiter-0.9.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8f60fb8ce7df529812bf6c625635a19d27f30806885139e367af93f6e734ef58", size = 320909, upload_time = "2025-03-10T21:35:26.127Z" }, + { url = "https://files.pythonhosted.org/packages/60/bf/8ebdfce77bc04b81abf2ea316e9c03b4a866a7d739cf355eae4d6fd9f6fe/jiter-0.9.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51c4e1a4f8ea84d98b7b98912aa4290ac3d1eabfde8e3c34541fae30e9d1f08b", size = 341733, upload_time = "2025-03-10T21:35:27.94Z" }, + { url = "https://files.pythonhosted.org/packages/a8/4e/754ebce77cff9ab34d1d0fa0fe98f5d42590fd33622509a3ba6ec37ff466/jiter-0.9.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f4c677c424dc76684fea3e7285a7a2a7493424bea89ac441045e6a1fb1d7b3b", size = 365097, upload_time = "2025-03-10T21:35:29.605Z" }, + { url = "https://files.pythonhosted.org/packages/32/2c/6019587e6f5844c612ae18ca892f4cd7b3d8bbf49461ed29e384a0f13d98/jiter-0.9.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2221176dfec87f3470b21e6abca056e6b04ce9bff72315cb0b243ca9e835a4b5", size = 406603, upload_time = "2025-03-10T21:35:31.696Z" }, + { url = "https://files.pythonhosted.org/packages/da/e9/c9e6546c817ab75a1a7dab6dcc698e62e375e1017113e8e983fccbd56115/jiter-0.9.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3c7adb66f899ffa25e3c92bfcb593391ee1947dbdd6a9a970e0d7e713237d572", size = 396625, upload_time = "2025-03-10T21:35:33.182Z" }, + { url = "https://files.pythonhosted.org/packages/be/bd/976b458add04271ebb5a255e992bd008546ea04bb4dcadc042a16279b4b4/jiter-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c98d27330fdfb77913c1097a7aab07f38ff2259048949f499c9901700789ac15", size = 351832, upload_time = "2025-03-10T21:35:35.394Z" }, + { url = "https://files.pythonhosted.org/packages/07/51/fe59e307aaebec9265dbad44d9d4381d030947e47b0f23531579b9a7c2df/jiter-0.9.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:eda3f8cc74df66892b1d06b5d41a71670c22d95a1ca2cbab73654745ce9d0419", size = 384590, upload_time = "2025-03-10T21:35:37.171Z" }, + { url = "https://files.pythonhosted.org/packages/db/55/5dcd2693794d8e6f4889389ff66ef3be557a77f8aeeca8973a97a7c00557/jiter-0.9.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:dd5ab5ddc11418dce28343123644a100f487eaccf1de27a459ab36d6cca31043", size = 520690, upload_time = "2025-03-10T21:35:38.717Z" }, + { url = 
"https://files.pythonhosted.org/packages/54/d5/9f51dc90985e9eb251fbbb747ab2b13b26601f16c595a7b8baba964043bd/jiter-0.9.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:42f8a68a69f047b310319ef8e2f52fdb2e7976fb3313ef27df495cf77bcad965", size = 512649, upload_time = "2025-03-10T21:35:40.157Z" }, + { url = "https://files.pythonhosted.org/packages/a6/e5/4e385945179bcf128fa10ad8dca9053d717cbe09e258110e39045c881fe5/jiter-0.9.0-cp311-cp311-win32.whl", hash = "sha256:a25519efb78a42254d59326ee417d6f5161b06f5da827d94cf521fed961b1ff2", size = 206920, upload_time = "2025-03-10T21:35:41.72Z" }, + { url = "https://files.pythonhosted.org/packages/4c/47/5e0b94c603d8e54dd1faab439b40b832c277d3b90743e7835879ab663757/jiter-0.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:923b54afdd697dfd00d368b7ccad008cccfeb1efb4e621f32860c75e9f25edbd", size = 210119, upload_time = "2025-03-10T21:35:43.46Z" }, + { url = "https://files.pythonhosted.org/packages/af/d7/c55086103d6f29b694ec79156242304adf521577530d9031317ce5338c59/jiter-0.9.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:7b46249cfd6c48da28f89eb0be3f52d6fdb40ab88e2c66804f546674e539ec11", size = 309203, upload_time = "2025-03-10T21:35:44.852Z" }, + { url = "https://files.pythonhosted.org/packages/b0/01/f775dfee50beb420adfd6baf58d1c4d437de41c9b666ddf127c065e5a488/jiter-0.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:609cf3c78852f1189894383cf0b0b977665f54cb38788e3e6b941fa6d982c00e", size = 319678, upload_time = "2025-03-10T21:35:46.365Z" }, + { url = "https://files.pythonhosted.org/packages/ab/b8/09b73a793714726893e5d46d5c534a63709261af3d24444ad07885ce87cb/jiter-0.9.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d726a3890a54561e55a9c5faea1f7655eda7f105bd165067575ace6e65f80bb2", size = 341816, upload_time = "2025-03-10T21:35:47.856Z" }, + { url = "https://files.pythonhosted.org/packages/35/6f/b8f89ec5398b2b0d344257138182cc090302854ed63ed9c9051e9c673441/jiter-0.9.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2e89dc075c1fef8fa9be219e249f14040270dbc507df4215c324a1839522ea75", size = 364152, upload_time = "2025-03-10T21:35:49.397Z" }, + { url = "https://files.pythonhosted.org/packages/9b/ca/978cc3183113b8e4484cc7e210a9ad3c6614396e7abd5407ea8aa1458eef/jiter-0.9.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:04e8ffa3c353b1bc4134f96f167a2082494351e42888dfcf06e944f2729cbe1d", size = 406991, upload_time = "2025-03-10T21:35:50.745Z" }, + { url = "https://files.pythonhosted.org/packages/13/3a/72861883e11a36d6aa314b4922125f6ae90bdccc225cd96d24cc78a66385/jiter-0.9.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:203f28a72a05ae0e129b3ed1f75f56bc419d5f91dfacd057519a8bd137b00c42", size = 395824, upload_time = "2025-03-10T21:35:52.162Z" }, + { url = "https://files.pythonhosted.org/packages/87/67/22728a86ef53589c3720225778f7c5fdb617080e3deaed58b04789418212/jiter-0.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fca1a02ad60ec30bb230f65bc01f611c8608b02d269f998bc29cca8619a919dc", size = 351318, upload_time = "2025-03-10T21:35:53.566Z" }, + { url = "https://files.pythonhosted.org/packages/69/b9/f39728e2e2007276806d7a6609cda7fac44ffa28ca0d02c49a4f397cc0d9/jiter-0.9.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:237e5cee4d5d2659aaf91bbf8ec45052cc217d9446070699441a91b386ae27dc", size = 384591, upload_time = "2025-03-10T21:35:54.95Z" }, + { url = 
"https://files.pythonhosted.org/packages/eb/8f/8a708bc7fd87b8a5d861f1c118a995eccbe6d672fe10c9753e67362d0dd0/jiter-0.9.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:528b6b71745e7326eed73c53d4aa57e2a522242320b6f7d65b9c5af83cf49b6e", size = 520746, upload_time = "2025-03-10T21:35:56.444Z" }, + { url = "https://files.pythonhosted.org/packages/95/1e/65680c7488bd2365dbd2980adaf63c562d3d41d3faac192ebc7ef5b4ae25/jiter-0.9.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9f48e86b57bc711eb5acdfd12b6cb580a59cc9a993f6e7dcb6d8b50522dcd50d", size = 512754, upload_time = "2025-03-10T21:35:58.789Z" }, + { url = "https://files.pythonhosted.org/packages/78/f3/fdc43547a9ee6e93c837685da704fb6da7dba311fc022e2766d5277dfde5/jiter-0.9.0-cp312-cp312-win32.whl", hash = "sha256:699edfde481e191d81f9cf6d2211debbfe4bd92f06410e7637dffb8dd5dfde06", size = 207075, upload_time = "2025-03-10T21:36:00.616Z" }, + { url = "https://files.pythonhosted.org/packages/cd/9d/742b289016d155f49028fe1bfbeb935c9bf0ffeefdf77daf4a63a42bb72b/jiter-0.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:099500d07b43f61d8bd780466d429c45a7b25411b334c60ca875fa775f68ccb0", size = 207999, upload_time = "2025-03-10T21:36:02.366Z" }, + { url = "https://files.pythonhosted.org/packages/e7/1b/4cd165c362e8f2f520fdb43245e2b414f42a255921248b4f8b9c8d871ff1/jiter-0.9.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:2764891d3f3e8b18dce2cff24949153ee30c9239da7c00f032511091ba688ff7", size = 308197, upload_time = "2025-03-10T21:36:03.828Z" }, + { url = "https://files.pythonhosted.org/packages/13/aa/7a890dfe29c84c9a82064a9fe36079c7c0309c91b70c380dc138f9bea44a/jiter-0.9.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:387b22fbfd7a62418d5212b4638026d01723761c75c1c8232a8b8c37c2f1003b", size = 318160, upload_time = "2025-03-10T21:36:05.281Z" }, + { url = "https://files.pythonhosted.org/packages/6a/38/5888b43fc01102f733f085673c4f0be5a298f69808ec63de55051754e390/jiter-0.9.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d8da8629ccae3606c61d9184970423655fb4e33d03330bcdfe52d234d32f69", size = 341259, upload_time = "2025-03-10T21:36:06.716Z" }, + { url = "https://files.pythonhosted.org/packages/3d/5e/bbdbb63305bcc01006de683b6228cd061458b9b7bb9b8d9bc348a58e5dc2/jiter-0.9.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a1be73d8982bdc278b7b9377426a4b44ceb5c7952073dd7488e4ae96b88e1103", size = 363730, upload_time = "2025-03-10T21:36:08.138Z" }, + { url = "https://files.pythonhosted.org/packages/75/85/53a3edc616992fe4af6814c25f91ee3b1e22f7678e979b6ea82d3bc0667e/jiter-0.9.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2228eaaaa111ec54b9e89f7481bffb3972e9059301a878d085b2b449fbbde635", size = 405126, upload_time = "2025-03-10T21:36:10.934Z" }, + { url = "https://files.pythonhosted.org/packages/ae/b3/1ee26b12b2693bd3f0b71d3188e4e5d817b12e3c630a09e099e0a89e28fa/jiter-0.9.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:11509bfecbc319459647d4ac3fd391d26fdf530dad00c13c4dadabf5b81f01a4", size = 393668, upload_time = "2025-03-10T21:36:12.468Z" }, + { url = "https://files.pythonhosted.org/packages/11/87/e084ce261950c1861773ab534d49127d1517b629478304d328493f980791/jiter-0.9.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f22238da568be8bbd8e0650e12feeb2cfea15eda4f9fc271d3b362a4fa0604d", size = 352350, upload_time = "2025-03-10T21:36:14.148Z" }, + { url = 
"https://files.pythonhosted.org/packages/f0/06/7dca84b04987e9df563610aa0bc154ea176e50358af532ab40ffb87434df/jiter-0.9.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:17f5d55eb856597607562257c8e36c42bc87f16bef52ef7129b7da11afc779f3", size = 384204, upload_time = "2025-03-10T21:36:15.545Z" }, + { url = "https://files.pythonhosted.org/packages/16/2f/82e1c6020db72f397dd070eec0c85ebc4df7c88967bc86d3ce9864148f28/jiter-0.9.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:6a99bed9fbb02f5bed416d137944419a69aa4c423e44189bc49718859ea83bc5", size = 520322, upload_time = "2025-03-10T21:36:17.016Z" }, + { url = "https://files.pythonhosted.org/packages/36/fd/4f0cd3abe83ce208991ca61e7e5df915aa35b67f1c0633eb7cf2f2e88ec7/jiter-0.9.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e057adb0cd1bd39606100be0eafe742de2de88c79df632955b9ab53a086b3c8d", size = 512184, upload_time = "2025-03-10T21:36:18.47Z" }, + { url = "https://files.pythonhosted.org/packages/a0/3c/8a56f6d547731a0b4410a2d9d16bf39c861046f91f57c98f7cab3d2aa9ce/jiter-0.9.0-cp313-cp313-win32.whl", hash = "sha256:f7e6850991f3940f62d387ccfa54d1a92bd4bb9f89690b53aea36b4364bcab53", size = 206504, upload_time = "2025-03-10T21:36:19.809Z" }, + { url = "https://files.pythonhosted.org/packages/f4/1c/0c996fd90639acda75ed7fa698ee5fd7d80243057185dc2f63d4c1c9f6b9/jiter-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:c8ae3bf27cd1ac5e6e8b7a27487bf3ab5f82318211ec2e1346a5b058756361f7", size = 204943, upload_time = "2025-03-10T21:36:21.536Z" }, + { url = "https://files.pythonhosted.org/packages/78/0f/77a63ca7aa5fed9a1b9135af57e190d905bcd3702b36aca46a01090d39ad/jiter-0.9.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f0b2827fb88dda2cbecbbc3e596ef08d69bda06c6f57930aec8e79505dc17001", size = 317281, upload_time = "2025-03-10T21:36:22.959Z" }, + { url = "https://files.pythonhosted.org/packages/f9/39/a3a1571712c2bf6ec4c657f0d66da114a63a2e32b7e4eb8e0b83295ee034/jiter-0.9.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:062b756ceb1d40b0b28f326cba26cfd575a4918415b036464a52f08632731e5a", size = 350273, upload_time = "2025-03-10T21:36:24.414Z" }, + { url = "https://files.pythonhosted.org/packages/ee/47/3729f00f35a696e68da15d64eb9283c330e776f3b5789bac7f2c0c4df209/jiter-0.9.0-cp313-cp313t-win_amd64.whl", hash = "sha256:6f7838bc467ab7e8ef9f387bd6de195c43bad82a569c1699cb822f6609dd4cdf", size = 206867, upload_time = "2025-03-10T21:36:25.843Z" }, +] + +[[package]] +name = "keras" +version = "2.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/03/80072f4ee46e3c77e95b06d684fadf90a67759e4e9f1d86a563e0965c71a/keras-2.15.0.tar.gz", hash = "sha256:81871d298c064dc4ac6b58440fdae67bfcf47c8d7ad28580fab401834c06a575", size = 1252015, upload_time = "2023-11-07T00:39:57.716Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/a7/0d4490de967a67f68a538cc9cdb259bff971c4b5787f7765dc7c8f118f71/keras-2.15.0-py3-none-any.whl", hash = "sha256:2dcc6d2e30cf9c951064b63c1f4c404b966c59caf09e01f3549138ec8ee0dd1f", size = 1710438, upload_time = "2023-11-07T00:39:55.57Z" }, +] + +[[package]] +name = "libclang" +version = "18.1.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6e/5c/ca35e19a4f142adffa27e3d652196b7362fa612243e2b916845d801454fc/libclang-18.1.1.tar.gz", hash = "sha256:a1214966d08d73d971287fc3ead8dfaf82eb07fb197680d8b3859dbbbbf78250", size = 39612, upload_time = 
"2024-03-17T16:04:37.434Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4b/49/f5e3e7e1419872b69f6f5e82ba56e33955a74bd537d8a1f5f1eff2f3668a/libclang-18.1.1-1-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:0b2e143f0fac830156feb56f9231ff8338c20aecfe72b4ffe96f19e5a1dbb69a", size = 25836045, upload_time = "2024-06-30T17:40:31.646Z" }, + { url = "https://files.pythonhosted.org/packages/e2/e5/fc61bbded91a8830ccce94c5294ecd6e88e496cc85f6704bf350c0634b70/libclang-18.1.1-py2.py3-none-macosx_10_9_x86_64.whl", hash = "sha256:6f14c3f194704e5d09769108f03185fce7acaf1d1ae4bbb2f30a72c2400cb7c5", size = 26502641, upload_time = "2024-03-18T15:52:26.722Z" }, + { url = "https://files.pythonhosted.org/packages/db/ed/1df62b44db2583375f6a8a5e2ca5432bbdc3edb477942b9b7c848c720055/libclang-18.1.1-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:83ce5045d101b669ac38e6da8e58765f12da2d3aafb3b9b98d88b286a60964d8", size = 26420207, upload_time = "2024-03-17T15:00:26.63Z" }, + { url = "https://files.pythonhosted.org/packages/1d/fc/716c1e62e512ef1c160e7984a73a5fc7df45166f2ff3f254e71c58076f7c/libclang-18.1.1-py2.py3-none-manylinux2010_x86_64.whl", hash = "sha256:c533091d8a3bbf7460a00cb6c1a71da93bffe148f172c7d03b1c31fbf8aa2a0b", size = 24515943, upload_time = "2024-03-17T16:03:45.942Z" }, + { url = "https://files.pythonhosted.org/packages/3c/3d/f0ac1150280d8d20d059608cf2d5ff61b7c3b7f7bcf9c0f425ab92df769a/libclang-18.1.1-py2.py3-none-manylinux2014_aarch64.whl", hash = "sha256:54dda940a4a0491a9d1532bf071ea3ef26e6dbaf03b5000ed94dd7174e8f9592", size = 23784972, upload_time = "2024-03-17T16:12:47.677Z" }, + { url = "https://files.pythonhosted.org/packages/fe/2f/d920822c2b1ce9326a4c78c0c2b4aa3fde610c7ee9f631b600acb5376c26/libclang-18.1.1-py2.py3-none-manylinux2014_armv7l.whl", hash = "sha256:cf4a99b05376513717ab5d82a0db832c56ccea4fd61a69dbb7bccf2dfb207dbe", size = 20259606, upload_time = "2024-03-17T16:17:42.437Z" }, + { url = "https://files.pythonhosted.org/packages/2d/c2/de1db8c6d413597076a4259cea409b83459b2db997c003578affdd32bf66/libclang-18.1.1-py2.py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:69f8eb8f65c279e765ffd28aaa7e9e364c776c17618af8bff22a8df58677ff4f", size = 24921494, upload_time = "2024-03-17T16:14:20.132Z" }, + { url = "https://files.pythonhosted.org/packages/0b/2d/3f480b1e1d31eb3d6de5e3ef641954e5c67430d5ac93b7fa7e07589576c7/libclang-18.1.1-py2.py3-none-win_amd64.whl", hash = "sha256:4dd2d3b82fab35e2bf9ca717d7b63ac990a3519c7e312f19fa8e86dcc712f7fb", size = 26415083, upload_time = "2024-03-17T16:42:21.703Z" }, + { url = "https://files.pythonhosted.org/packages/71/cf/e01dc4cc79779cd82d77888a88ae2fa424d93b445ad4f6c02bfc18335b70/libclang-18.1.1-py2.py3-none-win_arm64.whl", hash = "sha256:3f0e1f49f04d3cd198985fea0511576b0aee16f9ff0e0f0cad7f9c57ec3c20e8", size = 22361112, upload_time = "2024-03-17T16:42:59.565Z" }, +] + +[[package]] +name = "lxml" +version = "5.1.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/73/a6/0730ff6cbb87e42e1329a486fe4ccbd3f8f728cb629c2671b0d093a85918/lxml-5.1.1.tar.gz", hash = "sha256:42a8aa957e98bd8b884a8142175ec24ce4ef0a57760e8879f193bfe64b757ca9", size = 3838907, upload_time = "2024-03-29T06:46:52.155Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8e/01/977ac832ec441dbde7b373faef715d8f58c4052cc88ae01070be7f3d7907/lxml-5.1.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:906966babd374fdfe46e130fc656488003f0d0d63b7cba612aa5a796c8804283", size = 8756105, upload_time = 
"2024-03-29T06:43:08.757Z" }, + { url = "https://files.pythonhosted.org/packages/7e/0a/8ef5c87c72ba4d9a8765c829d1abc28c8482ade37735c7c2725221243d3d/lxml-5.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9c03f3715c68fc707d9383d56e482d95d198ba07cb3dad4aee9e5a5ca06b2536", size = 4751802, upload_time = "2024-03-29T06:43:13.165Z" }, + { url = "https://files.pythonhosted.org/packages/b5/2a/9096d632371ce48dafcd0459520c9afd60d3b26b6c00a5d3f8e93fdb089d/lxml-5.1.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d26243d994d4077a50056e9008848e5b421be0c6f0fd4e932a9463e1d89fc42b", size = 5202069, upload_time = "2024-03-29T06:43:16.426Z" }, + { url = "https://files.pythonhosted.org/packages/7e/03/dea246cbe3d959062751ec1aa031972e61680ae4a60c67df08bb1305b465/lxml-5.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2de00750318ae6869b9dfa6429a4f82b8ecad043049414547474d09db549c2ee", size = 4921442, upload_time = "2024-03-29T06:43:19.842Z" }, + { url = "https://files.pythonhosted.org/packages/84/71/0d510fe3f99a8ddb776d7b803ed1f41b9eb64b30c5f945f241edf238adfa/lxml-5.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:29b2771b4eec4e85063f10294facdd9829d010e6cc9668040d0cf936dc56733a", size = 5084186, upload_time = "2024-03-29T06:43:23.7Z" }, + { url = "https://files.pythonhosted.org/packages/de/c3/9fb0276ad05f3dc454d2f8165181039da4cbfb605f53816d7f34d5e93cca/lxml-5.1.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d9358f7268c161dc0a1c3216018f26c04954b5dd47ba6dead79da6598f4725d4", size = 4962146, upload_time = "2024-03-29T06:43:27.312Z" }, + { url = "https://files.pythonhosted.org/packages/eb/4f/533dd6ece9f4aa2c8455244c074f61facb23944271cc82bcceccc1eca8a1/lxml-5.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8a943826e7a9254eed661a7134fcde3c832a9fecd989d0f47c6e08c7b769cb2c", size = 5094316, upload_time = "2024-03-29T06:43:30.877Z" }, + { url = "https://files.pythonhosted.org/packages/f9/bd/62cc8a995bd34b1f44fc3706bab0c21bde489dc56482a5f4c9a6bb11ff65/lxml-5.1.1-cp311-cp311-win32.whl", hash = "sha256:74d0967c6f91eec6fe91159f9e8ccb3720fa0fbf9f462109c7bef62550df397c", size = 3560964, upload_time = "2024-03-29T06:43:34.609Z" }, + { url = "https://files.pythonhosted.org/packages/02/7e/af62091cc2c3096573458cec140a914b54f4b36892f549449cc556ed34cb/lxml-5.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:26974096654241df08a30dc2eb0e139c1ad5653660aa4b2ced66000230e96c14", size = 3909680, upload_time = "2024-03-29T06:43:38.221Z" }, + { url = "https://files.pythonhosted.org/packages/e3/0a/3901402aef812c57c27d1bb5405a29abb345fbd7e1b595d060bb065e46c6/lxml-5.1.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:55e13a19829dcdbf0c5233062977aeb6daf72e65124909128045976f659164e8", size = 8786323, upload_time = "2024-03-29T06:43:42.886Z" }, + { url = "https://files.pythonhosted.org/packages/04/92/74df36e8ccecdc96260531f0cbbf849ed25d3ff77a5655a3c89d588e982d/lxml-5.1.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:adedfb61be862f48907218e3a24bf051fd2ecca53358f3958b0bdb17d7881c20", size = 4764866, upload_time = "2024-03-29T06:43:46.476Z" }, + { url = "https://files.pythonhosted.org/packages/7f/b2/5dfbbec91014ffac561d51d4e3467587a646572f111fd7ddd076568d34c7/lxml-5.1.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:77425482e4311d1cff119a2b5ab26c52ec209d2a3d728a54db3223ab91995e20", size = 5153741, upload_time = 
"2024-03-29T06:43:50.444Z" }, + { url = "https://files.pythonhosted.org/packages/5b/cf/3da2e345dd19b509c9d269000f16888f4ef50f8ca742c268f8142a7e0b84/lxml-5.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d380f183bd03ab827899753ea96dabe27d2025eb0bfd4f2ac0eee4afa0f351d", size = 4853573, upload_time = "2024-03-29T06:43:53.449Z" }, + { url = "https://files.pythonhosted.org/packages/67/78/aad9c76bf995febcacd836e12ecc670c89737502ebe44f69c472918c8ffd/lxml-5.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8682af96b5ad5093aab9eee5e4ff24cb7a9796c78699d914dd456ebfe7484a6", size = 5035899, upload_time = "2024-03-29T06:43:57.342Z" }, + { url = "https://files.pythonhosted.org/packages/7e/88/d0cb086fb1b72fec96bb45aad1058ec31b9df3b146245747c0601490428b/lxml-5.1.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:68eed33377a9925aed7ba56c8611d50aaa1e45638c07a92b4b4b0a0436cc2dd2", size = 4896851, upload_time = "2024-03-29T06:44:00.471Z" }, + { url = "https://files.pythonhosted.org/packages/5d/69/8cb0a076851dcc5fa185042d3f19e61edb596d677280085873fd49043529/lxml-5.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:c7c1d2f6e9c7a1c4478146ee38d16dbe0eb3be998424bc0f01346c671c38b86d", size = 5048250, upload_time = "2024-03-29T06:44:04.436Z" }, + { url = "https://files.pythonhosted.org/packages/3f/d7/a3d5f104c46231060d3e177ad946bf5c0bbc5652f960fbf2dedb66f0f9f7/lxml-5.1.1-cp312-cp312-win32.whl", hash = "sha256:81107c8de3e463052ae8fd05fd31b97c371c7a9ce4a189b8bb5f45b0b3545fb9", size = 3571032, upload_time = "2024-03-29T06:44:07.247Z" }, + { url = "https://files.pythonhosted.org/packages/d6/6b/a7c513c461b1448122d27faeb8f4b61150777816303a21fa6f9bb8be3266/lxml-5.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:0e46181d15fae102c53621bed9356b7a599a1e837b978c934a350dd00842b1d9", size = 3909299, upload_time = "2024-03-29T06:44:11.354Z" }, +] + +[[package]] +name = "mako" +version = "1.3.10" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9e/38/bd5b78a920a64d708fe6bc8e0a2c075e1389d53bef8413725c63ba041535/mako-1.3.10.tar.gz", hash = "sha256:99579a6f39583fa7e5630a28c3c1f440e4e97a414b80372649c0ce338da2ea28", size = 392474, upload_time = "2025-04-10T12:44:31.16Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/87/fb/99f81ac72ae23375f22b7afdb7642aba97c00a713c217124420147681a2f/mako-1.3.10-py3-none-any.whl", hash = "sha256:baef24a52fc4fc514a0887ac600f9f1cff3d82c61d4d700a1fa84d597b88db59", size = 78509, upload_time = "2025-04-10T12:50:53.297Z" }, +] + +[[package]] +name = "markdown" +version = "3.8" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2f/15/222b423b0b88689c266d9eac4e61396fe2cc53464459d6a37618ac863b24/markdown-3.8.tar.gz", hash = "sha256:7df81e63f0df5c4b24b7d156eb81e4690595239b7d70937d0409f1b0de319c6f", size = 360906, upload_time = "2025-04-11T14:42:50.928Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/51/3f/afe76f8e2246ffbc867440cbcf90525264df0e658f8a5ca1f872b3f6192a/markdown-3.8-py3-none-any.whl", hash = "sha256:794a929b79c5af141ef5ab0f2f642d0f7b1872981250230e72682346f7cc90dc", size = 106210, upload_time = "2025-04-11T14:42:49.178Z" }, +] + +[[package]] +name = "markdown-it-py" +version = "3.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/38/71/3b932df36c1a044d397a1f92d1cf91ee0a503d91e470cbd670aa66b07ed0/markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb", size = 74596, upload_time = "2023-06-03T06:41:14.443Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528, upload_time = "2023-06-03T06:41:11.019Z" }, +] + +[[package]] +name = "markupsafe" +version = "3.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/97/5d42485e71dfc078108a86d6de8fa46db44a1a9295e89c5d6d4a06e23a62/markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0", size = 20537, upload_time = "2024-10-18T15:21:54.129Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6b/28/bbf83e3f76936960b850435576dd5e67034e200469571be53f69174a2dfd/MarkupSafe-3.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9025b4018f3a1314059769c7bf15441064b2207cb3f065e6ea1e7359cb46db9d", size = 14353, upload_time = "2024-10-18T15:21:02.187Z" }, + { url = "https://files.pythonhosted.org/packages/6c/30/316d194b093cde57d448a4c3209f22e3046c5bb2fb0820b118292b334be7/MarkupSafe-3.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:93335ca3812df2f366e80509ae119189886b0f3c2b81325d39efdb84a1e2ae93", size = 12392, upload_time = "2024-10-18T15:21:02.941Z" }, + { url = "https://files.pythonhosted.org/packages/f2/96/9cdafba8445d3a53cae530aaf83c38ec64c4d5427d975c974084af5bc5d2/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cb8438c3cbb25e220c2ab33bb226559e7afb3baec11c4f218ffa7308603c832", size = 23984, upload_time = "2024-10-18T15:21:03.953Z" }, + { url = "https://files.pythonhosted.org/packages/f1/a4/aefb044a2cd8d7334c8a47d3fb2c9f328ac48cb349468cc31c20b539305f/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a123e330ef0853c6e822384873bef7507557d8e4a082961e1defa947aa59ba84", size = 23120, upload_time = "2024-10-18T15:21:06.495Z" }, + { url = "https://files.pythonhosted.org/packages/8d/21/5e4851379f88f3fad1de30361db501300d4f07bcad047d3cb0449fc51f8c/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e084f686b92e5b83186b07e8a17fc09e38fff551f3602b249881fec658d3eca", size = 23032, upload_time = "2024-10-18T15:21:07.295Z" }, + { url = "https://files.pythonhosted.org/packages/00/7b/e92c64e079b2d0d7ddf69899c98842f3f9a60a1ae72657c89ce2655c999d/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8213e09c917a951de9d09ecee036d5c7d36cb6cb7dbaece4c71a60d79fb9798", size = 24057, upload_time = "2024-10-18T15:21:08.073Z" }, + { url = "https://files.pythonhosted.org/packages/f9/ac/46f960ca323037caa0a10662ef97d0a4728e890334fc156b9f9e52bcc4ca/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5b02fb34468b6aaa40dfc198d813a641e3a63b98c2b05a16b9f80b7ec314185e", size = 23359, upload_time = "2024-10-18T15:21:09.318Z" }, + { url = "https://files.pythonhosted.org/packages/69/84/83439e16197337b8b14b6a5b9c2105fff81d42c2a7c5b58ac7b62ee2c3b1/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0bff5e0ae4ef2e1ae4fdf2dfd5b76c75e5c2fa4132d05fc1b0dabcd20c7e28c4", size 
= 23306, upload_time = "2024-10-18T15:21:10.185Z" }, + { url = "https://files.pythonhosted.org/packages/9a/34/a15aa69f01e2181ed8d2b685c0d2f6655d5cca2c4db0ddea775e631918cd/MarkupSafe-3.0.2-cp311-cp311-win32.whl", hash = "sha256:6c89876f41da747c8d3677a2b540fb32ef5715f97b66eeb0c6b66f5e3ef6f59d", size = 15094, upload_time = "2024-10-18T15:21:11.005Z" }, + { url = "https://files.pythonhosted.org/packages/da/b8/3a3bd761922d416f3dc5d00bfbed11f66b1ab89a0c2b6e887240a30b0f6b/MarkupSafe-3.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:70a87b411535ccad5ef2f1df5136506a10775d267e197e4cf531ced10537bd6b", size = 15521, upload_time = "2024-10-18T15:21:12.911Z" }, + { url = "https://files.pythonhosted.org/packages/22/09/d1f21434c97fc42f09d290cbb6350d44eb12f09cc62c9476effdb33a18aa/MarkupSafe-3.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf", size = 14274, upload_time = "2024-10-18T15:21:13.777Z" }, + { url = "https://files.pythonhosted.org/packages/6b/b0/18f76bba336fa5aecf79d45dcd6c806c280ec44538b3c13671d49099fdd0/MarkupSafe-3.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225", size = 12348, upload_time = "2024-10-18T15:21:14.822Z" }, + { url = "https://files.pythonhosted.org/packages/e0/25/dd5c0f6ac1311e9b40f4af06c78efde0f3b5cbf02502f8ef9501294c425b/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028", size = 24149, upload_time = "2024-10-18T15:21:15.642Z" }, + { url = "https://files.pythonhosted.org/packages/f3/f0/89e7aadfb3749d0f52234a0c8c7867877876e0a20b60e2188e9850794c17/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8", size = 23118, upload_time = "2024-10-18T15:21:17.133Z" }, + { url = "https://files.pythonhosted.org/packages/d5/da/f2eeb64c723f5e3777bc081da884b414671982008c47dcc1873d81f625b6/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c", size = 22993, upload_time = "2024-10-18T15:21:18.064Z" }, + { url = "https://files.pythonhosted.org/packages/da/0e/1f32af846df486dce7c227fe0f2398dc7e2e51d4a370508281f3c1c5cddc/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557", size = 24178, upload_time = "2024-10-18T15:21:18.859Z" }, + { url = "https://files.pythonhosted.org/packages/c4/f6/bb3ca0532de8086cbff5f06d137064c8410d10779c4c127e0e47d17c0b71/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22", size = 23319, upload_time = "2024-10-18T15:21:19.671Z" }, + { url = "https://files.pythonhosted.org/packages/a2/82/8be4c96ffee03c5b4a034e60a31294daf481e12c7c43ab8e34a1453ee48b/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48", size = 23352, upload_time = "2024-10-18T15:21:20.971Z" }, + { url = "https://files.pythonhosted.org/packages/51/ae/97827349d3fcffee7e184bdf7f41cd6b88d9919c80f0263ba7acd1bbcb18/MarkupSafe-3.0.2-cp312-cp312-win32.whl", hash = "sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30", size = 15097, upload_time = 
"2024-10-18T15:21:22.646Z" }, + { url = "https://files.pythonhosted.org/packages/c1/80/a61f99dc3a936413c3ee4e1eecac96c0da5ed07ad56fd975f1a9da5bc630/MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87", size = 15601, upload_time = "2024-10-18T15:21:23.499Z" }, + { url = "https://files.pythonhosted.org/packages/83/0e/67eb10a7ecc77a0c2bbe2b0235765b98d164d81600746914bebada795e97/MarkupSafe-3.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ba9527cdd4c926ed0760bc301f6728ef34d841f405abf9d4f959c478421e4efd", size = 14274, upload_time = "2024-10-18T15:21:24.577Z" }, + { url = "https://files.pythonhosted.org/packages/2b/6d/9409f3684d3335375d04e5f05744dfe7e9f120062c9857df4ab490a1031a/MarkupSafe-3.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f8b3d067f2e40fe93e1ccdd6b2e1d16c43140e76f02fb1319a05cf2b79d99430", size = 12352, upload_time = "2024-10-18T15:21:25.382Z" }, + { url = "https://files.pythonhosted.org/packages/d2/f5/6eadfcd3885ea85fe2a7c128315cc1bb7241e1987443d78c8fe712d03091/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:569511d3b58c8791ab4c2e1285575265991e6d8f8700c7be0e88f86cb0672094", size = 24122, upload_time = "2024-10-18T15:21:26.199Z" }, + { url = "https://files.pythonhosted.org/packages/0c/91/96cf928db8236f1bfab6ce15ad070dfdd02ed88261c2afafd4b43575e9e9/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15ab75ef81add55874e7ab7055e9c397312385bd9ced94920f2802310c930396", size = 23085, upload_time = "2024-10-18T15:21:27.029Z" }, + { url = "https://files.pythonhosted.org/packages/c2/cf/c9d56af24d56ea04daae7ac0940232d31d5a8354f2b457c6d856b2057d69/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3818cb119498c0678015754eba762e0d61e5b52d34c8b13d770f0719f7b1d79", size = 22978, upload_time = "2024-10-18T15:21:27.846Z" }, + { url = "https://files.pythonhosted.org/packages/2a/9f/8619835cd6a711d6272d62abb78c033bda638fdc54c4e7f4272cf1c0962b/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cdb82a876c47801bb54a690c5ae105a46b392ac6099881cdfb9f6e95e4014c6a", size = 24208, upload_time = "2024-10-18T15:21:28.744Z" }, + { url = "https://files.pythonhosted.org/packages/f9/bf/176950a1792b2cd2102b8ffeb5133e1ed984547b75db47c25a67d3359f77/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:cabc348d87e913db6ab4aa100f01b08f481097838bdddf7c7a84b7575b7309ca", size = 23357, upload_time = "2024-10-18T15:21:29.545Z" }, + { url = "https://files.pythonhosted.org/packages/ce/4f/9a02c1d335caabe5c4efb90e1b6e8ee944aa245c1aaaab8e8a618987d816/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:444dcda765c8a838eaae23112db52f1efaf750daddb2d9ca300bcae1039adc5c", size = 23344, upload_time = "2024-10-18T15:21:30.366Z" }, + { url = "https://files.pythonhosted.org/packages/ee/55/c271b57db36f748f0e04a759ace9f8f759ccf22b4960c270c78a394f58be/MarkupSafe-3.0.2-cp313-cp313-win32.whl", hash = "sha256:bcf3e58998965654fdaff38e58584d8937aa3096ab5354d493c77d1fdd66d7a1", size = 15101, upload_time = "2024-10-18T15:21:31.207Z" }, + { url = "https://files.pythonhosted.org/packages/29/88/07df22d2dd4df40aba9f3e402e6dc1b8ee86297dddbad4872bd5e7b0094f/MarkupSafe-3.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:e6a2a455bd412959b57a172ce6328d2dd1f01cb2135efda2e4576e8a23fa3b0f", size = 15603, upload_time = "2024-10-18T15:21:32.032Z" }, + { url = 
"https://files.pythonhosted.org/packages/62/6a/8b89d24db2d32d433dffcd6a8779159da109842434f1dd2f6e71f32f738c/MarkupSafe-3.0.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:b5a6b3ada725cea8a5e634536b1b01c30bcdcd7f9c6fff4151548d5bf6b3a36c", size = 14510, upload_time = "2024-10-18T15:21:33.625Z" }, + { url = "https://files.pythonhosted.org/packages/7a/06/a10f955f70a2e5a9bf78d11a161029d278eeacbd35ef806c3fd17b13060d/MarkupSafe-3.0.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a904af0a6162c73e3edcb969eeeb53a63ceeb5d8cf642fade7d39e7963a22ddb", size = 12486, upload_time = "2024-10-18T15:21:34.611Z" }, + { url = "https://files.pythonhosted.org/packages/34/cf/65d4a571869a1a9078198ca28f39fba5fbb910f952f9dbc5220afff9f5e6/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4aa4e5faecf353ed117801a068ebab7b7e09ffb6e1d5e412dc852e0da018126c", size = 25480, upload_time = "2024-10-18T15:21:35.398Z" }, + { url = "https://files.pythonhosted.org/packages/0c/e3/90e9651924c430b885468b56b3d597cabf6d72be4b24a0acd1fa0e12af67/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0ef13eaeee5b615fb07c9a7dadb38eac06a0608b41570d8ade51c56539e509d", size = 23914, upload_time = "2024-10-18T15:21:36.231Z" }, + { url = "https://files.pythonhosted.org/packages/66/8c/6c7cf61f95d63bb866db39085150df1f2a5bd3335298f14a66b48e92659c/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d16a81a06776313e817c951135cf7340a3e91e8c1ff2fac444cfd75fffa04afe", size = 23796, upload_time = "2024-10-18T15:21:37.073Z" }, + { url = "https://files.pythonhosted.org/packages/bb/35/cbe9238ec3f47ac9a7c8b3df7a808e7cb50fe149dc7039f5f454b3fba218/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6381026f158fdb7c72a168278597a5e3a5222e83ea18f543112b2662a9b699c5", size = 25473, upload_time = "2024-10-18T15:21:37.932Z" }, + { url = "https://files.pythonhosted.org/packages/e6/32/7621a4382488aa283cc05e8984a9c219abad3bca087be9ec77e89939ded9/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:3d79d162e7be8f996986c064d1c7c817f6df3a77fe3d6859f6f9e7be4b8c213a", size = 24114, upload_time = "2024-10-18T15:21:39.799Z" }, + { url = "https://files.pythonhosted.org/packages/0d/80/0985960e4b89922cb5a0bac0ed39c5b96cbc1a536a99f30e8c220a996ed9/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:131a3c7689c85f5ad20f9f6fb1b866f402c445b220c19fe4308c0b147ccd2ad9", size = 24098, upload_time = "2024-10-18T15:21:40.813Z" }, + { url = "https://files.pythonhosted.org/packages/82/78/fedb03c7d5380df2427038ec8d973587e90561b2d90cd472ce9254cf348b/MarkupSafe-3.0.2-cp313-cp313t-win32.whl", hash = "sha256:ba8062ed2cf21c07a9e295d5b8a2a5ce678b913b45fdf68c32d95d6c1291e0b6", size = 15208, upload_time = "2024-10-18T15:21:41.814Z" }, + { url = "https://files.pythonhosted.org/packages/4f/65/6079a46068dfceaeabb5dcad6d674f5f5c61a6fa5673746f42a9f4c233b3/MarkupSafe-3.0.2-cp313-cp313t-win_amd64.whl", hash = "sha256:e444a31f8db13eb18ada366ab3cf45fd4b31e4db1236a4448f68778c1d1a5a2f", size = 15739, upload_time = "2024-10-18T15:21:42.784Z" }, +] + +[[package]] +name = "marshmallow" +version = "3.23.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fd/c0/d674c9de69227beafa41e1601b0c48b8b51060212abc231d4332e4b1e794/marshmallow-3.23.3.tar.gz", hash = 
"sha256:d586c8685ebdb80bf754e1f96e3f305aaf30951f1fc69175b977453633467e76", size = 175606, upload_time = "2025-01-03T20:18:41.52Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/af/82/d8c37cc92948ce11e5d8d71602bbac7ac4257f9e1f918fd91b1ddac4ec97/marshmallow-3.23.3-py3-none-any.whl", hash = "sha256:20c0f8c613f68bcb45b2a0d3282e2f172575560170bf220d67aafb42717910e4", size = 48911, upload_time = "2025-01-03T20:18:39.62Z" }, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload_time = "2022-08-14T12:40:10.846Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload_time = "2022-08-14T12:40:09.779Z" }, +] + +[[package]] +name = "ml-dtypes" +version = "0.3.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/39/7d/8d85fcba868758b3a546e6914e727abd8f29ea6918079f816975c9eecd63/ml_dtypes-0.3.2.tar.gz", hash = "sha256:533059bc5f1764fac071ef54598db358c167c51a718f68f5bb55e3dee79d2967", size = 692014, upload_time = "2024-01-03T19:21:23.615Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6e/a4/6aabb78f1569550fd77c74d2c1d008b502c8ce72776bd88b14ea6c182c9e/ml_dtypes-0.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:763697ab8a88d47443997a7cdf3aac7340049aed45f7521f6b0ec8a0594821fe", size = 389791, upload_time = "2024-01-03T19:21:02.844Z" }, + { url = "https://files.pythonhosted.org/packages/d1/ed/211bf2e1c66e4ec9b712c3be848a876185c7f0d5e94bf647b60e64ef32eb/ml_dtypes-0.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b89b194e9501a92d289c1ffd411380baf5daafb9818109a4f49b0a1b6dce4462", size = 2185796, upload_time = "2024-01-03T19:21:04.291Z" }, + { url = "https://files.pythonhosted.org/packages/77/a0/d4ee9e3aca5b9101c590b58555820618e8201c2ccb7004eabb417ec046ac/ml_dtypes-0.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c34f2ba9660b21fe1034b608308a01be82bbef2a92fb8199f24dc6bad0d5226", size = 2164071, upload_time = "2024-01-03T19:21:05.78Z" }, + { url = "https://files.pythonhosted.org/packages/a4/db/1784b87285588788170f87e987bfb4bda218d62a70a81ebb66c94e7f9b95/ml_dtypes-0.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:6604877d567a29bfe7cc02969ae0f2425260e5335505cf5e7fefc3e5465f5655", size = 127681, upload_time = "2024-01-03T19:21:07.337Z" }, + { url = "https://files.pythonhosted.org/packages/ad/2d/57a8aa1ba7472a93a675bfba3f0c90d9396d01d040617a5345ce87884330/ml_dtypes-0.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:93b78f53431c93953f7850bb1b925a17f0ab5d97527e38a7e865b5b4bc5cfc18", size = 393571, upload_time = "2024-01-03T19:21:08.836Z" }, + { url = "https://files.pythonhosted.org/packages/6a/05/ec30199c791cf0d788a26f56d8efb8ee4133ede79a9680fd8cc05e706404/ml_dtypes-0.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a17ef2322e60858d93584e9c52a5be7dd6236b056b7fa1ec57f1bb6ba043e33", size = 2180925, upload_time = "2024-01-03T19:21:10.87Z" }, + { url = 
"https://files.pythonhosted.org/packages/e5/f1/93219c44bae4017e6e43391fa4433592de08e05def9d885227d3596f21a5/ml_dtypes-0.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8505946df1665db01332d885c2020b4cb9e84a8b1241eb4ba69d59591f65855", size = 2160573, upload_time = "2024-01-03T19:21:12.775Z" }, + { url = "https://files.pythonhosted.org/packages/47/f3/847da54c3d243ff2aa778078ecf09da199194d282744718ef325dd8afd41/ml_dtypes-0.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:f47619d978ab1ae7dfdc4052ea97c636c6263e1f19bd1be0e42c346b98d15ff4", size = 128649, upload_time = "2024-01-03T19:21:14.312Z" }, +] + +[[package]] +name = "multidict" +version = "6.4.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/da/2c/e367dfb4c6538614a0c9453e510d75d66099edf1c4e69da1b5ce691a1931/multidict-6.4.3.tar.gz", hash = "sha256:3ada0b058c9f213c5f95ba301f922d402ac234f1111a7d8fd70f1b99f3c281ec", size = 89372, upload_time = "2025-04-10T22:20:17.956Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/16/e0/53cf7f27eda48fffa53cfd4502329ed29e00efb9e4ce41362cbf8aa54310/multidict-6.4.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f6f19170197cc29baccd33ccc5b5d6a331058796485857cf34f7635aa25fb0cd", size = 65259, upload_time = "2025-04-10T22:17:59.632Z" }, + { url = "https://files.pythonhosted.org/packages/44/79/1dcd93ce7070cf01c2ee29f781c42b33c64fce20033808f1cc9ec8413d6e/multidict-6.4.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f2882bf27037eb687e49591690e5d491e677272964f9ec7bc2abbe09108bdfb8", size = 38451, upload_time = "2025-04-10T22:18:01.202Z" }, + { url = "https://files.pythonhosted.org/packages/f4/35/2292cf29ab5f0d0b3613fad1b75692148959d3834d806be1885ceb49a8ff/multidict-6.4.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fbf226ac85f7d6b6b9ba77db4ec0704fde88463dc17717aec78ec3c8546c70ad", size = 37706, upload_time = "2025-04-10T22:18:02.276Z" }, + { url = "https://files.pythonhosted.org/packages/f6/d1/6b157110b2b187b5a608b37714acb15ee89ec773e3800315b0107ea648cd/multidict-6.4.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e329114f82ad4b9dd291bef614ea8971ec119ecd0f54795109976de75c9a852", size = 226669, upload_time = "2025-04-10T22:18:03.436Z" }, + { url = "https://files.pythonhosted.org/packages/40/7f/61a476450651f177c5570e04bd55947f693077ba7804fe9717ee9ae8de04/multidict-6.4.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:1f4e0334d7a555c63f5c8952c57ab6f1c7b4f8c7f3442df689fc9f03df315c08", size = 223182, upload_time = "2025-04-10T22:18:04.922Z" }, + { url = "https://files.pythonhosted.org/packages/51/7b/eaf7502ac4824cdd8edcf5723e2e99f390c879866aec7b0c420267b53749/multidict-6.4.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:740915eb776617b57142ce0bb13b7596933496e2f798d3d15a20614adf30d229", size = 235025, upload_time = "2025-04-10T22:18:06.274Z" }, + { url = "https://files.pythonhosted.org/packages/3b/f6/facdbbd73c96b67a93652774edd5778ab1167854fa08ea35ad004b1b70ad/multidict-6.4.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:255dac25134d2b141c944b59a0d2f7211ca12a6d4779f7586a98b4b03ea80508", size = 231481, upload_time = "2025-04-10T22:18:07.742Z" }, + { url = "https://files.pythonhosted.org/packages/70/57/c008e861b3052405eebf921fd56a748322d8c44dcfcab164fffbccbdcdc4/multidict-6.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:d4e8535bd4d741039b5aad4285ecd9b902ef9e224711f0b6afda6e38d7ac02c7", size = 223492, upload_time = "2025-04-10T22:18:09.095Z" }, + { url = "https://files.pythonhosted.org/packages/30/4d/7d8440d3a12a6ae5d6b202d6e7f2ac6ab026e04e99aaf1b73f18e6bc34bc/multidict-6.4.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:30c433a33be000dd968f5750722eaa0991037be0be4a9d453eba121774985bc8", size = 217279, upload_time = "2025-04-10T22:18:10.474Z" }, + { url = "https://files.pythonhosted.org/packages/7f/e7/bca0df4dd057597b94138d2d8af04eb3c27396a425b1b0a52e082f9be621/multidict-6.4.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:4eb33b0bdc50acd538f45041f5f19945a1f32b909b76d7b117c0c25d8063df56", size = 228733, upload_time = "2025-04-10T22:18:11.793Z" }, + { url = "https://files.pythonhosted.org/packages/88/f5/383827c3f1c38d7c92dbad00a8a041760228573b1c542fbf245c37bbca8a/multidict-6.4.3-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:75482f43465edefd8a5d72724887ccdcd0c83778ded8f0cb1e0594bf71736cc0", size = 218089, upload_time = "2025-04-10T22:18:13.153Z" }, + { url = "https://files.pythonhosted.org/packages/36/8a/a5174e8a7d8b94b4c8f9c1e2cf5d07451f41368ffe94d05fc957215b8e72/multidict-6.4.3-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ce5b3082e86aee80b3925ab4928198450d8e5b6466e11501fe03ad2191c6d777", size = 225257, upload_time = "2025-04-10T22:18:14.654Z" }, + { url = "https://files.pythonhosted.org/packages/8c/76/1d4b7218f0fd00b8e5c90b88df2e45f8af127f652f4e41add947fa54c1c4/multidict-6.4.3-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:e413152e3212c4d39f82cf83c6f91be44bec9ddea950ce17af87fbf4e32ca6b2", size = 234728, upload_time = "2025-04-10T22:18:16.236Z" }, + { url = "https://files.pythonhosted.org/packages/64/44/18372a4f6273fc7ca25630d7bf9ae288cde64f29593a078bff450c7170b6/multidict-6.4.3-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:8aac2eeff69b71f229a405c0a4b61b54bade8e10163bc7b44fcd257949620618", size = 230087, upload_time = "2025-04-10T22:18:17.979Z" }, + { url = "https://files.pythonhosted.org/packages/0f/ae/28728c314a698d8a6d9491fcacc897077348ec28dd85884d09e64df8a855/multidict-6.4.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ab583ac203af1d09034be41458feeab7863c0635c650a16f15771e1386abf2d7", size = 223137, upload_time = "2025-04-10T22:18:19.362Z" }, + { url = "https://files.pythonhosted.org/packages/22/50/785bb2b3fe16051bc91c70a06a919f26312da45c34db97fc87441d61e343/multidict-6.4.3-cp311-cp311-win32.whl", hash = "sha256:1b2019317726f41e81154df636a897de1bfe9228c3724a433894e44cd2512378", size = 34959, upload_time = "2025-04-10T22:18:20.728Z" }, + { url = "https://files.pythonhosted.org/packages/2f/63/2a22e099ae2f4d92897618c00c73a09a08a2a9aa14b12736965bf8d59fd3/multidict-6.4.3-cp311-cp311-win_amd64.whl", hash = "sha256:43173924fa93c7486402217fab99b60baf78d33806af299c56133a3755f69589", size = 38541, upload_time = "2025-04-10T22:18:22.001Z" }, + { url = "https://files.pythonhosted.org/packages/fc/bb/3abdaf8fe40e9226ce8a2ba5ecf332461f7beec478a455d6587159f1bf92/multidict-6.4.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1f1c2f58f08b36f8475f3ec6f5aeb95270921d418bf18f90dffd6be5c7b0e676", size = 64019, upload_time = "2025-04-10T22:18:23.174Z" }, + { url = "https://files.pythonhosted.org/packages/7e/b5/1b2e8de8217d2e89db156625aa0fe4a6faad98972bfe07a7b8c10ef5dd6b/multidict-6.4.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:26ae9ad364fc61b936fb7bf4c9d8bd53f3a5b4417142cd0be5c509d6f767e2f1", 
size = 37925, upload_time = "2025-04-10T22:18:24.834Z" }, + { url = "https://files.pythonhosted.org/packages/b4/e2/3ca91c112644a395c8eae017144c907d173ea910c913ff8b62549dcf0bbf/multidict-6.4.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:659318c6c8a85f6ecfc06b4e57529e5a78dfdd697260cc81f683492ad7e9435a", size = 37008, upload_time = "2025-04-10T22:18:26.069Z" }, + { url = "https://files.pythonhosted.org/packages/60/23/79bc78146c7ac8d1ac766b2770ca2e07c2816058b8a3d5da6caed8148637/multidict-6.4.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1eb72c741fd24d5a28242ce72bb61bc91f8451877131fa3fe930edb195f7054", size = 224374, upload_time = "2025-04-10T22:18:27.714Z" }, + { url = "https://files.pythonhosted.org/packages/86/35/77950ed9ebd09136003a85c1926ba42001ca5be14feb49710e4334ee199b/multidict-6.4.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3cd06d88cb7398252284ee75c8db8e680aa0d321451132d0dba12bc995f0adcc", size = 230869, upload_time = "2025-04-10T22:18:29.162Z" }, + { url = "https://files.pythonhosted.org/packages/49/97/2a33c6e7d90bc116c636c14b2abab93d6521c0c052d24bfcc231cbf7f0e7/multidict-6.4.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4543d8dc6470a82fde92b035a92529317191ce993533c3c0c68f56811164ed07", size = 231949, upload_time = "2025-04-10T22:18:30.679Z" }, + { url = "https://files.pythonhosted.org/packages/56/ce/e9b5d9fcf854f61d6686ada7ff64893a7a5523b2a07da6f1265eaaea5151/multidict-6.4.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:30a3ebdc068c27e9d6081fca0e2c33fdf132ecea703a72ea216b81a66860adde", size = 231032, upload_time = "2025-04-10T22:18:32.146Z" }, + { url = "https://files.pythonhosted.org/packages/f0/ac/7ced59dcdfeddd03e601edb05adff0c66d81ed4a5160c443e44f2379eef0/multidict-6.4.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b038f10e23f277153f86f95c777ba1958bcd5993194fda26a1d06fae98b2f00c", size = 223517, upload_time = "2025-04-10T22:18:33.538Z" }, + { url = "https://files.pythonhosted.org/packages/db/e6/325ed9055ae4e085315193a1b58bdb4d7fc38ffcc1f4975cfca97d015e17/multidict-6.4.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c605a2b2dc14282b580454b9b5d14ebe0668381a3a26d0ac39daa0ca115eb2ae", size = 216291, upload_time = "2025-04-10T22:18:34.962Z" }, + { url = "https://files.pythonhosted.org/packages/fa/84/eeee6d477dd9dcb7691c3bb9d08df56017f5dd15c730bcc9383dcf201cf4/multidict-6.4.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8bd2b875f4ca2bb527fe23e318ddd509b7df163407b0fb717df229041c6df5d3", size = 228982, upload_time = "2025-04-10T22:18:36.443Z" }, + { url = "https://files.pythonhosted.org/packages/82/94/4d1f3e74e7acf8b0c85db350e012dcc61701cd6668bc2440bb1ecb423c90/multidict-6.4.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:c2e98c840c9c8e65c0e04b40c6c5066c8632678cd50c8721fdbcd2e09f21a507", size = 226823, upload_time = "2025-04-10T22:18:37.924Z" }, + { url = "https://files.pythonhosted.org/packages/09/f0/1e54b95bda7cd01080e5732f9abb7b76ab5cc795b66605877caeb2197476/multidict-6.4.3-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:66eb80dd0ab36dbd559635e62fba3083a48a252633164857a1d1684f14326427", size = 222714, upload_time = "2025-04-10T22:18:39.807Z" }, + { url = "https://files.pythonhosted.org/packages/e7/a2/f6cbca875195bd65a3e53b37ab46486f3cc125bdeab20eefe5042afa31fb/multidict-6.4.3-cp312-cp312-musllinux_1_2_ppc64le.whl", 
hash = "sha256:c23831bdee0a2a3cf21be057b5e5326292f60472fb6c6f86392bbf0de70ba731", size = 233739, upload_time = "2025-04-10T22:18:41.341Z" }, + { url = "https://files.pythonhosted.org/packages/79/68/9891f4d2b8569554723ddd6154375295f789dc65809826c6fb96a06314fd/multidict-6.4.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:1535cec6443bfd80d028052e9d17ba6ff8a5a3534c51d285ba56c18af97e9713", size = 230809, upload_time = "2025-04-10T22:18:42.817Z" }, + { url = "https://files.pythonhosted.org/packages/e6/72/a7be29ba1e87e4fc5ceb44dabc7940b8005fd2436a332a23547709315f70/multidict-6.4.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3b73e7227681f85d19dec46e5b881827cd354aabe46049e1a61d2f9aaa4e285a", size = 226934, upload_time = "2025-04-10T22:18:44.311Z" }, + { url = "https://files.pythonhosted.org/packages/12/c1/259386a9ad6840ff7afc686da96808b503d152ac4feb3a96c651dc4f5abf/multidict-6.4.3-cp312-cp312-win32.whl", hash = "sha256:8eac0c49df91b88bf91f818e0a24c1c46f3622978e2c27035bfdca98e0e18124", size = 35242, upload_time = "2025-04-10T22:18:46.193Z" }, + { url = "https://files.pythonhosted.org/packages/06/24/c8fdff4f924d37225dc0c56a28b1dca10728fc2233065fafeb27b4b125be/multidict-6.4.3-cp312-cp312-win_amd64.whl", hash = "sha256:11990b5c757d956cd1db7cb140be50a63216af32cd6506329c2c59d732d802db", size = 38635, upload_time = "2025-04-10T22:18:47.498Z" }, + { url = "https://files.pythonhosted.org/packages/6c/4b/86fd786d03915c6f49998cf10cd5fe6b6ac9e9a071cb40885d2e080fb90d/multidict-6.4.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:7a76534263d03ae0cfa721fea40fd2b5b9d17a6f85e98025931d41dc49504474", size = 63831, upload_time = "2025-04-10T22:18:48.748Z" }, + { url = "https://files.pythonhosted.org/packages/45/05/9b51fdf7aef2563340a93be0a663acba2c428c4daeaf3960d92d53a4a930/multidict-6.4.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:805031c2f599eee62ac579843555ed1ce389ae00c7e9f74c2a1b45e0564a88dd", size = 37888, upload_time = "2025-04-10T22:18:50.021Z" }, + { url = "https://files.pythonhosted.org/packages/0b/43/53fc25394386c911822419b522181227ca450cf57fea76e6188772a1bd91/multidict-6.4.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c56c179839d5dcf51d565132185409d1d5dd8e614ba501eb79023a6cab25576b", size = 36852, upload_time = "2025-04-10T22:18:51.246Z" }, + { url = "https://files.pythonhosted.org/packages/8a/68/7b99c751e822467c94a235b810a2fd4047d4ecb91caef6b5c60116991c4b/multidict-6.4.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c64f4ddb3886dd8ab71b68a7431ad4aa01a8fa5be5b11543b29674f29ca0ba3", size = 223644, upload_time = "2025-04-10T22:18:52.965Z" }, + { url = "https://files.pythonhosted.org/packages/80/1b/d458d791e4dd0f7e92596667784fbf99e5c8ba040affe1ca04f06b93ae92/multidict-6.4.3-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3002a856367c0b41cad6784f5b8d3ab008eda194ed7864aaa58f65312e2abcac", size = 230446, upload_time = "2025-04-10T22:18:54.509Z" }, + { url = "https://files.pythonhosted.org/packages/e2/46/9793378d988905491a7806d8987862dc5a0bae8a622dd896c4008c7b226b/multidict-6.4.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3d75e621e7d887d539d6e1d789f0c64271c250276c333480a9e1de089611f790", size = 231070, upload_time = "2025-04-10T22:18:56.019Z" }, + { url = "https://files.pythonhosted.org/packages/a7/b8/b127d3e1f8dd2a5bf286b47b24567ae6363017292dc6dec44656e6246498/multidict-6.4.3-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:995015cf4a3c0d72cbf453b10a999b92c5629eaf3a0c3e1efb4b5c1f602253bb", size = 229956, upload_time = "2025-04-10T22:18:59.146Z" }, + { url = "https://files.pythonhosted.org/packages/0c/93/f70a4c35b103fcfe1443059a2bb7f66e5c35f2aea7804105ff214f566009/multidict-6.4.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2b0fabae7939d09d7d16a711468c385272fa1b9b7fb0d37e51143585d8e72e0", size = 222599, upload_time = "2025-04-10T22:19:00.657Z" }, + { url = "https://files.pythonhosted.org/packages/63/8c/e28e0eb2fe34921d6aa32bfc4ac75b09570b4d6818cc95d25499fe08dc1d/multidict-6.4.3-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:61ed4d82f8a1e67eb9eb04f8587970d78fe7cddb4e4d6230b77eda23d27938f9", size = 216136, upload_time = "2025-04-10T22:19:02.244Z" }, + { url = "https://files.pythonhosted.org/packages/72/f5/fbc81f866585b05f89f99d108be5d6ad170e3b6c4d0723d1a2f6ba5fa918/multidict-6.4.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:062428944a8dc69df9fdc5d5fc6279421e5f9c75a9ee3f586f274ba7b05ab3c8", size = 228139, upload_time = "2025-04-10T22:19:04.151Z" }, + { url = "https://files.pythonhosted.org/packages/bb/ba/7d196bad6b85af2307d81f6979c36ed9665f49626f66d883d6c64d156f78/multidict-6.4.3-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:b90e27b4674e6c405ad6c64e515a505c6d113b832df52fdacb6b1ffd1fa9a1d1", size = 226251, upload_time = "2025-04-10T22:19:06.117Z" }, + { url = "https://files.pythonhosted.org/packages/cc/e2/fae46a370dce79d08b672422a33df721ec8b80105e0ea8d87215ff6b090d/multidict-6.4.3-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:7d50d4abf6729921e9613d98344b74241572b751c6b37feed75fb0c37bd5a817", size = 221868, upload_time = "2025-04-10T22:19:07.981Z" }, + { url = "https://files.pythonhosted.org/packages/26/20/bbc9a3dec19d5492f54a167f08546656e7aef75d181d3d82541463450e88/multidict-6.4.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:43fe10524fb0a0514be3954be53258e61d87341008ce4914f8e8b92bee6f875d", size = 233106, upload_time = "2025-04-10T22:19:09.5Z" }, + { url = "https://files.pythonhosted.org/packages/ee/8d/f30ae8f5ff7a2461177f4d8eb0d8f69f27fb6cfe276b54ec4fd5a282d918/multidict-6.4.3-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:236966ca6c472ea4e2d3f02f6673ebfd36ba3f23159c323f5a496869bc8e47c9", size = 230163, upload_time = "2025-04-10T22:19:11Z" }, + { url = "https://files.pythonhosted.org/packages/15/e9/2833f3c218d3c2179f3093f766940ded6b81a49d2e2f9c46ab240d23dfec/multidict-6.4.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:422a5ec315018e606473ba1f5431e064cf8b2a7468019233dcf8082fabad64c8", size = 225906, upload_time = "2025-04-10T22:19:12.875Z" }, + { url = "https://files.pythonhosted.org/packages/f1/31/6edab296ac369fd286b845fa5dd4c409e63bc4655ed8c9510fcb477e9ae9/multidict-6.4.3-cp313-cp313-win32.whl", hash = "sha256:f901a5aace8e8c25d78960dcc24c870c8d356660d3b49b93a78bf38eb682aac3", size = 35238, upload_time = "2025-04-10T22:19:14.41Z" }, + { url = "https://files.pythonhosted.org/packages/23/57/2c0167a1bffa30d9a1383c3dab99d8caae985defc8636934b5668830d2ef/multidict-6.4.3-cp313-cp313-win_amd64.whl", hash = "sha256:1c152c49e42277bc9a2f7b78bd5fa10b13e88d1b0328221e7aef89d5c60a99a5", size = 38799, upload_time = "2025-04-10T22:19:15.869Z" }, + { url = "https://files.pythonhosted.org/packages/c9/13/2ead63b9ab0d2b3080819268acb297bd66e238070aa8d42af12b08cbee1c/multidict-6.4.3-cp313-cp313t-macosx_10_13_universal2.whl", hash = 
"sha256:be8751869e28b9c0d368d94f5afcb4234db66fe8496144547b4b6d6a0645cfc6", size = 68642, upload_time = "2025-04-10T22:19:17.527Z" }, + { url = "https://files.pythonhosted.org/packages/85/45/f1a751e1eede30c23951e2ae274ce8fad738e8a3d5714be73e0a41b27b16/multidict-6.4.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0d4b31f8a68dccbcd2c0ea04f0e014f1defc6b78f0eb8b35f2265e8716a6df0c", size = 40028, upload_time = "2025-04-10T22:19:19.465Z" }, + { url = "https://files.pythonhosted.org/packages/a7/29/fcc53e886a2cc5595cc4560df333cb9630257bda65003a7eb4e4e0d8f9c1/multidict-6.4.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:032efeab3049e37eef2ff91271884303becc9e54d740b492a93b7e7266e23756", size = 39424, upload_time = "2025-04-10T22:19:20.762Z" }, + { url = "https://files.pythonhosted.org/packages/f6/f0/056c81119d8b88703971f937b371795cab1407cd3c751482de5bfe1a04a9/multidict-6.4.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9e78006af1a7c8a8007e4f56629d7252668344442f66982368ac06522445e375", size = 226178, upload_time = "2025-04-10T22:19:22.17Z" }, + { url = "https://files.pythonhosted.org/packages/a3/79/3b7e5fea0aa80583d3a69c9d98b7913dfd4fbc341fb10bb2fb48d35a9c21/multidict-6.4.3-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:daeac9dd30cda8703c417e4fddccd7c4dc0c73421a0b54a7da2713be125846be", size = 222617, upload_time = "2025-04-10T22:19:23.773Z" }, + { url = "https://files.pythonhosted.org/packages/06/db/3ed012b163e376fc461e1d6a67de69b408339bc31dc83d39ae9ec3bf9578/multidict-6.4.3-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1f6f90700881438953eae443a9c6f8a509808bc3b185246992c4233ccee37fea", size = 227919, upload_time = "2025-04-10T22:19:25.35Z" }, + { url = "https://files.pythonhosted.org/packages/b1/db/0433c104bca380989bc04d3b841fc83e95ce0c89f680e9ea4251118b52b6/multidict-6.4.3-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f84627997008390dd15762128dcf73c3365f4ec0106739cde6c20a07ed198ec8", size = 226097, upload_time = "2025-04-10T22:19:27.183Z" }, + { url = "https://files.pythonhosted.org/packages/c2/95/910db2618175724dd254b7ae635b6cd8d2947a8b76b0376de7b96d814dab/multidict-6.4.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3307b48cd156153b117c0ea54890a3bdbf858a5b296ddd40dc3852e5f16e9b02", size = 220706, upload_time = "2025-04-10T22:19:28.882Z" }, + { url = "https://files.pythonhosted.org/packages/d1/af/aa176c6f5f1d901aac957d5258d5e22897fe13948d1e69063ae3d5d0ca01/multidict-6.4.3-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ead46b0fa1dcf5af503a46e9f1c2e80b5d95c6011526352fa5f42ea201526124", size = 211728, upload_time = "2025-04-10T22:19:30.481Z" }, + { url = "https://files.pythonhosted.org/packages/e7/42/d51cc5fc1527c3717d7f85137d6c79bb7a93cd214c26f1fc57523774dbb5/multidict-6.4.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:1748cb2743bedc339d63eb1bca314061568793acd603a6e37b09a326334c9f44", size = 226276, upload_time = "2025-04-10T22:19:32.454Z" }, + { url = "https://files.pythonhosted.org/packages/28/6b/d836dea45e0b8432343ba4acf9a8ecaa245da4c0960fb7ab45088a5e568a/multidict-6.4.3-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:acc9fa606f76fc111b4569348cc23a771cb52c61516dcc6bcef46d612edb483b", size = 212069, upload_time = "2025-04-10T22:19:34.17Z" }, + { url = 
"https://files.pythonhosted.org/packages/55/34/0ee1a7adb3560e18ee9289c6e5f7db54edc312b13e5c8263e88ea373d12c/multidict-6.4.3-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:31469d5832b5885adeb70982e531ce86f8c992334edd2f2254a10fa3182ac504", size = 217858, upload_time = "2025-04-10T22:19:35.879Z" }, + { url = "https://files.pythonhosted.org/packages/04/08/586d652c2f5acefe0cf4e658eedb4d71d4ba6dfd4f189bd81b400fc1bc6b/multidict-6.4.3-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:ba46b51b6e51b4ef7bfb84b82f5db0dc5e300fb222a8a13b8cd4111898a869cf", size = 226988, upload_time = "2025-04-10T22:19:37.434Z" }, + { url = "https://files.pythonhosted.org/packages/82/e3/cc59c7e2bc49d7f906fb4ffb6d9c3a3cf21b9f2dd9c96d05bef89c2b1fd1/multidict-6.4.3-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:389cfefb599edf3fcfd5f64c0410da686f90f5f5e2c4d84e14f6797a5a337af4", size = 220435, upload_time = "2025-04-10T22:19:39.005Z" }, + { url = "https://files.pythonhosted.org/packages/e0/32/5c3a556118aca9981d883f38c4b1bfae646f3627157f70f4068e5a648955/multidict-6.4.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:64bc2bbc5fba7b9db5c2c8d750824f41c6994e3882e6d73c903c2afa78d091e4", size = 221494, upload_time = "2025-04-10T22:19:41.447Z" }, + { url = "https://files.pythonhosted.org/packages/b9/3b/1599631f59024b75c4d6e3069f4502409970a336647502aaf6b62fb7ac98/multidict-6.4.3-cp313-cp313t-win32.whl", hash = "sha256:0ecdc12ea44bab2807d6b4a7e5eef25109ab1c82a8240d86d3c1fc9f3b72efd5", size = 41775, upload_time = "2025-04-10T22:19:43.707Z" }, + { url = "https://files.pythonhosted.org/packages/e8/4e/09301668d675d02ca8e8e1a3e6be046619e30403f5ada2ed5b080ae28d02/multidict-6.4.3-cp313-cp313t-win_amd64.whl", hash = "sha256:7146a8742ea71b5d7d955bffcef58a9e6e04efba704b52a460134fefd10a8208", size = 45946, upload_time = "2025-04-10T22:19:45.071Z" }, + { url = "https://files.pythonhosted.org/packages/96/10/7d526c8974f017f1e7ca584c71ee62a638e9334d8d33f27d7cdfc9ae79e4/multidict-6.4.3-py3-none-any.whl", hash = "sha256:59fe01ee8e2a1e8ceb3f6dbb216b09c8d9f4ef1c22c4fc825d045a147fa2ebc9", size = 10400, upload_time = "2025-04-10T22:20:16.445Z" }, +] + +[[package]] +name = "multiprocess" +version = "0.70.16" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "dill" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b5/ae/04f39c5d0d0def03247c2893d6f2b83c136bf3320a2154d7b8858f2ba72d/multiprocess-0.70.16.tar.gz", hash = "sha256:161af703d4652a0e1410be6abccecde4a7ddffd19341be0a7011b94aeb171ac1", size = 1772603, upload_time = "2024-01-28T18:52:34.85Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bc/f7/7ec7fddc92e50714ea3745631f79bd9c96424cb2702632521028e57d3a36/multiprocess-0.70.16-py310-none-any.whl", hash = "sha256:c4a9944c67bd49f823687463660a2d6daae94c289adff97e0f9d696ba6371d02", size = 134824, upload_time = "2024-01-28T18:52:26.062Z" }, + { url = "https://files.pythonhosted.org/packages/50/15/b56e50e8debaf439f44befec5b2af11db85f6e0f344c3113ae0be0593a91/multiprocess-0.70.16-py311-none-any.whl", hash = "sha256:af4cabb0dac72abfb1e794fa7855c325fd2b55a10a44628a3c1ad3311c04127a", size = 143519, upload_time = "2024-01-28T18:52:28.115Z" }, + { url = "https://files.pythonhosted.org/packages/0a/7d/a988f258104dcd2ccf1ed40fdc97e26c4ac351eeaf81d76e266c52d84e2f/multiprocess-0.70.16-py312-none-any.whl", hash = "sha256:fc0544c531920dde3b00c29863377f87e1632601092ea2daca74e4beb40faa2e", size = 146741, upload_time = "2024-01-28T18:52:29.395Z" }, + { url = 
"https://files.pythonhosted.org/packages/ea/89/38df130f2c799090c978b366cfdf5b96d08de5b29a4a293df7f7429fa50b/multiprocess-0.70.16-py38-none-any.whl", hash = "sha256:a71d82033454891091a226dfc319d0cfa8019a4e888ef9ca910372a446de4435", size = 132628, upload_time = "2024-01-28T18:52:30.853Z" }, + { url = "https://files.pythonhosted.org/packages/da/d9/f7f9379981e39b8c2511c9e0326d212accacb82f12fbfdc1aa2ce2a7b2b6/multiprocess-0.70.16-py39-none-any.whl", hash = "sha256:a0bafd3ae1b732eac64be2e72038231c1ba97724b60b09400d68f229fcc2fbf3", size = 133351, upload_time = "2024-01-28T18:52:31.981Z" }, +] + +[[package]] +name = "numpy" +version = "1.26.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/65/6e/09db70a523a96d25e115e71cc56a6f9031e7b8cd166c1ac8438307c14058/numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010", size = 15786129, upload_time = "2024-02-06T00:26:44.495Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/11/57/baae43d14fe163fa0e4c47f307b6b2511ab8d7d30177c491960504252053/numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71", size = 20630554, upload_time = "2024-02-05T23:51:50.149Z" }, + { url = "https://files.pythonhosted.org/packages/1a/2e/151484f49fd03944c4a3ad9c418ed193cfd02724e138ac8a9505d056c582/numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef", size = 13997127, upload_time = "2024-02-05T23:52:15.314Z" }, + { url = "https://files.pythonhosted.org/packages/79/ae/7e5b85136806f9dadf4878bf73cf223fe5c2636818ba3ab1c585d0403164/numpy-1.26.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e", size = 14222994, upload_time = "2024-02-05T23:52:47.569Z" }, + { url = "https://files.pythonhosted.org/packages/3a/d0/edc009c27b406c4f9cbc79274d6e46d634d139075492ad055e3d68445925/numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5", size = 18252005, upload_time = "2024-02-05T23:53:15.637Z" }, + { url = "https://files.pythonhosted.org/packages/09/bf/2b1aaf8f525f2923ff6cfcf134ae5e750e279ac65ebf386c75a0cf6da06a/numpy-1.26.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a", size = 13885297, upload_time = "2024-02-05T23:53:42.16Z" }, + { url = "https://files.pythonhosted.org/packages/df/a0/4e0f14d847cfc2a633a1c8621d00724f3206cfeddeb66d35698c4e2cf3d2/numpy-1.26.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a", size = 18093567, upload_time = "2024-02-05T23:54:11.696Z" }, + { url = "https://files.pythonhosted.org/packages/d2/b7/a734c733286e10a7f1a8ad1ae8c90f2d33bf604a96548e0a4a3a6739b468/numpy-1.26.4-cp311-cp311-win32.whl", hash = "sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20", size = 5968812, upload_time = "2024-02-05T23:54:26.453Z" }, + { url = "https://files.pythonhosted.org/packages/3f/6b/5610004206cf7f8e7ad91c5a85a8c71b2f2f8051a0c0c4d5916b76d6cbb2/numpy-1.26.4-cp311-cp311-win_amd64.whl", hash = "sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2", size = 15811913, upload_time = "2024-02-05T23:54:53.933Z" }, + { url = 
"https://files.pythonhosted.org/packages/95/12/8f2020a8e8b8383ac0177dc9570aad031a3beb12e38847f7129bacd96228/numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218", size = 20335901, upload_time = "2024-02-05T23:55:32.801Z" }, + { url = "https://files.pythonhosted.org/packages/75/5b/ca6c8bd14007e5ca171c7c03102d17b4f4e0ceb53957e8c44343a9546dcc/numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b", size = 13685868, upload_time = "2024-02-05T23:55:56.28Z" }, + { url = "https://files.pythonhosted.org/packages/79/f8/97f10e6755e2a7d027ca783f63044d5b1bc1ae7acb12afe6a9b4286eac17/numpy-1.26.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b", size = 13925109, upload_time = "2024-02-05T23:56:20.368Z" }, + { url = "https://files.pythonhosted.org/packages/0f/50/de23fde84e45f5c4fda2488c759b69990fd4512387a8632860f3ac9cd225/numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed", size = 17950613, upload_time = "2024-02-05T23:56:56.054Z" }, + { url = "https://files.pythonhosted.org/packages/4c/0c/9c603826b6465e82591e05ca230dfc13376da512b25ccd0894709b054ed0/numpy-1.26.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a", size = 13572172, upload_time = "2024-02-05T23:57:21.56Z" }, + { url = "https://files.pythonhosted.org/packages/76/8c/2ba3902e1a0fc1c74962ea9bb33a534bb05984ad7ff9515bf8d07527cadd/numpy-1.26.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0", size = 17786643, upload_time = "2024-02-05T23:57:56.585Z" }, + { url = "https://files.pythonhosted.org/packages/28/4a/46d9e65106879492374999e76eb85f87b15328e06bd1550668f79f7b18c6/numpy-1.26.4-cp312-cp312-win32.whl", hash = "sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110", size = 5677803, upload_time = "2024-02-05T23:58:08.963Z" }, + { url = "https://files.pythonhosted.org/packages/16/2e/86f24451c2d530c88daf997cb8d6ac622c1d40d19f5a031ed68a4b73a374/numpy-1.26.4-cp312-cp312-win_amd64.whl", hash = "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818", size = 15517754, upload_time = "2024-02-05T23:58:36.364Z" }, +] + +[[package]] +name = "oauthlib" +version = "3.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6d/fa/fbf4001037904031639e6bfbfc02badfc7e12f137a8afa254df6c4c8a670/oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918", size = 177352, upload_time = "2022-10-17T20:04:27.471Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/80/cab10959dc1faead58dc8384a781dfbf93cb4d33d50988f7a69f1b7c9bbe/oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca", size = 151688, upload_time = "2022-10-17T20:04:24.037Z" }, +] + +[[package]] +name = "openai" +version = "1.60.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "distro" }, + { name = "httpx" }, + { name = "jiter" }, + { name = "pydantic" }, + { name = "sniffio" }, + { name = "tqdm" }, + { name = "typing-extensions" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/08/ae/8d9706b8ff2363287b4a8807de2dd29cdbdad5424e9d05d345df724320f5/openai-1.60.2.tar.gz", hash = "sha256:a8f843e10f2855713007f491d96afb2694b11b5e02cb97c7d01a0be60bc5bb51", size = 348185, upload_time = "2025-01-27T19:37:03.72Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/5a/d5474ca67a547dde9b87b5bc8a8f90eadf29f523d410f2ba23d63c9b82ec/openai-1.60.2-py3-none-any.whl", hash = "sha256:993bd11b96900b9098179c728026f016b4982ded7ee30dfcf4555eab1171fff9", size = 456107, upload_time = "2025-01-27T19:37:01.065Z" }, +] + +[[package]] +name = "opt-einsum" +version = "3.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8c/b9/2ac072041e899a52f20cf9510850ff58295003aa75525e58343591b0cbfb/opt_einsum-3.4.0.tar.gz", hash = "sha256:96ca72f1b886d148241348783498194c577fa30a8faac108586b14f1ba4473ac", size = 63004, upload_time = "2024-09-26T14:33:24.483Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/23/cd/066e86230ae37ed0be70aae89aabf03ca8d9f39c8aea0dec8029455b5540/opt_einsum-3.4.0-py3-none-any.whl", hash = "sha256:69bb92469f86a1565195ece4ac0323943e83477171b91d24c35afe028a90d7cd", size = 71932, upload_time = "2024-09-26T14:33:23.039Z" }, +] + +[[package]] +name = "orderly-set" +version = "5.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/03/4a/38030da31c13dcd5a531490006e63a0954083fb115113be9393179738e25/orderly_set-5.4.1.tar.gz", hash = "sha256:a1fb5a4fdc5e234e9e8d8e5c1bbdbc4540f4dfe50d12bf17c8bc5dbf1c9c878d", size = 20943, upload_time = "2025-05-06T22:34:13.512Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/bc/e0dfb4db9210d92b44e49d6e61ba5caefbd411958357fa9d7ff489eeb835/orderly_set-5.4.1-py3-none-any.whl", hash = "sha256:b5e21d21680bd9ef456885db800c5cb4f76a03879880c0175e1b077fb166fd83", size = 12339, upload_time = "2025-05-06T22:34:12.564Z" }, +] + +[[package]] +name = "packaging" +version = "25.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload_time = "2025-04-19T11:48:59.673Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload_time = "2025-04-19T11:48:57.875Z" }, +] + +[[package]] +name = "pandas" +version = "2.2.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "python-dateutil" }, + { name = "pytz" }, + { name = "tzdata" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9c/d6/9f8431bacc2e19dca897724cd097b1bb224a6ad5433784a44b587c7c13af/pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667", size = 4399213, upload_time = "2024-09-20T13:10:04.827Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a8/44/d9502bf0ed197ba9bf1103c9867d5904ddcaf869e52329787fc54ed70cc8/pandas-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66108071e1b935240e74525006034333f98bcdb87ea116de573a6a0dccb6c039", size = 12602222, upload_time = "2024-09-20T13:08:56.254Z" }, + { url = 
"https://files.pythonhosted.org/packages/52/11/9eac327a38834f162b8250aab32a6781339c69afe7574368fffe46387edf/pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c2875855b0ff77b2a64a0365e24455d9990730d6431b9e0ee18ad8acee13dbd", size = 11321274, upload_time = "2024-09-20T13:08:58.645Z" }, + { url = "https://files.pythonhosted.org/packages/45/fb/c4beeb084718598ba19aa9f5abbc8aed8b42f90930da861fcb1acdb54c3a/pandas-2.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd8d0c3be0515c12fed0bdbae072551c8b54b7192c7b1fda0ba56059a0179698", size = 15579836, upload_time = "2024-09-20T19:01:57.571Z" }, + { url = "https://files.pythonhosted.org/packages/cd/5f/4dba1d39bb9c38d574a9a22548c540177f78ea47b32f99c0ff2ec499fac5/pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c124333816c3a9b03fbeef3a9f230ba9a737e9e5bb4060aa2107a86cc0a497fc", size = 13058505, upload_time = "2024-09-20T13:09:01.501Z" }, + { url = "https://files.pythonhosted.org/packages/b9/57/708135b90391995361636634df1f1130d03ba456e95bcf576fada459115a/pandas-2.2.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:63cc132e40a2e084cf01adf0775b15ac515ba905d7dcca47e9a251819c575ef3", size = 16744420, upload_time = "2024-09-20T19:02:00.678Z" }, + { url = "https://files.pythonhosted.org/packages/86/4a/03ed6b7ee323cf30404265c284cee9c65c56a212e0a08d9ee06984ba2240/pandas-2.2.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:29401dbfa9ad77319367d36940cd8a0b3a11aba16063e39632d98b0e931ddf32", size = 14440457, upload_time = "2024-09-20T13:09:04.105Z" }, + { url = "https://files.pythonhosted.org/packages/ed/8c/87ddf1fcb55d11f9f847e3c69bb1c6f8e46e2f40ab1a2d2abadb2401b007/pandas-2.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:3fc6873a41186404dad67245896a6e440baacc92f5b716ccd1bc9ed2995ab2c5", size = 11617166, upload_time = "2024-09-20T13:09:06.917Z" }, + { url = "https://files.pythonhosted.org/packages/17/a3/fb2734118db0af37ea7433f57f722c0a56687e14b14690edff0cdb4b7e58/pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9", size = 12529893, upload_time = "2024-09-20T13:09:09.655Z" }, + { url = "https://files.pythonhosted.org/packages/e1/0c/ad295fd74bfac85358fd579e271cded3ac969de81f62dd0142c426b9da91/pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4", size = 11363475, upload_time = "2024-09-20T13:09:14.718Z" }, + { url = "https://files.pythonhosted.org/packages/c6/2a/4bba3f03f7d07207481fed47f5b35f556c7441acddc368ec43d6643c5777/pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3", size = 15188645, upload_time = "2024-09-20T19:02:03.88Z" }, + { url = "https://files.pythonhosted.org/packages/38/f8/d8fddee9ed0d0c0f4a2132c1dfcf0e3e53265055da8df952a53e7eaf178c/pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319", size = 12739445, upload_time = "2024-09-20T13:09:17.621Z" }, + { url = "https://files.pythonhosted.org/packages/20/e8/45a05d9c39d2cea61ab175dbe6a2de1d05b679e8de2011da4ee190d7e748/pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8", size = 16359235, upload_time = "2024-09-20T19:02:07.094Z" }, + { url = 
"https://files.pythonhosted.org/packages/1d/99/617d07a6a5e429ff90c90da64d428516605a1ec7d7bea494235e1c3882de/pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a", size = 14056756, upload_time = "2024-09-20T13:09:20.474Z" }, + { url = "https://files.pythonhosted.org/packages/29/d4/1244ab8edf173a10fd601f7e13b9566c1b525c4f365d6bee918e68381889/pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13", size = 11504248, upload_time = "2024-09-20T13:09:23.137Z" }, + { url = "https://files.pythonhosted.org/packages/64/22/3b8f4e0ed70644e85cfdcd57454686b9057c6c38d2f74fe4b8bc2527214a/pandas-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015", size = 12477643, upload_time = "2024-09-20T13:09:25.522Z" }, + { url = "https://files.pythonhosted.org/packages/e4/93/b3f5d1838500e22c8d793625da672f3eec046b1a99257666c94446969282/pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28", size = 11281573, upload_time = "2024-09-20T13:09:28.012Z" }, + { url = "https://files.pythonhosted.org/packages/f5/94/6c79b07f0e5aab1dcfa35a75f4817f5c4f677931d4234afcd75f0e6a66ca/pandas-2.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0", size = 15196085, upload_time = "2024-09-20T19:02:10.451Z" }, + { url = "https://files.pythonhosted.org/packages/e8/31/aa8da88ca0eadbabd0a639788a6da13bb2ff6edbbb9f29aa786450a30a91/pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24", size = 12711809, upload_time = "2024-09-20T13:09:30.814Z" }, + { url = "https://files.pythonhosted.org/packages/ee/7c/c6dbdb0cb2a4344cacfb8de1c5808ca885b2e4dcfde8008266608f9372af/pandas-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659", size = 16356316, upload_time = "2024-09-20T19:02:13.825Z" }, + { url = "https://files.pythonhosted.org/packages/57/b7/8b757e7d92023b832869fa8881a992696a0bfe2e26f72c9ae9f255988d42/pandas-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb", size = 14022055, upload_time = "2024-09-20T13:09:33.462Z" }, + { url = "https://files.pythonhosted.org/packages/3b/bc/4b18e2b8c002572c5a441a64826252ce5da2aa738855747247a971988043/pandas-2.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d", size = 11481175, upload_time = "2024-09-20T13:09:35.871Z" }, + { url = "https://files.pythonhosted.org/packages/76/a3/a5d88146815e972d40d19247b2c162e88213ef51c7c25993942c39dbf41d/pandas-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468", size = 12615650, upload_time = "2024-09-20T13:09:38.685Z" }, + { url = "https://files.pythonhosted.org/packages/9c/8c/f0fd18f6140ddafc0c24122c8a964e48294acc579d47def376fef12bcb4a/pandas-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18", size = 11290177, upload_time = "2024-09-20T13:09:41.141Z" }, + { url = 
"https://files.pythonhosted.org/packages/ed/f9/e995754eab9c0f14c6777401f7eece0943840b7a9fc932221c19d1abee9f/pandas-2.2.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2", size = 14651526, upload_time = "2024-09-20T19:02:16.905Z" }, + { url = "https://files.pythonhosted.org/packages/25/b0/98d6ae2e1abac4f35230aa756005e8654649d305df9a28b16b9ae4353bff/pandas-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4", size = 11871013, upload_time = "2024-09-20T13:09:44.39Z" }, + { url = "https://files.pythonhosted.org/packages/cc/57/0f72a10f9db6a4628744c8e8f0df4e6e21de01212c7c981d31e50ffc8328/pandas-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d", size = 15711620, upload_time = "2024-09-20T19:02:20.639Z" }, + { url = "https://files.pythonhosted.org/packages/ab/5f/b38085618b950b79d2d9164a711c52b10aefc0ae6833b96f626b7021b2ed/pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a", size = 13098436, upload_time = "2024-09-20T13:09:48.112Z" }, +] + +[[package]] +name = "pdap-access-manager" +version = "0.3.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohttp" }, + { name = "boltons" }, + { name = "pydantic" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a9/54/c0e76d1d54ff2f542f18b289db96c417d3bcd7e8e948de07921b492717e7/pdap_access_manager-0.3.5.tar.gz", hash = "sha256:5f8bbe0f25ef68810a0936ca22d40d3869d77391bae3c8ba1c885f8fe74154bd", size = 4120, upload_time = "2025-05-13T13:40:24.189Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/01/d4ba10d0d7be759e59011f4235c533b1bc31d5e99db86424cfd82284ce53/pdap_access_manager-0.3.5-py3-none-any.whl", hash = "sha256:b53a006e535d7733ca884560f41aa305068fec648c89524e397967a21e69a0d0", size = 4980, upload_time = "2025-05-13T13:40:23.223Z" }, +] + +[[package]] +name = "pendulum" +version = "3.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "python-dateutil" }, + { name = "tzdata" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/23/7c/009c12b86c7cc6c403aec80f8a4308598dfc5995e5c523a5491faaa3952e/pendulum-3.1.0.tar.gz", hash = "sha256:66f96303560f41d097bee7d2dc98ffca716fbb3a832c4b3062034c2d45865015", size = 85930, upload_time = "2025-04-19T14:30:01.675Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5e/6e/d28d3c22e6708b819a94c05bd05a3dfaed5c685379e8b6dc4b34b473b942/pendulum-3.1.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:61a03d14f8c64d13b2f7d5859e4b4053c4a7d3b02339f6c71f3e4606bfd67423", size = 338596, upload_time = "2025-04-19T14:01:11.306Z" }, + { url = "https://files.pythonhosted.org/packages/e1/e6/43324d58021d463c2eeb6146b169d2c935f2f840f9e45ac2d500453d954c/pendulum-3.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e674ed2d158afa5c361e60f1f67872dc55b492a10cacdaa7fcd7b7da5f158f24", size = 325854, upload_time = "2025-04-19T14:01:13.156Z" }, + { url = "https://files.pythonhosted.org/packages/b0/a7/d2ae79b960bfdea94dab67e2f118697b08bc9e98eb6bd8d32c4d99240da3/pendulum-3.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c75377eb16e58bbe7e03ea89eeea49be6fc5de0934a4aef0e263f8b4fa71bc2", size = 344334, upload_time = 
"2025-04-19T14:01:15.151Z" }, + { url = "https://files.pythonhosted.org/packages/96/94/941f071212e23c29aae7def891fb636930c648386e059ce09ea0dcd43933/pendulum-3.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:656b8b0ce070f0f2e5e2668247d3c783c55336534aa1f13bd0969535878955e1", size = 382259, upload_time = "2025-04-19T14:01:16.924Z" }, + { url = "https://files.pythonhosted.org/packages/51/ad/a78a701656aec00d16fee636704445c23ca11617a0bfe7c3848d1caa5157/pendulum-3.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48962903e6c1afe1f13548cb6252666056086c107d59e3d64795c58c9298bc2e", size = 436361, upload_time = "2025-04-19T14:01:18.796Z" }, + { url = "https://files.pythonhosted.org/packages/da/93/83f59ccbf4435c29dca8c63a6560fcbe4783079a468a5f91d9f886fd21f0/pendulum-3.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d364ec3f8e65010fefd4b0aaf7be5eb97e5df761b107a06f5e743b7c3f52c311", size = 353653, upload_time = "2025-04-19T14:01:20.159Z" }, + { url = "https://files.pythonhosted.org/packages/6f/0f/42d6644ec6339b41066f594e52d286162aecd2e9735aaf994d7e00c9e09d/pendulum-3.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:dd52caffc2afb86612ec43bbeb226f204ea12ebff9f3d12f900a7d3097210fcc", size = 524567, upload_time = "2025-04-19T14:01:21.457Z" }, + { url = "https://files.pythonhosted.org/packages/de/45/d84d909202755ab9d3379e5481fdf70f53344ebefbd68d6f5803ddde98a6/pendulum-3.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d439fccaa35c91f686bd59d30604dab01e8b5c1d0dd66e81648c432fd3f8a539", size = 525571, upload_time = "2025-04-19T14:01:23.329Z" }, + { url = "https://files.pythonhosted.org/packages/0d/e0/4de160773ce3c2f7843c310db19dd919a0cd02cc1c0384866f63b18a6251/pendulum-3.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:43288773a86d9c5c0ddb645f88f615ff6bd12fd1410b34323662beccb18f3b49", size = 260259, upload_time = "2025-04-19T14:01:24.689Z" }, + { url = "https://files.pythonhosted.org/packages/c1/7f/ffa278f78112c6c6e5130a702042f52aab5c649ae2edf814df07810bbba5/pendulum-3.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:569ea5072ae0f11d625e03b36d865f8037b76e838a3b621f6967314193896a11", size = 253899, upload_time = "2025-04-19T14:01:26.442Z" }, + { url = "https://files.pythonhosted.org/packages/7a/d7/b1bfe15a742f2c2713acb1fdc7dc3594ff46ef9418ac6a96fcb12a6ba60b/pendulum-3.1.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:4dfd53e7583ccae138be86d6c0a0b324c7547df2afcec1876943c4d481cf9608", size = 336209, upload_time = "2025-04-19T14:01:27.815Z" }, + { url = "https://files.pythonhosted.org/packages/eb/87/0392da0c603c828b926d9f7097fbdddaafc01388cb8a00888635d04758c3/pendulum-3.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6a6e06a28f3a7d696546347805536f6f38be458cb79de4f80754430696bea9e6", size = 323130, upload_time = "2025-04-19T14:01:29.336Z" }, + { url = "https://files.pythonhosted.org/packages/c0/61/95f1eec25796be6dddf71440ee16ec1fd0c573fc61a73bd1ef6daacd529a/pendulum-3.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7e68d6a51880708084afd8958af42dc8c5e819a70a6c6ae903b1c4bfc61e0f25", size = 341509, upload_time = "2025-04-19T14:01:31.1Z" }, + { url = "https://files.pythonhosted.org/packages/b5/7b/eb0f5e6aa87d5e1b467a1611009dbdc92f0f72425ebf07669bfadd8885a6/pendulum-3.1.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9e3f1e5da39a7ea7119efda1dd96b529748c1566f8a983412d0908455d606942", size = 378674, upload_time = 
"2025-04-19T14:01:32.974Z" }, + { url = "https://files.pythonhosted.org/packages/29/68/5a4c1b5de3e54e16cab21d2ec88f9cd3f18599e96cc90a441c0b0ab6b03f/pendulum-3.1.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e9af1e5eeddb4ebbe1b1c9afb9fd8077d73416ade42dd61264b3f3b87742e0bb", size = 436133, upload_time = "2025-04-19T14:01:34.349Z" }, + { url = "https://files.pythonhosted.org/packages/87/5d/f7a1d693e5c0f789185117d5c1d5bee104f5b0d9fbf061d715fb61c840a8/pendulum-3.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20f74aa8029a42e327bfc150472e0e4d2358fa5d795f70460160ba81b94b6945", size = 351232, upload_time = "2025-04-19T14:01:35.669Z" }, + { url = "https://files.pythonhosted.org/packages/30/77/c97617eb31f1d0554edb073201a294019b9e0a9bd2f73c68e6d8d048cd6b/pendulum-3.1.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:cf6229e5ee70c2660148523f46c472e677654d0097bec010d6730f08312a4931", size = 521562, upload_time = "2025-04-19T14:01:37.05Z" }, + { url = "https://files.pythonhosted.org/packages/76/22/0d0ef3393303877e757b848ecef8a9a8c7627e17e7590af82d14633b2cd1/pendulum-3.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:350cabb23bf1aec7c7694b915d3030bff53a2ad4aeabc8c8c0d807c8194113d6", size = 523221, upload_time = "2025-04-19T14:01:38.444Z" }, + { url = "https://files.pythonhosted.org/packages/99/f3/aefb579aa3cebd6f2866b205fc7a60d33e9a696e9e629024752107dc3cf5/pendulum-3.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:42959341e843077c41d47420f28c3631de054abd64da83f9b956519b5c7a06a7", size = 260502, upload_time = "2025-04-19T14:01:39.814Z" }, + { url = "https://files.pythonhosted.org/packages/02/74/4332b5d6e34c63d4df8e8eab2249e74c05513b1477757463f7fdca99e9be/pendulum-3.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:006758e2125da2e624493324dfd5d7d1b02b0c44bc39358e18bf0f66d0767f5f", size = 253089, upload_time = "2025-04-19T14:01:41.171Z" }, + { url = "https://files.pythonhosted.org/packages/8e/1f/af928ba4aa403dac9569f787adcf024005e7654433d71f7a84e608716837/pendulum-3.1.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:28658b0baf4b30eb31d096a375983cfed033e60c0a7bbe94fa23f06cd779b50b", size = 336209, upload_time = "2025-04-19T14:01:42.775Z" }, + { url = "https://files.pythonhosted.org/packages/b6/16/b010643007ba964c397da7fa622924423883c1bbff1a53f9d1022cd7f024/pendulum-3.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b114dcb99ce511cb8f5495c7b6f0056b2c3dba444ef1ea6e48030d7371bd531a", size = 323132, upload_time = "2025-04-19T14:01:44.577Z" }, + { url = "https://files.pythonhosted.org/packages/64/19/c3c47aeecb5d9bceb0e89faafd800d39809b696c5b7bba8ec8370ad5052c/pendulum-3.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2404a6a54c80252ea393291f0b7f35525a61abae3d795407f34e118a8f133a18", size = 341509, upload_time = "2025-04-19T14:01:46.084Z" }, + { url = "https://files.pythonhosted.org/packages/38/cf/c06921ff6b860ff7e62e70b8e5d4dc70e36f5abb66d168bd64d51760bc4e/pendulum-3.1.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d06999790d9ee9962a1627e469f98568bf7ad1085553fa3c30ed08b3944a14d7", size = 378674, upload_time = "2025-04-19T14:01:47.727Z" }, + { url = "https://files.pythonhosted.org/packages/62/0b/a43953b9eba11e82612b033ac5133f716f1b76b6108a65da6f408b3cc016/pendulum-3.1.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:94751c52f6b7c306734d1044c2c6067a474237e1e5afa2f665d1fbcbbbcf24b3", size = 436133, upload_time = "2025-04-19T14:01:49.126Z" 
}, + { url = "https://files.pythonhosted.org/packages/eb/a0/ec3d70b3b96e23ae1d039f132af35e17704c22a8250d1887aaefea4d78a6/pendulum-3.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5553ac27be05e997ec26d7f004cf72788f4ce11fe60bb80dda604a64055b29d0", size = 351232, upload_time = "2025-04-19T14:01:50.575Z" }, + { url = "https://files.pythonhosted.org/packages/f4/97/aba23f1716b82f6951ba2b1c9178a2d107d1e66c102762a9bf19988547ea/pendulum-3.1.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:f8dee234ca6142bf0514368d01a72945a44685aaa2fc4c14c98d09da9437b620", size = 521563, upload_time = "2025-04-19T14:01:51.9Z" }, + { url = "https://files.pythonhosted.org/packages/01/33/2c0d5216cc53d16db0c4b3d510f141ee0a540937f8675948541190fbd48b/pendulum-3.1.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:7378084fe54faab4ee481897a00b710876f2e901ded6221671e827a253e643f2", size = 523221, upload_time = "2025-04-19T14:01:53.275Z" }, + { url = "https://files.pythonhosted.org/packages/51/89/8de955c339c31aeae77fd86d3225509b998c81875e9dba28cb88b8cbf4b3/pendulum-3.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:8539db7ae2c8da430ac2515079e288948c8ebf7eb1edd3e8281b5cdf433040d6", size = 260501, upload_time = "2025-04-19T14:01:54.749Z" }, + { url = "https://files.pythonhosted.org/packages/15/c3/226a3837363e94f8722461848feec18bfdd7d5172564d53aa3c3397ff01e/pendulum-3.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:1ce26a608e1f7387cd393fba2a129507c4900958d4f47b90757ec17656856571", size = 253087, upload_time = "2025-04-19T14:01:55.998Z" }, + { url = "https://files.pythonhosted.org/packages/6e/23/e98758924d1b3aac11a626268eabf7f3cf177e7837c28d47bf84c64532d0/pendulum-3.1.0-py3-none-any.whl", hash = "sha256:f9178c2a8e291758ade1e8dd6371b1d26d08371b4c7730a6e9a3ef8b16ebae0f", size = 111799, upload_time = "2025-04-19T14:02:34.739Z" }, +] + +[[package]] +name = "playwright" +version = "1.49.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "greenlet" }, + { name = "pyee" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/ab/be/01025581052e43eb698092c4328d7497ca62bcb5c83f15a611d4a71b4b92/playwright-1.49.1-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:1041ffb45a0d0bc44d698d3a5aa3ac4b67c9bd03540da43a0b70616ad52592b8", size = 39559859, upload_time = "2024-12-10T17:32:14.907Z" }, + { url = "https://files.pythonhosted.org/packages/79/25/ef1010a42cc7d576282015d983c5451d73e369b198b6eb32a177fae281f8/playwright-1.49.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:9f38ed3d0c1f4e0a6d1c92e73dd9a61f8855133249d6f0cec28648d38a7137be", size = 38808973, upload_time = "2024-12-10T17:32:22.516Z" }, + { url = "https://files.pythonhosted.org/packages/70/4b/3930cf10f303a10d493a382e4448aaff898b4065698b3b8d92f902e53e08/playwright-1.49.1-py3-none-macosx_11_0_universal2.whl", hash = "sha256:3be48c6d26dc819ca0a26567c1ae36a980a0303dcd4249feb6f59e115aaddfb8", size = 39559863, upload_time = "2024-12-10T17:32:29.12Z" }, + { url = "https://files.pythonhosted.org/packages/9a/c1/ea765e72a746dc7ec2ce155ffea29d454e7171db78f3c09185e888387246/playwright-1.49.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:753ca90ee31b4b03d165cfd36e477309ebf2b4381953f2a982ff612d85b147d2", size = 44163300, upload_time = "2024-12-10T17:32:35.647Z" }, + { url = "https://files.pythonhosted.org/packages/5a/52/95efac704bf36b770a2522d88a6dee298042845d10bfb35f7ca0fcc36d91/playwright-1.49.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:cd9bc8dab37aa25198a01f555f0a2e2c3813fe200fef018ac34dfe86b34994b9", size = 43744353, upload_time = "2024-12-10T17:32:43.189Z" }, + { url = "https://files.pythonhosted.org/packages/f9/97/a3fccc9aaa6da83890772e9980703b0ea6b1e1ad42042fb50df3aef6c641/playwright-1.49.1-py3-none-win32.whl", hash = "sha256:43b304be67f096058e587dac453ece550eff87b8fbed28de30f4f022cc1745bb", size = 34060663, upload_time = "2024-12-10T17:32:49.904Z" }, + { url = "https://files.pythonhosted.org/packages/71/a9/bd88ac0bd498c91aab3aba2e393d1fa59f72a7243e9265ccbf4861ca4f64/playwright-1.49.1-py3-none-win_amd64.whl", hash = "sha256:47b23cb346283278f5b4d1e1990bcb6d6302f80c0aa0ca93dd0601a1400191df", size = 34060667, upload_time = "2024-12-10T17:32:56.459Z" }, +] + +[[package]] +name = "pluggy" +version = "1.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/96/2d/02d4312c973c6050a18b314a5ad0b3210edb65a906f868e31c111dede4a6/pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1", size = 67955, upload_time = "2024-04-20T21:34:42.531Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556, upload_time = "2024-04-20T21:34:40.434Z" }, +] + +[[package]] +name = "propcache" +version = "0.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/07/c8/fdc6686a986feae3541ea23dcaa661bd93972d3940460646c6bb96e21c40/propcache-0.3.1.tar.gz", hash = "sha256:40d980c33765359098837527e18eddefc9a24cea5b45e078a7f3bb5b032c6ecf", size = 43651, upload_time = "2025-03-26T03:06:12.05Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/90/0f/5a5319ee83bd651f75311fcb0c492c21322a7fc8f788e4eef23f44243427/propcache-0.3.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:7f30241577d2fef2602113b70ef7231bf4c69a97e04693bde08ddab913ba0ce5", size = 80243, upload_time = "2025-03-26T03:04:01.912Z" }, + { url = "https://files.pythonhosted.org/packages/ce/84/3db5537e0879942783e2256616ff15d870a11d7ac26541336fe1b673c818/propcache-0.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:43593c6772aa12abc3af7784bff4a41ffa921608dd38b77cf1dfd7f5c4e71371", size = 46503, upload_time = "2025-03-26T03:04:03.704Z" }, + { url = "https://files.pythonhosted.org/packages/e2/c8/b649ed972433c3f0d827d7f0cf9ea47162f4ef8f4fe98c5f3641a0bc63ff/propcache-0.3.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a75801768bbe65499495660b777e018cbe90c7980f07f8aa57d6be79ea6f71da", size = 45934, upload_time = "2025-03-26T03:04:05.257Z" }, + { url = "https://files.pythonhosted.org/packages/59/f9/4c0a5cf6974c2c43b1a6810c40d889769cc8f84cea676cbe1e62766a45f8/propcache-0.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f6f1324db48f001c2ca26a25fa25af60711e09b9aaf4b28488602776f4f9a744", size = 233633, upload_time = "2025-03-26T03:04:07.044Z" }, + { url = "https://files.pythonhosted.org/packages/e7/64/66f2f4d1b4f0007c6e9078bd95b609b633d3957fe6dd23eac33ebde4b584/propcache-0.3.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cdb0f3e1eb6dfc9965d19734d8f9c481b294b5274337a8cb5cb01b462dcb7e0", size = 241124, upload_time = "2025-03-26T03:04:08.676Z" }, + { url = 
"https://files.pythonhosted.org/packages/aa/bf/7b8c9fd097d511638fa9b6af3d986adbdf567598a567b46338c925144c1b/propcache-0.3.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1eb34d90aac9bfbced9a58b266f8946cb5935869ff01b164573a7634d39fbcb5", size = 240283, upload_time = "2025-03-26T03:04:10.172Z" }, + { url = "https://files.pythonhosted.org/packages/fa/c9/e85aeeeaae83358e2a1ef32d6ff50a483a5d5248bc38510d030a6f4e2816/propcache-0.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f35c7070eeec2cdaac6fd3fe245226ed2a6292d3ee8c938e5bb645b434c5f256", size = 232498, upload_time = "2025-03-26T03:04:11.616Z" }, + { url = "https://files.pythonhosted.org/packages/8e/66/acb88e1f30ef5536d785c283af2e62931cb934a56a3ecf39105887aa8905/propcache-0.3.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b23c11c2c9e6d4e7300c92e022046ad09b91fd00e36e83c44483df4afa990073", size = 221486, upload_time = "2025-03-26T03:04:13.102Z" }, + { url = "https://files.pythonhosted.org/packages/f5/f9/233ddb05ffdcaee4448508ee1d70aa7deff21bb41469ccdfcc339f871427/propcache-0.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:3e19ea4ea0bf46179f8a3652ac1426e6dcbaf577ce4b4f65be581e237340420d", size = 222675, upload_time = "2025-03-26T03:04:14.658Z" }, + { url = "https://files.pythonhosted.org/packages/98/b8/eb977e28138f9e22a5a789daf608d36e05ed93093ef12a12441030da800a/propcache-0.3.1-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:bd39c92e4c8f6cbf5f08257d6360123af72af9f4da75a690bef50da77362d25f", size = 215727, upload_time = "2025-03-26T03:04:16.207Z" }, + { url = "https://files.pythonhosted.org/packages/89/2d/5f52d9c579f67b8ee1edd9ec073c91b23cc5b7ff7951a1e449e04ed8fdf3/propcache-0.3.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:b0313e8b923b3814d1c4a524c93dfecea5f39fa95601f6a9b1ac96cd66f89ea0", size = 217878, upload_time = "2025-03-26T03:04:18.11Z" }, + { url = "https://files.pythonhosted.org/packages/7a/fd/5283e5ed8a82b00c7a989b99bb6ea173db1ad750bf0bf8dff08d3f4a4e28/propcache-0.3.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:e861ad82892408487be144906a368ddbe2dc6297074ade2d892341b35c59844a", size = 230558, upload_time = "2025-03-26T03:04:19.562Z" }, + { url = "https://files.pythonhosted.org/packages/90/38/ab17d75938ef7ac87332c588857422ae126b1c76253f0f5b1242032923ca/propcache-0.3.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:61014615c1274df8da5991a1e5da85a3ccb00c2d4701ac6f3383afd3ca47ab0a", size = 233754, upload_time = "2025-03-26T03:04:21.065Z" }, + { url = "https://files.pythonhosted.org/packages/06/5d/3b921b9c60659ae464137508d3b4c2b3f52f592ceb1964aa2533b32fcf0b/propcache-0.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:71ebe3fe42656a2328ab08933d420df5f3ab121772eef78f2dc63624157f0ed9", size = 226088, upload_time = "2025-03-26T03:04:22.718Z" }, + { url = "https://files.pythonhosted.org/packages/54/6e/30a11f4417d9266b5a464ac5a8c5164ddc9dd153dfa77bf57918165eb4ae/propcache-0.3.1-cp311-cp311-win32.whl", hash = "sha256:58aa11f4ca8b60113d4b8e32d37e7e78bd8af4d1a5b5cb4979ed856a45e62005", size = 40859, upload_time = "2025-03-26T03:04:24.039Z" }, + { url = "https://files.pythonhosted.org/packages/1d/3a/8a68dd867da9ca2ee9dfd361093e9cb08cb0f37e5ddb2276f1b5177d7731/propcache-0.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:9532ea0b26a401264b1365146c440a6d78269ed41f83f23818d4b79497aeabe7", size = 45153, upload_time = "2025-03-26T03:04:25.211Z" }, + { url = 
"https://files.pythonhosted.org/packages/41/aa/ca78d9be314d1e15ff517b992bebbed3bdfef5b8919e85bf4940e57b6137/propcache-0.3.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:f78eb8422acc93d7b69964012ad7048764bb45a54ba7a39bb9e146c72ea29723", size = 80430, upload_time = "2025-03-26T03:04:26.436Z" }, + { url = "https://files.pythonhosted.org/packages/1a/d8/f0c17c44d1cda0ad1979af2e593ea290defdde9eaeb89b08abbe02a5e8e1/propcache-0.3.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:89498dd49c2f9a026ee057965cdf8192e5ae070ce7d7a7bd4b66a8e257d0c976", size = 46637, upload_time = "2025-03-26T03:04:27.932Z" }, + { url = "https://files.pythonhosted.org/packages/ae/bd/c1e37265910752e6e5e8a4c1605d0129e5b7933c3dc3cf1b9b48ed83b364/propcache-0.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:09400e98545c998d57d10035ff623266927cb784d13dd2b31fd33b8a5316b85b", size = 46123, upload_time = "2025-03-26T03:04:30.659Z" }, + { url = "https://files.pythonhosted.org/packages/d4/b0/911eda0865f90c0c7e9f0415d40a5bf681204da5fd7ca089361a64c16b28/propcache-0.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa8efd8c5adc5a2c9d3b952815ff8f7710cefdcaf5f2c36d26aff51aeca2f12f", size = 243031, upload_time = "2025-03-26T03:04:31.977Z" }, + { url = "https://files.pythonhosted.org/packages/0a/06/0da53397c76a74271621807265b6eb61fb011451b1ddebf43213df763669/propcache-0.3.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c2fe5c910f6007e716a06d269608d307b4f36e7babee5f36533722660e8c4a70", size = 249100, upload_time = "2025-03-26T03:04:33.45Z" }, + { url = "https://files.pythonhosted.org/packages/f1/eb/13090e05bf6b963fc1653cdc922133ced467cb4b8dab53158db5a37aa21e/propcache-0.3.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a0ab8cf8cdd2194f8ff979a43ab43049b1df0b37aa64ab7eca04ac14429baeb7", size = 250170, upload_time = "2025-03-26T03:04:35.542Z" }, + { url = "https://files.pythonhosted.org/packages/3b/4c/f72c9e1022b3b043ec7dc475a0f405d4c3e10b9b1d378a7330fecf0652da/propcache-0.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:563f9d8c03ad645597b8d010ef4e9eab359faeb11a0a2ac9f7b4bc8c28ebef25", size = 245000, upload_time = "2025-03-26T03:04:37.501Z" }, + { url = "https://files.pythonhosted.org/packages/e8/fd/970ca0e22acc829f1adf5de3724085e778c1ad8a75bec010049502cb3a86/propcache-0.3.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb6e0faf8cb6b4beea5d6ed7b5a578254c6d7df54c36ccd3d8b3eb00d6770277", size = 230262, upload_time = "2025-03-26T03:04:39.532Z" }, + { url = "https://files.pythonhosted.org/packages/c4/42/817289120c6b9194a44f6c3e6b2c3277c5b70bbad39e7df648f177cc3634/propcache-0.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1c5c7ab7f2bb3f573d1cb921993006ba2d39e8621019dffb1c5bc94cdbae81e8", size = 236772, upload_time = "2025-03-26T03:04:41.109Z" }, + { url = "https://files.pythonhosted.org/packages/7c/9c/3b3942b302badd589ad6b672da3ca7b660a6c2f505cafd058133ddc73918/propcache-0.3.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:050b571b2e96ec942898f8eb46ea4bfbb19bd5502424747e83badc2d4a99a44e", size = 231133, upload_time = "2025-03-26T03:04:42.544Z" }, + { url = "https://files.pythonhosted.org/packages/98/a1/75f6355f9ad039108ff000dfc2e19962c8dea0430da9a1428e7975cf24b2/propcache-0.3.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e1c4d24b804b3a87e9350f79e2371a705a188d292fd310e663483af6ee6718ee", size = 230741, upload_time = 
"2025-03-26T03:04:44.06Z" }, + { url = "https://files.pythonhosted.org/packages/67/0c/3e82563af77d1f8731132166da69fdfd95e71210e31f18edce08a1eb11ea/propcache-0.3.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:e4fe2a6d5ce975c117a6bb1e8ccda772d1e7029c1cca1acd209f91d30fa72815", size = 244047, upload_time = "2025-03-26T03:04:45.983Z" }, + { url = "https://files.pythonhosted.org/packages/f7/50/9fb7cca01532a08c4d5186d7bb2da6c4c587825c0ae134b89b47c7d62628/propcache-0.3.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:feccd282de1f6322f56f6845bf1207a537227812f0a9bf5571df52bb418d79d5", size = 246467, upload_time = "2025-03-26T03:04:47.699Z" }, + { url = "https://files.pythonhosted.org/packages/a9/02/ccbcf3e1c604c16cc525309161d57412c23cf2351523aedbb280eb7c9094/propcache-0.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ec314cde7314d2dd0510c6787326bbffcbdc317ecee6b7401ce218b3099075a7", size = 241022, upload_time = "2025-03-26T03:04:49.195Z" }, + { url = "https://files.pythonhosted.org/packages/db/19/e777227545e09ca1e77a6e21274ae9ec45de0f589f0ce3eca2a41f366220/propcache-0.3.1-cp312-cp312-win32.whl", hash = "sha256:7d2d5a0028d920738372630870e7d9644ce437142197f8c827194fca404bf03b", size = 40647, upload_time = "2025-03-26T03:04:50.595Z" }, + { url = "https://files.pythonhosted.org/packages/24/bb/3b1b01da5dd04c77a204c84e538ff11f624e31431cfde7201d9110b092b1/propcache-0.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:88c423efef9d7a59dae0614eaed718449c09a5ac79a5f224a8b9664d603f04a3", size = 44784, upload_time = "2025-03-26T03:04:51.791Z" }, + { url = "https://files.pythonhosted.org/packages/58/60/f645cc8b570f99be3cf46714170c2de4b4c9d6b827b912811eff1eb8a412/propcache-0.3.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:f1528ec4374617a7a753f90f20e2f551121bb558fcb35926f99e3c42367164b8", size = 77865, upload_time = "2025-03-26T03:04:53.406Z" }, + { url = "https://files.pythonhosted.org/packages/6f/d4/c1adbf3901537582e65cf90fd9c26fde1298fde5a2c593f987112c0d0798/propcache-0.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:dc1915ec523b3b494933b5424980831b636fe483d7d543f7afb7b3bf00f0c10f", size = 45452, upload_time = "2025-03-26T03:04:54.624Z" }, + { url = "https://files.pythonhosted.org/packages/d1/b5/fe752b2e63f49f727c6c1c224175d21b7d1727ce1d4873ef1c24c9216830/propcache-0.3.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a110205022d077da24e60b3df8bcee73971be9575dec5573dd17ae5d81751111", size = 44800, upload_time = "2025-03-26T03:04:55.844Z" }, + { url = "https://files.pythonhosted.org/packages/62/37/fc357e345bc1971e21f76597028b059c3d795c5ca7690d7a8d9a03c9708a/propcache-0.3.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d249609e547c04d190e820d0d4c8ca03ed4582bcf8e4e160a6969ddfb57b62e5", size = 225804, upload_time = "2025-03-26T03:04:57.158Z" }, + { url = "https://files.pythonhosted.org/packages/0d/f1/16e12c33e3dbe7f8b737809bad05719cff1dccb8df4dafbcff5575002c0e/propcache-0.3.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5ced33d827625d0a589e831126ccb4f5c29dfdf6766cac441d23995a65825dcb", size = 230650, upload_time = "2025-03-26T03:04:58.61Z" }, + { url = "https://files.pythonhosted.org/packages/3e/a2/018b9f2ed876bf5091e60153f727e8f9073d97573f790ff7cdf6bc1d1fb8/propcache-0.3.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4114c4ada8f3181af20808bedb250da6bae56660e4b8dfd9cd95d4549c0962f7", size = 234235, upload_time = "2025-03-26T03:05:00.599Z" }, + { url = 
"https://files.pythonhosted.org/packages/45/5f/3faee66fc930dfb5da509e34c6ac7128870631c0e3582987fad161fcb4b1/propcache-0.3.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:975af16f406ce48f1333ec5e912fe11064605d5c5b3f6746969077cc3adeb120", size = 228249, upload_time = "2025-03-26T03:05:02.11Z" }, + { url = "https://files.pythonhosted.org/packages/62/1e/a0d5ebda5da7ff34d2f5259a3e171a94be83c41eb1e7cd21a2105a84a02e/propcache-0.3.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a34aa3a1abc50740be6ac0ab9d594e274f59960d3ad253cd318af76b996dd654", size = 214964, upload_time = "2025-03-26T03:05:03.599Z" }, + { url = "https://files.pythonhosted.org/packages/db/a0/d72da3f61ceab126e9be1f3bc7844b4e98c6e61c985097474668e7e52152/propcache-0.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9cec3239c85ed15bfaded997773fdad9fb5662b0a7cbc854a43f291eb183179e", size = 222501, upload_time = "2025-03-26T03:05:05.107Z" }, + { url = "https://files.pythonhosted.org/packages/18/6d/a008e07ad7b905011253adbbd97e5b5375c33f0b961355ca0a30377504ac/propcache-0.3.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:05543250deac8e61084234d5fc54f8ebd254e8f2b39a16b1dce48904f45b744b", size = 217917, upload_time = "2025-03-26T03:05:06.59Z" }, + { url = "https://files.pythonhosted.org/packages/98/37/02c9343ffe59e590e0e56dc5c97d0da2b8b19fa747ebacf158310f97a79a/propcache-0.3.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:5cb5918253912e088edbf023788de539219718d3b10aef334476b62d2b53de53", size = 217089, upload_time = "2025-03-26T03:05:08.1Z" }, + { url = "https://files.pythonhosted.org/packages/53/1b/d3406629a2c8a5666d4674c50f757a77be119b113eedd47b0375afdf1b42/propcache-0.3.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f3bbecd2f34d0e6d3c543fdb3b15d6b60dd69970c2b4c822379e5ec8f6f621d5", size = 228102, upload_time = "2025-03-26T03:05:09.982Z" }, + { url = "https://files.pythonhosted.org/packages/cd/a7/3664756cf50ce739e5f3abd48febc0be1a713b1f389a502ca819791a6b69/propcache-0.3.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:aca63103895c7d960a5b9b044a83f544b233c95e0dcff114389d64d762017af7", size = 230122, upload_time = "2025-03-26T03:05:11.408Z" }, + { url = "https://files.pythonhosted.org/packages/35/36/0bbabaacdcc26dac4f8139625e930f4311864251276033a52fd52ff2a274/propcache-0.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5a0a9898fdb99bf11786265468571e628ba60af80dc3f6eb89a3545540c6b0ef", size = 226818, upload_time = "2025-03-26T03:05:12.909Z" }, + { url = "https://files.pythonhosted.org/packages/cc/27/4e0ef21084b53bd35d4dae1634b6d0bad35e9c58ed4f032511acca9d4d26/propcache-0.3.1-cp313-cp313-win32.whl", hash = "sha256:3a02a28095b5e63128bcae98eb59025924f121f048a62393db682f049bf4ac24", size = 40112, upload_time = "2025-03-26T03:05:14.289Z" }, + { url = "https://files.pythonhosted.org/packages/a6/2c/a54614d61895ba6dd7ac8f107e2b2a0347259ab29cbf2ecc7b94fa38c4dc/propcache-0.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:813fbb8b6aea2fc9659815e585e548fe706d6f663fa73dff59a1677d4595a037", size = 44034, upload_time = "2025-03-26T03:05:15.616Z" }, + { url = "https://files.pythonhosted.org/packages/5a/a8/0a4fd2f664fc6acc66438370905124ce62e84e2e860f2557015ee4a61c7e/propcache-0.3.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:a444192f20f5ce8a5e52761a031b90f5ea6288b1eef42ad4c7e64fef33540b8f", size = 82613, upload_time = "2025-03-26T03:05:16.913Z" }, + { url = 
"https://files.pythonhosted.org/packages/4d/e5/5ef30eb2cd81576256d7b6caaa0ce33cd1d2c2c92c8903cccb1af1a4ff2f/propcache-0.3.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0fbe94666e62ebe36cd652f5fc012abfbc2342de99b523f8267a678e4dfdee3c", size = 47763, upload_time = "2025-03-26T03:05:18.607Z" }, + { url = "https://files.pythonhosted.org/packages/87/9a/87091ceb048efeba4d28e903c0b15bcc84b7c0bf27dc0261e62335d9b7b8/propcache-0.3.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f011f104db880f4e2166bcdcf7f58250f7a465bc6b068dc84c824a3d4a5c94dc", size = 47175, upload_time = "2025-03-26T03:05:19.85Z" }, + { url = "https://files.pythonhosted.org/packages/3e/2f/854e653c96ad1161f96194c6678a41bbb38c7947d17768e8811a77635a08/propcache-0.3.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e584b6d388aeb0001d6d5c2bd86b26304adde6d9bb9bfa9c4889805021b96de", size = 292265, upload_time = "2025-03-26T03:05:21.654Z" }, + { url = "https://files.pythonhosted.org/packages/40/8d/090955e13ed06bc3496ba4a9fb26c62e209ac41973cb0d6222de20c6868f/propcache-0.3.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8a17583515a04358b034e241f952f1715243482fc2c2945fd99a1b03a0bd77d6", size = 294412, upload_time = "2025-03-26T03:05:23.147Z" }, + { url = "https://files.pythonhosted.org/packages/39/e6/d51601342e53cc7582449e6a3c14a0479fab2f0750c1f4d22302e34219c6/propcache-0.3.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5aed8d8308215089c0734a2af4f2e95eeb360660184ad3912686c181e500b2e7", size = 294290, upload_time = "2025-03-26T03:05:24.577Z" }, + { url = "https://files.pythonhosted.org/packages/3b/4d/be5f1a90abc1881884aa5878989a1acdafd379a91d9c7e5e12cef37ec0d7/propcache-0.3.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d8e309ff9a0503ef70dc9a0ebd3e69cf7b3894c9ae2ae81fc10943c37762458", size = 282926, upload_time = "2025-03-26T03:05:26.459Z" }, + { url = "https://files.pythonhosted.org/packages/57/2b/8f61b998c7ea93a2b7eca79e53f3e903db1787fca9373af9e2cf8dc22f9d/propcache-0.3.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b655032b202028a582d27aeedc2e813299f82cb232f969f87a4fde491a233f11", size = 267808, upload_time = "2025-03-26T03:05:28.188Z" }, + { url = "https://files.pythonhosted.org/packages/11/1c/311326c3dfce59c58a6098388ba984b0e5fb0381ef2279ec458ef99bd547/propcache-0.3.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9f64d91b751df77931336b5ff7bafbe8845c5770b06630e27acd5dbb71e1931c", size = 290916, upload_time = "2025-03-26T03:05:29.757Z" }, + { url = "https://files.pythonhosted.org/packages/4b/74/91939924b0385e54dc48eb2e4edd1e4903ffd053cf1916ebc5347ac227f7/propcache-0.3.1-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:19a06db789a4bd896ee91ebc50d059e23b3639c25d58eb35be3ca1cbe967c3bf", size = 262661, upload_time = "2025-03-26T03:05:31.472Z" }, + { url = "https://files.pythonhosted.org/packages/c2/d7/e6079af45136ad325c5337f5dd9ef97ab5dc349e0ff362fe5c5db95e2454/propcache-0.3.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:bef100c88d8692864651b5f98e871fb090bd65c8a41a1cb0ff2322db39c96c27", size = 264384, upload_time = "2025-03-26T03:05:32.984Z" }, + { url = "https://files.pythonhosted.org/packages/b7/d5/ba91702207ac61ae6f1c2da81c5d0d6bf6ce89e08a2b4d44e411c0bbe867/propcache-0.3.1-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:87380fb1f3089d2a0b8b00f006ed12bd41bd858fabfa7330c954c70f50ed8757", size = 291420, 
upload_time = "2025-03-26T03:05:34.496Z" }, + { url = "https://files.pythonhosted.org/packages/58/70/2117780ed7edcd7ba6b8134cb7802aada90b894a9810ec56b7bb6018bee7/propcache-0.3.1-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:e474fc718e73ba5ec5180358aa07f6aded0ff5f2abe700e3115c37d75c947e18", size = 290880, upload_time = "2025-03-26T03:05:36.256Z" }, + { url = "https://files.pythonhosted.org/packages/4a/1f/ecd9ce27710021ae623631c0146719280a929d895a095f6d85efb6a0be2e/propcache-0.3.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:17d1c688a443355234f3c031349da69444be052613483f3e4158eef751abcd8a", size = 287407, upload_time = "2025-03-26T03:05:37.799Z" }, + { url = "https://files.pythonhosted.org/packages/3e/66/2e90547d6b60180fb29e23dc87bd8c116517d4255240ec6d3f7dc23d1926/propcache-0.3.1-cp313-cp313t-win32.whl", hash = "sha256:359e81a949a7619802eb601d66d37072b79b79c2505e6d3fd8b945538411400d", size = 42573, upload_time = "2025-03-26T03:05:39.193Z" }, + { url = "https://files.pythonhosted.org/packages/cb/8f/50ad8599399d1861b4d2b6b45271f0ef6af1b09b0a2386a46dbaf19c9535/propcache-0.3.1-cp313-cp313t-win_amd64.whl", hash = "sha256:e7fb9a84c9abbf2b2683fa3e7b0d7da4d8ecf139a1c635732a8bda29c5214b0e", size = 46757, upload_time = "2025-03-26T03:05:40.811Z" }, + { url = "https://files.pythonhosted.org/packages/b8/d3/c3cb8f1d6ae3b37f83e1de806713a9b3642c5895f0215a62e1a4bd6e5e34/propcache-0.3.1-py3-none-any.whl", hash = "sha256:9a8ecf38de50a7f518c21568c80f985e776397b902f1ce0b01f799aba1608b40", size = 12376, upload_time = "2025-03-26T03:06:10.5Z" }, +] + +[[package]] +name = "proto-plus" +version = "1.26.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f4/ac/87285f15f7cce6d4a008f33f1757fb5a13611ea8914eb58c3d0d26243468/proto_plus-1.26.1.tar.gz", hash = "sha256:21a515a4c4c0088a773899e23c7bbade3d18f9c66c73edd4c7ee3816bc96a012", size = 56142, upload_time = "2025-03-10T15:54:38.843Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4e/6d/280c4c2ce28b1593a19ad5239c8b826871fc6ec275c21afc8e1820108039/proto_plus-1.26.1-py3-none-any.whl", hash = "sha256:13285478c2dcf2abb829db158e1047e2f1e8d63a077d94263c2b88b043c75a66", size = 50163, upload_time = "2025-03-10T15:54:37.335Z" }, +] + +[[package]] +name = "protobuf" +version = "4.25.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/74/63/84fdeac1f03864c2b8b9f0b7fe711c4af5f95759ee281d2026530086b2f5/protobuf-4.25.7.tar.gz", hash = "sha256:28f65ae8c14523cc2c76c1e91680958700d3eac69f45c96512c12c63d9a38807", size = 380612, upload_time = "2025-04-24T02:56:58.685Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/41/ed/9a58076cfb8edc237c92617f1d3744660e9b4457d54f3c2fdf1a4bbae5c7/protobuf-4.25.7-cp310-abi3-win32.whl", hash = "sha256:dc582cf1a73a6b40aa8e7704389b8d8352da616bc8ed5c6cc614bdd0b5ce3f7a", size = 392457, upload_time = "2025-04-24T02:56:40.798Z" }, + { url = "https://files.pythonhosted.org/packages/28/b3/e00870528029fe252cf3bd6fa535821c276db3753b44a4691aee0d52ff9e/protobuf-4.25.7-cp310-abi3-win_amd64.whl", hash = "sha256:cd873dbddb28460d1706ff4da2e7fac175f62f2a0bebc7b33141f7523c5a2399", size = 413446, upload_time = "2025-04-24T02:56:44.199Z" }, + { url = "https://files.pythonhosted.org/packages/60/1d/f450a193f875a20099d4492d2c1cb23091d65d512956fb1e167ee61b4bf0/protobuf-4.25.7-cp37-abi3-macosx_10_9_universal2.whl", hash = 
"sha256:4c899f09b0502eb39174c717ccf005b844ea93e31137c167ddcacf3e09e49610", size = 394248, upload_time = "2025-04-24T02:56:45.75Z" }, + { url = "https://files.pythonhosted.org/packages/c8/b8/ea88e9857484a0618c74121618b9e620fc50042de43cdabbebe1b93a83e0/protobuf-4.25.7-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:6d2f5dede3d112e573f0e5f9778c0c19d9f9e209727abecae1d39db789f522c6", size = 293717, upload_time = "2025-04-24T02:56:47.427Z" }, + { url = "https://files.pythonhosted.org/packages/a7/81/d0b68e9a9a76804113b6dedc6fffed868b97048bbe6f1bedc675bdb8523c/protobuf-4.25.7-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:d41fb7ae72a25fcb79b2d71e4247f0547a02e8185ed51587c22827a87e5736ed", size = 294636, upload_time = "2025-04-24T02:56:48.976Z" }, + { url = "https://files.pythonhosted.org/packages/17/d7/1e7c80cb2ea2880cfe38580dcfbb22b78b746640c9c13fc3337a6967dc4c/protobuf-4.25.7-py3-none-any.whl", hash = "sha256:e9d969f5154eaeab41404def5dcf04e62162178f4b9de98b2d3c1c70f5f84810", size = 156468, upload_time = "2025-04-24T02:56:56.957Z" }, +] + +[[package]] +name = "psycopg" +version = "3.1.20" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, + { name = "tzdata", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5c/6d/0939210f3ba089b360cf0d3741494719152567bc81303cca2c0f1e67c78a/psycopg-3.1.20.tar.gz", hash = "sha256:32f5862ab79f238496236f97fe374a7ab55b4b4bb839a74802026544735f9a07", size = 147567, upload_time = "2024-06-30T17:03:55.421Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2d/e9/126bbfd5dded758bb109526c5f5f2c2538fe293b15b6fa208db7078c72c4/psycopg-3.1.20-py3-none-any.whl", hash = "sha256:898a29f49ac9c903d554f5a6cdc44a8fc564325557c18f82e51f39c1f4fc2aeb", size = 179473, upload_time = "2024-06-30T16:57:04.093Z" }, +] + +[package.optional-dependencies] +binary = [ + { name = "psycopg-binary", marker = "implementation_name != 'pypy'" }, +] + +[[package]] +name = "psycopg-binary" +version = "3.1.20" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f8/1c/45e5f240765e80076b08c3ed02c5dfeb5e97d549769b81f8382485d70a15/psycopg_binary-3.1.20-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:802989350fcbc783732bfef660afb34439a62727642a05e8bb9acf7d68993627", size = 3350503, upload_time = "2024-06-30T16:58:27.18Z" }, + { url = "https://files.pythonhosted.org/packages/52/b8/acf96d388692d0bbf2346286f8b175778bc24046aca9181f50d9df9f4714/psycopg_binary-3.1.20-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:01b0e39128715fc37fed6cdc50ab58278eacb75709af503eb607654030975f09", size = 3480091, upload_time = "2024-06-30T16:58:33.872Z" }, + { url = "https://files.pythonhosted.org/packages/41/d4/20604282ff08823d0e90cf092738ea21b339f56a172d8583565b272fc4be/psycopg_binary-3.1.20-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77af1086bedfa0729465565c636de3519079ba523d7b7ee6e8b9486beb1ee905", size = 4434555, upload_time = "2024-06-30T16:58:40.795Z" }, + { url = "https://files.pythonhosted.org/packages/73/e0/3917b766508bb749e08225492d45ba7463b559de1c8a41d3f8f3cf0927cb/psycopg_binary-3.1.20-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e9b9562395d441e225f354e8c6303ee6993a93aaeb0dbb5b94368f3249ab2388", size = 4231402, upload_time = "2024-06-30T16:58:48.586Z" }, + { url = 
"https://files.pythonhosted.org/packages/b4/9b/251435896f7459beda355ef3e3919b6b20d067582cd6838ba248d3cff188/psycopg_binary-3.1.20-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e814d69e5447a93e7b98117ec95a8ce606d3742092fd120960551ed67c376fea", size = 4484218, upload_time = "2024-06-30T16:58:56.911Z" }, + { url = "https://files.pythonhosted.org/packages/a1/12/b2057f9bb8b5f408139266a5b48bfd7578340296d7314d964b9f09e5b18f/psycopg_binary-3.1.20-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:adf1c2061600235ae9b11d7ad357cab89ac583a76bdb0199f7a29ac947939c20", size = 4176668, upload_time = "2024-06-30T16:59:02.496Z" }, + { url = "https://files.pythonhosted.org/packages/80/9c/a62fe4167427a06e69882d274ba90903507afc89caf6bcc3671790a20875/psycopg_binary-3.1.20-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:50f1d807b4167f973a6f67bca39bf656b737f7426be158a1dc9cb0000d020744", size = 3102502, upload_time = "2024-06-30T16:59:07.216Z" }, + { url = "https://files.pythonhosted.org/packages/98/83/bceca23dd830d4069949e70dec9feb03c114cc551b104f0e2b48b1e598c6/psycopg_binary-3.1.20-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:4cf6ec1490232a5b208dae94a8269dc739e6762684c8658a0f3570402db934ae", size = 3080005, upload_time = "2024-06-30T16:59:14.927Z" }, + { url = "https://files.pythonhosted.org/packages/fc/83/bab7c8495e0eb11bf710663afb2849c2d3c91a2bf61b2bd597941f57f80b/psycopg_binary-3.1.20-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:309c09ec50a9c5c8492c2922ee666df1e30a08b08a9b63083d0daa414eccd09c", size = 3182315, upload_time = "2024-06-30T16:59:21.18Z" }, + { url = "https://files.pythonhosted.org/packages/ca/9b/bd4970faed24ae4a850ee8c6ebd621e98fd86e2962e13038603a726e2504/psycopg_binary-3.1.20-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e2c33a01799f93ef8c11a023df66280e39ca3c3249a2581adb2a0e5e80801088", size = 3222552, upload_time = "2024-06-30T16:59:27.663Z" }, + { url = "https://files.pythonhosted.org/packages/5d/0b/7ab0744f282df53968f5066d5fd8bf3f994f90bf2a8003ab40278818d0f2/psycopg_binary-3.1.20-cp311-cp311-win_amd64.whl", hash = "sha256:2c67532057fda72579b02d9d61e9cc8975982844bd5c3c9dc7f84ce8bcac859c", size = 2899115, upload_time = "2024-06-30T16:59:35.512Z" }, + { url = "https://files.pythonhosted.org/packages/94/12/6e909d3a20f7bfa6915c1fdf64ab47bb9ca44b837adb468841aad51bab6c/psycopg_binary-3.1.20-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:ef08de60f1b8503a6f6b6f5bee612de36373c09bc0e3f84409fab09e1ff72107", size = 3326944, upload_time = "2024-06-30T16:59:41.783Z" }, + { url = "https://files.pythonhosted.org/packages/e1/4e/dc425f5c8c102045486f2fa39c3cb379b073557d6bd2cf5d06de81036d7c/psycopg_binary-3.1.20-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:a4847fa31c8d3a6dd3536cf1e130dfcc454ed26be471ef274e4358bf7f709cda", size = 3475444, upload_time = "2024-06-30T16:59:48.547Z" }, + { url = "https://files.pythonhosted.org/packages/cd/cd/6484cbdb82dc29bfe43ae8c401a0be309402c304d1aaabcccf1e21908663/psycopg_binary-3.1.20-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b72e9c8c79dcc30e34e996079cfe0374b7c7233d2b5f6f25a0bc8872fe2babef", size = 4412872, upload_time = "2024-06-30T16:59:54.853Z" }, + { url = "https://files.pythonhosted.org/packages/25/d3/d403dc61f9d8b56683a6a1db47ab156807d2e1c442b044fba5763e786893/psycopg_binary-3.1.20-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:836246f3c486ef7edfce6cf6cc760173e244826ebecd54c1b63c91d4cc0341f7", size = 4216654, 
upload_time = "2024-06-30T16:59:58.935Z" }, + { url = "https://files.pythonhosted.org/packages/d3/ff/389198638ad10ec0e80fcc97b5c8092987214d9ac529b1224bf0f7e221da/psycopg_binary-3.1.20-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:015f70b17539ec0ecfb0f87bcaface0c7fa1289b6e7e2313dc7cdfdc513e3235", size = 4451310, upload_time = "2024-06-30T17:00:05.647Z" }, + { url = "https://files.pythonhosted.org/packages/84/94/9ae70af00caf9ce98f857a883ff64c5d236dfea5b7b4b8528d28e80515aa/psycopg_binary-3.1.20-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f52498dc7b41fee74e971823ede4519e3a9597d416f7a2044dbe4b98cc61ff35", size = 4153667, upload_time = "2024-06-30T17:00:12.309Z" }, + { url = "https://files.pythonhosted.org/packages/b8/57/b8a34174803683ef0f3f2fe18304f7048d31bab431f21cf511598b894ed7/psycopg_binary-3.1.20-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:92b61bae0ac881580faa1c89bf2167db7041cb01cc0bd686244f9c20a010036a", size = 3081906, upload_time = "2024-06-30T17:00:17.223Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e7/5df8c4794f13004787cd7ddfe456eec90f49d1b99f1a10947f7ba2a67487/psycopg_binary-3.1.20-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3532b8677666aadb64a4e31f6e97fe4ab71b862ab100d337faf497198339fd4d", size = 3061376, upload_time = "2024-06-30T17:00:22.232Z" }, + { url = "https://files.pythonhosted.org/packages/8e/c6/ec4abb814f54af4b659896ce10386be0c538dad8111b3daeaf672b4daa03/psycopg_binary-3.1.20-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f7df27f50a7db84c28e58be3df41f39618161096c3379ad68bc665a454c53e93", size = 3150174, upload_time = "2024-06-30T17:00:26.982Z" }, + { url = "https://files.pythonhosted.org/packages/0c/50/7b4382e5f5d256ac720ee0bd6470c7aa7d28f78570bd44d5e0b1c29eeb96/psycopg_binary-3.1.20-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:12b33c511f0be79d5a68231a10972ef9c68d954d30d176679472057ecc22891a", size = 3198871, upload_time = "2024-06-30T17:00:32.17Z" }, + { url = "https://files.pythonhosted.org/packages/76/2f/eda1b86c01d2803ac05714b94283af1e5012437dcc63dfe0679cc4d445ad/psycopg_binary-3.1.20-cp312-cp312-win_amd64.whl", hash = "sha256:6f3c0b05fc3cbd4d99aaacf5c7afa13b086df5777b9fefb78d31bf81fc70bd04", size = 2884414, upload_time = "2024-06-30T17:00:40.26Z" }, +] + +[[package]] +name = "psycopg2-binary" +version = "2.9.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cb/0e/bdc8274dc0585090b4e3432267d7be4dfbfd8971c0fa59167c711105a6bf/psycopg2-binary-2.9.10.tar.gz", hash = "sha256:4b3df0e6990aa98acda57d983942eff13d824135fe2250e6522edaa782a06de2", size = 385764, upload_time = "2024-10-16T11:24:58.126Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9c/8f/9feb01291d0d7a0a4c6a6bab24094135c2b59c6a81943752f632c75896d6/psycopg2_binary-2.9.10-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:04392983d0bb89a8717772a193cfaac58871321e3ec69514e1c4e0d4957b5aff", size = 3043397, upload_time = "2024-10-16T11:19:40.033Z" }, + { url = "https://files.pythonhosted.org/packages/15/30/346e4683532011561cd9c8dfeac6a8153dd96452fee0b12666058ab7893c/psycopg2_binary-2.9.10-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:1a6784f0ce3fec4edc64e985865c17778514325074adf5ad8f80636cd029ef7c", size = 3274806, upload_time = "2024-10-16T11:19:43.5Z" }, + { url = 
"https://files.pythonhosted.org/packages/66/6e/4efebe76f76aee7ec99166b6c023ff8abdc4e183f7b70913d7c047701b79/psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5f86c56eeb91dc3135b3fd8a95dc7ae14c538a2f3ad77a19645cf55bab1799c", size = 2851370, upload_time = "2024-10-16T11:19:46.986Z" }, + { url = "https://files.pythonhosted.org/packages/7f/fd/ff83313f86b50f7ca089b161b8e0a22bb3c319974096093cd50680433fdb/psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b3d2491d4d78b6b14f76881905c7a8a8abcf974aad4a8a0b065273a0ed7a2cb", size = 3080780, upload_time = "2024-10-16T11:19:50.242Z" }, + { url = "https://files.pythonhosted.org/packages/e6/c4/bfadd202dcda8333a7ccafdc51c541dbdfce7c2c7cda89fa2374455d795f/psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2286791ececda3a723d1910441c793be44625d86d1a4e79942751197f4d30341", size = 3264583, upload_time = "2024-10-16T11:19:54.424Z" }, + { url = "https://files.pythonhosted.org/packages/5d/f1/09f45ac25e704ac954862581f9f9ae21303cc5ded3d0b775532b407f0e90/psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:512d29bb12608891e349af6a0cccedce51677725a921c07dba6342beaf576f9a", size = 3019831, upload_time = "2024-10-16T11:19:57.762Z" }, + { url = "https://files.pythonhosted.org/packages/9e/2e/9beaea078095cc558f215e38f647c7114987d9febfc25cb2beed7c3582a5/psycopg2_binary-2.9.10-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:5a507320c58903967ef7384355a4da7ff3f28132d679aeb23572753cbf2ec10b", size = 2871822, upload_time = "2024-10-16T11:20:04.693Z" }, + { url = "https://files.pythonhosted.org/packages/01/9e/ef93c5d93f3dc9fc92786ffab39e323b9aed066ba59fdc34cf85e2722271/psycopg2_binary-2.9.10-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:6d4fa1079cab9018f4d0bd2db307beaa612b0d13ba73b5c6304b9fe2fb441ff7", size = 2820975, upload_time = "2024-10-16T11:20:11.401Z" }, + { url = "https://files.pythonhosted.org/packages/a5/f0/049e9631e3268fe4c5a387f6fc27e267ebe199acf1bc1bc9cbde4bd6916c/psycopg2_binary-2.9.10-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:851485a42dbb0bdc1edcdabdb8557c09c9655dfa2ca0460ff210522e073e319e", size = 2919320, upload_time = "2024-10-16T11:20:17.959Z" }, + { url = "https://files.pythonhosted.org/packages/dc/9a/bcb8773b88e45fb5a5ea8339e2104d82c863a3b8558fbb2aadfe66df86b3/psycopg2_binary-2.9.10-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:35958ec9e46432d9076286dda67942ed6d968b9c3a6a2fd62b48939d1d78bf68", size = 2957617, upload_time = "2024-10-16T11:20:24.711Z" }, + { url = "https://files.pythonhosted.org/packages/e2/6b/144336a9bf08a67d217b3af3246abb1d027095dab726f0687f01f43e8c03/psycopg2_binary-2.9.10-cp311-cp311-win32.whl", hash = "sha256:ecced182e935529727401b24d76634a357c71c9275b356efafd8a2a91ec07392", size = 1024618, upload_time = "2024-10-16T11:20:27.718Z" }, + { url = "https://files.pythonhosted.org/packages/61/69/3b3d7bd583c6d3cbe5100802efa5beacaacc86e37b653fc708bf3d6853b8/psycopg2_binary-2.9.10-cp311-cp311-win_amd64.whl", hash = "sha256:ee0e8c683a7ff25d23b55b11161c2663d4b099770f6085ff0a20d4505778d6b4", size = 1163816, upload_time = "2024-10-16T11:20:30.777Z" }, + { url = "https://files.pythonhosted.org/packages/49/7d/465cc9795cf76f6d329efdafca74693714556ea3891813701ac1fee87545/psycopg2_binary-2.9.10-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:880845dfe1f85d9d5f7c412efea7a08946a46894537e4e5d091732eb1d34d9a0", size = 3044771, 
upload_time = "2024-10-16T11:20:35.234Z" }, + { url = "https://files.pythonhosted.org/packages/8b/31/6d225b7b641a1a2148e3ed65e1aa74fc86ba3fee850545e27be9e1de893d/psycopg2_binary-2.9.10-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:9440fa522a79356aaa482aa4ba500b65f28e5d0e63b801abf6aa152a29bd842a", size = 3275336, upload_time = "2024-10-16T11:20:38.742Z" }, + { url = "https://files.pythonhosted.org/packages/30/b7/a68c2b4bff1cbb1728e3ec864b2d92327c77ad52edcd27922535a8366f68/psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e3923c1d9870c49a2d44f795df0c889a22380d36ef92440ff618ec315757e539", size = 2851637, upload_time = "2024-10-16T11:20:42.145Z" }, + { url = "https://files.pythonhosted.org/packages/0b/b1/cfedc0e0e6f9ad61f8657fd173b2f831ce261c02a08c0b09c652b127d813/psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7b2c956c028ea5de47ff3a8d6b3cc3330ab45cf0b7c3da35a2d6ff8420896526", size = 3082097, upload_time = "2024-10-16T11:20:46.185Z" }, + { url = "https://files.pythonhosted.org/packages/18/ed/0a8e4153c9b769f59c02fb5e7914f20f0b2483a19dae7bf2db54b743d0d0/psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f758ed67cab30b9a8d2833609513ce4d3bd027641673d4ebc9c067e4d208eec1", size = 3264776, upload_time = "2024-10-16T11:20:50.879Z" }, + { url = "https://files.pythonhosted.org/packages/10/db/d09da68c6a0cdab41566b74e0a6068a425f077169bed0946559b7348ebe9/psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8cd9b4f2cfab88ed4a9106192de509464b75a906462fb846b936eabe45c2063e", size = 3020968, upload_time = "2024-10-16T11:20:56.819Z" }, + { url = "https://files.pythonhosted.org/packages/94/28/4d6f8c255f0dfffb410db2b3f9ac5218d959a66c715c34cac31081e19b95/psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dc08420625b5a20b53551c50deae6e231e6371194fa0651dbe0fb206452ae1f", size = 2872334, upload_time = "2024-10-16T11:21:02.411Z" }, + { url = "https://files.pythonhosted.org/packages/05/f7/20d7bf796593c4fea95e12119d6cc384ff1f6141a24fbb7df5a668d29d29/psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:d7cd730dfa7c36dbe8724426bf5612798734bff2d3c3857f36f2733f5bfc7c00", size = 2822722, upload_time = "2024-10-16T11:21:09.01Z" }, + { url = "https://files.pythonhosted.org/packages/4d/e4/0c407ae919ef626dbdb32835a03b6737013c3cc7240169843965cada2bdf/psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:155e69561d54d02b3c3209545fb08938e27889ff5a10c19de8d23eb5a41be8a5", size = 2920132, upload_time = "2024-10-16T11:21:16.339Z" }, + { url = "https://files.pythonhosted.org/packages/2d/70/aa69c9f69cf09a01da224909ff6ce8b68faeef476f00f7ec377e8f03be70/psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c3cc28a6fd5a4a26224007712e79b81dbaee2ffb90ff406256158ec4d7b52b47", size = 2959312, upload_time = "2024-10-16T11:21:25.584Z" }, + { url = "https://files.pythonhosted.org/packages/d3/bd/213e59854fafe87ba47814bf413ace0dcee33a89c8c8c814faca6bc7cf3c/psycopg2_binary-2.9.10-cp312-cp312-win32.whl", hash = "sha256:ec8a77f521a17506a24a5f626cb2aee7850f9b69a0afe704586f63a464f3cd64", size = 1025191, upload_time = "2024-10-16T11:21:29.912Z" }, + { url = "https://files.pythonhosted.org/packages/92/29/06261ea000e2dc1e22907dbbc483a1093665509ea586b29b8986a0e56733/psycopg2_binary-2.9.10-cp312-cp312-win_amd64.whl", hash = 
"sha256:18c5ee682b9c6dd3696dad6e54cc7ff3a1a9020df6a5c0f861ef8bfd338c3ca0", size = 1164031, upload_time = "2024-10-16T11:21:34.211Z" }, + { url = "https://files.pythonhosted.org/packages/3e/30/d41d3ba765609c0763505d565c4d12d8f3c79793f0d0f044ff5a28bf395b/psycopg2_binary-2.9.10-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:26540d4a9a4e2b096f1ff9cce51253d0504dca5a85872c7f7be23be5a53eb18d", size = 3044699, upload_time = "2024-10-16T11:21:42.841Z" }, + { url = "https://files.pythonhosted.org/packages/35/44/257ddadec7ef04536ba71af6bc6a75ec05c5343004a7ec93006bee66c0bc/psycopg2_binary-2.9.10-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:e217ce4d37667df0bc1c397fdcd8de5e81018ef305aed9415c3b093faaeb10fb", size = 3275245, upload_time = "2024-10-16T11:21:51.989Z" }, + { url = "https://files.pythonhosted.org/packages/1b/11/48ea1cd11de67f9efd7262085588790a95d9dfcd9b8a687d46caf7305c1a/psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:245159e7ab20a71d989da00f280ca57da7641fa2cdcf71749c193cea540a74f7", size = 2851631, upload_time = "2024-10-16T11:21:57.584Z" }, + { url = "https://files.pythonhosted.org/packages/62/e0/62ce5ee650e6c86719d621a761fe4bc846ab9eff8c1f12b1ed5741bf1c9b/psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c4ded1a24b20021ebe677b7b08ad10bf09aac197d6943bfe6fec70ac4e4690d", size = 3082140, upload_time = "2024-10-16T11:22:02.005Z" }, + { url = "https://files.pythonhosted.org/packages/27/ce/63f946c098611f7be234c0dd7cb1ad68b0b5744d34f68062bb3c5aa510c8/psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3abb691ff9e57d4a93355f60d4f4c1dd2d68326c968e7db17ea96df3c023ef73", size = 3264762, upload_time = "2024-10-16T11:22:06.412Z" }, + { url = "https://files.pythonhosted.org/packages/43/25/c603cd81402e69edf7daa59b1602bd41eb9859e2824b8c0855d748366ac9/psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8608c078134f0b3cbd9f89b34bd60a943b23fd33cc5f065e8d5f840061bd0673", size = 3020967, upload_time = "2024-10-16T11:22:11.583Z" }, + { url = "https://files.pythonhosted.org/packages/5f/d6/8708d8c6fca531057fa170cdde8df870e8b6a9b136e82b361c65e42b841e/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:230eeae2d71594103cd5b93fd29d1ace6420d0b86f4778739cb1a5a32f607d1f", size = 2872326, upload_time = "2024-10-16T11:22:16.406Z" }, + { url = "https://files.pythonhosted.org/packages/ce/ac/5b1ea50fc08a9df82de7e1771537557f07c2632231bbab652c7e22597908/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909", size = 2822712, upload_time = "2024-10-16T11:22:21.366Z" }, + { url = "https://files.pythonhosted.org/packages/c4/fc/504d4503b2abc4570fac3ca56eb8fed5e437bf9c9ef13f36b6621db8ef00/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1", size = 2920155, upload_time = "2024-10-16T11:22:25.684Z" }, + { url = "https://files.pythonhosted.org/packages/b2/d1/323581e9273ad2c0dbd1902f3fb50c441da86e894b6e25a73c3fda32c57e/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567", size = 2959356, upload_time = "2024-10-16T11:22:30.562Z" }, + { url = 
"https://files.pythonhosted.org/packages/08/50/d13ea0a054189ae1bc21af1d85b6f8bb9bbc5572991055d70ad9006fe2d6/psycopg2_binary-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:27422aa5f11fbcd9b18da48373eb67081243662f9b46e6fd07c3eb46e4535142", size = 2569224, upload_time = "2025-01-04T20:09:19.234Z" }, +] + +[[package]] +name = "pyarrow" +version = "20.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/ee/a7810cb9f3d6e9238e61d312076a9859bf3668fd21c69744de9532383912/pyarrow-20.0.0.tar.gz", hash = "sha256:febc4a913592573c8d5805091a6c2b5064c8bd6e002131f01061797d91c783c1", size = 1125187, upload_time = "2025-04-27T12:34:23.264Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/47/a2/b7930824181ceadd0c63c1042d01fa4ef63eee233934826a7a2a9af6e463/pyarrow-20.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:24ca380585444cb2a31324c546a9a56abbe87e26069189e14bdba19c86c049f0", size = 30856035, upload_time = "2025-04-27T12:28:40.78Z" }, + { url = "https://files.pythonhosted.org/packages/9b/18/c765770227d7f5bdfa8a69f64b49194352325c66a5c3bb5e332dfd5867d9/pyarrow-20.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:95b330059ddfdc591a3225f2d272123be26c8fa76e8c9ee1a77aad507361cfdb", size = 32309552, upload_time = "2025-04-27T12:28:47.051Z" }, + { url = "https://files.pythonhosted.org/packages/44/fb/dfb2dfdd3e488bb14f822d7335653092dde150cffc2da97de6e7500681f9/pyarrow-20.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f0fb1041267e9968c6d0d2ce3ff92e3928b243e2b6d11eeb84d9ac547308232", size = 41334704, upload_time = "2025-04-27T12:28:55.064Z" }, + { url = "https://files.pythonhosted.org/packages/58/0d/08a95878d38808051a953e887332d4a76bc06c6ee04351918ee1155407eb/pyarrow-20.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8ff87cc837601532cc8242d2f7e09b4e02404de1b797aee747dd4ba4bd6313f", size = 42399836, upload_time = "2025-04-27T12:29:02.13Z" }, + { url = "https://files.pythonhosted.org/packages/f3/cd/efa271234dfe38f0271561086eedcad7bc0f2ddd1efba423916ff0883684/pyarrow-20.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:7a3a5dcf54286e6141d5114522cf31dd67a9e7c9133d150799f30ee302a7a1ab", size = 40711789, upload_time = "2025-04-27T12:29:09.951Z" }, + { url = "https://files.pythonhosted.org/packages/46/1f/7f02009bc7fc8955c391defee5348f510e589a020e4b40ca05edcb847854/pyarrow-20.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:a6ad3e7758ecf559900261a4df985662df54fb7fdb55e8e3b3aa99b23d526b62", size = 42301124, upload_time = "2025-04-27T12:29:17.187Z" }, + { url = "https://files.pythonhosted.org/packages/4f/92/692c562be4504c262089e86757a9048739fe1acb4024f92d39615e7bab3f/pyarrow-20.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6bb830757103a6cb300a04610e08d9636f0cd223d32f388418ea893a3e655f1c", size = 42916060, upload_time = "2025-04-27T12:29:24.253Z" }, + { url = "https://files.pythonhosted.org/packages/a4/ec/9f5c7e7c828d8e0a3c7ef50ee62eca38a7de2fa6eb1b8fa43685c9414fef/pyarrow-20.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:96e37f0766ecb4514a899d9a3554fadda770fb57ddf42b63d80f14bc20aa7db3", size = 44547640, upload_time = "2025-04-27T12:29:32.782Z" }, + { url = "https://files.pythonhosted.org/packages/54/96/46613131b4727f10fd2ffa6d0d6f02efcc09a0e7374eff3b5771548aa95b/pyarrow-20.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:3346babb516f4b6fd790da99b98bed9708e3f02e734c84971faccb20736848dc", size = 25781491, upload_time = 
"2025-04-27T12:29:38.464Z" }, + { url = "https://files.pythonhosted.org/packages/a1/d6/0c10e0d54f6c13eb464ee9b67a68b8c71bcf2f67760ef5b6fbcddd2ab05f/pyarrow-20.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:75a51a5b0eef32727a247707d4755322cb970be7e935172b6a3a9f9ae98404ba", size = 30815067, upload_time = "2025-04-27T12:29:44.384Z" }, + { url = "https://files.pythonhosted.org/packages/7e/e2/04e9874abe4094a06fd8b0cbb0f1312d8dd7d707f144c2ec1e5e8f452ffa/pyarrow-20.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:211d5e84cecc640c7a3ab900f930aaff5cd2702177e0d562d426fb7c4f737781", size = 32297128, upload_time = "2025-04-27T12:29:52.038Z" }, + { url = "https://files.pythonhosted.org/packages/31/fd/c565e5dcc906a3b471a83273039cb75cb79aad4a2d4a12f76cc5ae90a4b8/pyarrow-20.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ba3cf4182828be7a896cbd232aa8dd6a31bd1f9e32776cc3796c012855e1199", size = 41334890, upload_time = "2025-04-27T12:29:59.452Z" }, + { url = "https://files.pythonhosted.org/packages/af/a9/3bdd799e2c9b20c1ea6dc6fa8e83f29480a97711cf806e823f808c2316ac/pyarrow-20.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c3a01f313ffe27ac4126f4c2e5ea0f36a5fc6ab51f8726cf41fee4b256680bd", size = 42421775, upload_time = "2025-04-27T12:30:06.875Z" }, + { url = "https://files.pythonhosted.org/packages/10/f7/da98ccd86354c332f593218101ae56568d5dcedb460e342000bd89c49cc1/pyarrow-20.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:a2791f69ad72addd33510fec7bb14ee06c2a448e06b649e264c094c5b5f7ce28", size = 40687231, upload_time = "2025-04-27T12:30:13.954Z" }, + { url = "https://files.pythonhosted.org/packages/bb/1b/2168d6050e52ff1e6cefc61d600723870bf569cbf41d13db939c8cf97a16/pyarrow-20.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:4250e28a22302ce8692d3a0e8ec9d9dde54ec00d237cff4dfa9c1fbf79e472a8", size = 42295639, upload_time = "2025-04-27T12:30:21.949Z" }, + { url = "https://files.pythonhosted.org/packages/b2/66/2d976c0c7158fd25591c8ca55aee026e6d5745a021915a1835578707feb3/pyarrow-20.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:89e030dc58fc760e4010148e6ff164d2f44441490280ef1e97a542375e41058e", size = 42908549, upload_time = "2025-04-27T12:30:29.551Z" }, + { url = "https://files.pythonhosted.org/packages/31/a9/dfb999c2fc6911201dcbf348247f9cc382a8990f9ab45c12eabfd7243a38/pyarrow-20.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6102b4864d77102dbbb72965618e204e550135a940c2534711d5ffa787df2a5a", size = 44557216, upload_time = "2025-04-27T12:30:36.977Z" }, + { url = "https://files.pythonhosted.org/packages/a0/8e/9adee63dfa3911be2382fb4d92e4b2e7d82610f9d9f668493bebaa2af50f/pyarrow-20.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:96d6a0a37d9c98be08f5ed6a10831d88d52cac7b13f5287f1e0f625a0de8062b", size = 25660496, upload_time = "2025-04-27T12:30:42.809Z" }, + { url = "https://files.pythonhosted.org/packages/9b/aa/daa413b81446d20d4dad2944110dcf4cf4f4179ef7f685dd5a6d7570dc8e/pyarrow-20.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:a15532e77b94c61efadde86d10957950392999503b3616b2ffcef7621a002893", size = 30798501, upload_time = "2025-04-27T12:30:48.351Z" }, + { url = "https://files.pythonhosted.org/packages/ff/75/2303d1caa410925de902d32ac215dc80a7ce7dd8dfe95358c165f2adf107/pyarrow-20.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:dd43f58037443af715f34f1322c782ec463a3c8a94a85fdb2d987ceb5658e061", size = 32277895, upload_time = "2025-04-27T12:30:55.238Z" }, + { url = 
"https://files.pythonhosted.org/packages/92/41/fe18c7c0b38b20811b73d1bdd54b1fccba0dab0e51d2048878042d84afa8/pyarrow-20.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa0d288143a8585806e3cc7c39566407aab646fb9ece164609dac1cfff45f6ae", size = 41327322, upload_time = "2025-04-27T12:31:05.587Z" }, + { url = "https://files.pythonhosted.org/packages/da/ab/7dbf3d11db67c72dbf36ae63dcbc9f30b866c153b3a22ef728523943eee6/pyarrow-20.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6953f0114f8d6f3d905d98e987d0924dabce59c3cda380bdfaa25a6201563b4", size = 42411441, upload_time = "2025-04-27T12:31:15.675Z" }, + { url = "https://files.pythonhosted.org/packages/90/c3/0c7da7b6dac863af75b64e2f827e4742161128c350bfe7955b426484e226/pyarrow-20.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:991f85b48a8a5e839b2128590ce07611fae48a904cae6cab1f089c5955b57eb5", size = 40677027, upload_time = "2025-04-27T12:31:24.631Z" }, + { url = "https://files.pythonhosted.org/packages/be/27/43a47fa0ff9053ab5203bb3faeec435d43c0d8bfa40179bfd076cdbd4e1c/pyarrow-20.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:97c8dc984ed09cb07d618d57d8d4b67a5100a30c3818c2fb0b04599f0da2de7b", size = 42281473, upload_time = "2025-04-27T12:31:31.311Z" }, + { url = "https://files.pythonhosted.org/packages/bc/0b/d56c63b078876da81bbb9ba695a596eabee9b085555ed12bf6eb3b7cab0e/pyarrow-20.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9b71daf534f4745818f96c214dbc1e6124d7daf059167330b610fc69b6f3d3e3", size = 42893897, upload_time = "2025-04-27T12:31:39.406Z" }, + { url = "https://files.pythonhosted.org/packages/92/ac/7d4bd020ba9145f354012838692d48300c1b8fe5634bfda886abcada67ed/pyarrow-20.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e8b88758f9303fa5a83d6c90e176714b2fd3852e776fc2d7e42a22dd6c2fb368", size = 44543847, upload_time = "2025-04-27T12:31:45.997Z" }, + { url = "https://files.pythonhosted.org/packages/9d/07/290f4abf9ca702c5df7b47739c1b2c83588641ddfa2cc75e34a301d42e55/pyarrow-20.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:30b3051b7975801c1e1d387e17c588d8ab05ced9b1e14eec57915f79869b5031", size = 25653219, upload_time = "2025-04-27T12:31:54.11Z" }, + { url = "https://files.pythonhosted.org/packages/95/df/720bb17704b10bd69dde086e1400b8eefb8f58df3f8ac9cff6c425bf57f1/pyarrow-20.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:ca151afa4f9b7bc45bcc791eb9a89e90a9eb2772767d0b1e5389609c7d03db63", size = 30853957, upload_time = "2025-04-27T12:31:59.215Z" }, + { url = "https://files.pythonhosted.org/packages/d9/72/0d5f875efc31baef742ba55a00a25213a19ea64d7176e0fe001c5d8b6e9a/pyarrow-20.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:4680f01ecd86e0dd63e39eb5cd59ef9ff24a9d166db328679e36c108dc993d4c", size = 32247972, upload_time = "2025-04-27T12:32:05.369Z" }, + { url = "https://files.pythonhosted.org/packages/d5/bc/e48b4fa544d2eea72f7844180eb77f83f2030b84c8dad860f199f94307ed/pyarrow-20.0.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f4c8534e2ff059765647aa69b75d6543f9fef59e2cd4c6d18015192565d2b70", size = 41256434, upload_time = "2025-04-27T12:32:11.814Z" }, + { url = "https://files.pythonhosted.org/packages/c3/01/974043a29874aa2cf4f87fb07fd108828fc7362300265a2a64a94965e35b/pyarrow-20.0.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e1f8a47f4b4ae4c69c4d702cfbdfe4d41e18e5c7ef6f1bb1c50918c1e81c57b", size = 42353648, upload_time = "2025-04-27T12:32:20.766Z" }, + { url = 
"https://files.pythonhosted.org/packages/68/95/cc0d3634cde9ca69b0e51cbe830d8915ea32dda2157560dda27ff3b3337b/pyarrow-20.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:a1f60dc14658efaa927f8214734f6a01a806d7690be4b3232ba526836d216122", size = 40619853, upload_time = "2025-04-27T12:32:28.1Z" }, + { url = "https://files.pythonhosted.org/packages/29/c2/3ad40e07e96a3e74e7ed7cc8285aadfa84eb848a798c98ec0ad009eb6bcc/pyarrow-20.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:204a846dca751428991346976b914d6d2a82ae5b8316a6ed99789ebf976551e6", size = 42241743, upload_time = "2025-04-27T12:32:35.792Z" }, + { url = "https://files.pythonhosted.org/packages/eb/cb/65fa110b483339add6a9bc7b6373614166b14e20375d4daa73483755f830/pyarrow-20.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f3b117b922af5e4c6b9a9115825726cac7d8b1421c37c2b5e24fbacc8930612c", size = 42839441, upload_time = "2025-04-27T12:32:46.64Z" }, + { url = "https://files.pythonhosted.org/packages/98/7b/f30b1954589243207d7a0fbc9997401044bf9a033eec78f6cb50da3f304a/pyarrow-20.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e724a3fd23ae5b9c010e7be857f4405ed5e679db5c93e66204db1a69f733936a", size = 44503279, upload_time = "2025-04-27T12:32:56.503Z" }, + { url = "https://files.pythonhosted.org/packages/37/40/ad395740cd641869a13bcf60851296c89624662575621968dcfafabaa7f6/pyarrow-20.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:82f1ee5133bd8f49d31be1299dc07f585136679666b502540db854968576faf9", size = 25944982, upload_time = "2025-04-27T12:33:04.72Z" }, +] + +[[package]] +name = "pyarrow-hotfix" +version = "0.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d2/ed/c3e8677f7abf3981838c2af7b5ac03e3589b3ef94fcb31d575426abae904/pyarrow_hotfix-0.7.tar.gz", hash = "sha256:59399cd58bdd978b2e42816a4183a55c6472d4e33d183351b6069f11ed42661d", size = 9910, upload_time = "2025-04-25T10:17:06.247Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2e/c3/94ade4906a2f88bc935772f59c934013b4205e773bcb4239db114a6da136/pyarrow_hotfix-0.7-py3-none-any.whl", hash = "sha256:3236f3b5f1260f0e2ac070a55c1a7b339c4bb7267839bd2015e283234e758100", size = 7923, upload_time = "2025-04-25T10:17:05.224Z" }, +] + +[[package]] +name = "pyasn1" +version = "0.6.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ba/e9/01f1a64245b89f039897cb0130016d79f77d52669aae6ee7b159a6c4c018/pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034", size = 145322, upload_time = "2024-09-10T22:41:42.55Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/f1/d6a797abb14f6283c0ddff96bbdd46937f64122b8c925cab503dd37f8214/pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629", size = 83135, upload_time = "2024-09-11T16:00:36.122Z" }, +] + +[[package]] +name = "pyasn1-modules" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyasn1" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e9/e6/78ebbb10a8c8e4b61a59249394a4a594c1a7af95593dc933a349c8d00964/pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6", size = 307892, upload_time = "2025-03-28T02:41:22.17Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259, upload_time = "2025-03-28T02:41:19.028Z" }, +] + +[[package]] +name = "pydantic" +version = "2.11.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/10/2e/ca897f093ee6c5f3b0bee123ee4465c50e75431c3d5b6a3b44a47134e891/pydantic-2.11.3.tar.gz", hash = "sha256:7471657138c16adad9322fe3070c0116dd6c3ad8d649300e3cbdfe91f4db4ec3", size = 785513, upload_time = "2025-04-08T13:27:06.399Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b0/1d/407b29780a289868ed696d1616f4aad49d6388e5a77f567dcd2629dcd7b8/pydantic-2.11.3-py3-none-any.whl", hash = "sha256:a082753436a07f9ba1289c6ffa01cd93db3548776088aa917cc43b63f68fa60f", size = 443591, upload_time = "2025-04-08T13:27:03.789Z" }, +] + +[[package]] +name = "pydantic-core" +version = "2.33.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/17/19/ed6a078a5287aea7922de6841ef4c06157931622c89c2a47940837b5eecd/pydantic_core-2.33.1.tar.gz", hash = "sha256:bcc9c6fdb0ced789245b02b7d6603e17d1563064ddcfc36f046b61c0c05dd9df", size = 434395, upload_time = "2025-04-02T09:49:41.8Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d6/7f/c6298830cb780c46b4f46bb24298d01019ffa4d21769f39b908cd14bbd50/pydantic_core-2.33.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:6e966fc3caaf9f1d96b349b0341c70c8d6573bf1bac7261f7b0ba88f96c56c24", size = 2044224, upload_time = "2025-04-02T09:47:04.199Z" }, + { url = "https://files.pythonhosted.org/packages/a8/65/6ab3a536776cad5343f625245bd38165d6663256ad43f3a200e5936afd6c/pydantic_core-2.33.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bfd0adeee563d59c598ceabddf2c92eec77abcb3f4a391b19aa7366170bd9e30", size = 1858845, upload_time = "2025-04-02T09:47:05.686Z" }, + { url = "https://files.pythonhosted.org/packages/e9/15/9a22fd26ba5ee8c669d4b8c9c244238e940cd5d818649603ca81d1c69861/pydantic_core-2.33.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91815221101ad3c6b507804178a7bb5cb7b2ead9ecd600041669c8d805ebd595", size = 1910029, upload_time = "2025-04-02T09:47:07.042Z" }, + { url = "https://files.pythonhosted.org/packages/d5/33/8cb1a62818974045086f55f604044bf35b9342900318f9a2a029a1bec460/pydantic_core-2.33.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9fea9c1869bb4742d174a57b4700c6dadea951df8b06de40c2fedb4f02931c2e", size = 1997784, upload_time = "2025-04-02T09:47:08.63Z" }, + { url = "https://files.pythonhosted.org/packages/c0/ca/49958e4df7715c71773e1ea5be1c74544923d10319173264e6db122543f9/pydantic_core-2.33.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d20eb4861329bb2484c021b9d9a977566ab16d84000a57e28061151c62b349a", size = 2141075, upload_time = "2025-04-02T09:47:10.267Z" }, + { url = "https://files.pythonhosted.org/packages/7b/a6/0b3a167a9773c79ba834b959b4e18c3ae9216b8319bd8422792abc8a41b1/pydantic_core-2.33.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:0fb935c5591573ae3201640579f30128ccc10739b45663f93c06796854405505", size = 2745849, upload_time = "2025-04-02T09:47:11.724Z" }, + { url = "https://files.pythonhosted.org/packages/0b/60/516484135173aa9e5861d7a0663dce82e4746d2e7f803627d8c25dfa5578/pydantic_core-2.33.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c964fd24e6166420d18fb53996d8c9fd6eac9bf5ae3ec3d03015be4414ce497f", size = 2005794, upload_time = "2025-04-02T09:47:13.099Z" }, + { url = "https://files.pythonhosted.org/packages/86/70/05b1eb77459ad47de00cf78ee003016da0cedf8b9170260488d7c21e9181/pydantic_core-2.33.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:681d65e9011f7392db5aa002b7423cc442d6a673c635668c227c6c8d0e5a4f77", size = 2123237, upload_time = "2025-04-02T09:47:14.355Z" }, + { url = "https://files.pythonhosted.org/packages/c7/57/12667a1409c04ae7dc95d3b43158948eb0368e9c790be8b095cb60611459/pydantic_core-2.33.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e100c52f7355a48413e2999bfb4e139d2977a904495441b374f3d4fb4a170961", size = 2086351, upload_time = "2025-04-02T09:47:15.676Z" }, + { url = "https://files.pythonhosted.org/packages/57/61/cc6d1d1c1664b58fdd6ecc64c84366c34ec9b606aeb66cafab6f4088974c/pydantic_core-2.33.1-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:048831bd363490be79acdd3232f74a0e9951b11b2b4cc058aeb72b22fdc3abe1", size = 2258914, upload_time = "2025-04-02T09:47:17Z" }, + { url = "https://files.pythonhosted.org/packages/d1/0a/edb137176a1f5419b2ddee8bde6a0a548cfa3c74f657f63e56232df8de88/pydantic_core-2.33.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:bdc84017d28459c00db6f918a7272a5190bec3090058334e43a76afb279eac7c", size = 2257385, upload_time = "2025-04-02T09:47:18.631Z" }, + { url = "https://files.pythonhosted.org/packages/26/3c/48ca982d50e4b0e1d9954919c887bdc1c2b462801bf408613ccc641b3daa/pydantic_core-2.33.1-cp311-cp311-win32.whl", hash = "sha256:32cd11c5914d1179df70406427097c7dcde19fddf1418c787540f4b730289896", size = 1923765, upload_time = "2025-04-02T09:47:20.34Z" }, + { url = "https://files.pythonhosted.org/packages/33/cd/7ab70b99e5e21559f5de38a0928ea84e6f23fdef2b0d16a6feaf942b003c/pydantic_core-2.33.1-cp311-cp311-win_amd64.whl", hash = "sha256:2ea62419ba8c397e7da28a9170a16219d310d2cf4970dbc65c32faf20d828c83", size = 1950688, upload_time = "2025-04-02T09:47:22.029Z" }, + { url = "https://files.pythonhosted.org/packages/4b/ae/db1fc237b82e2cacd379f63e3335748ab88b5adde98bf7544a1b1bd10a84/pydantic_core-2.33.1-cp311-cp311-win_arm64.whl", hash = "sha256:fc903512177361e868bc1f5b80ac8c8a6e05fcdd574a5fb5ffeac5a9982b9e89", size = 1908185, upload_time = "2025-04-02T09:47:23.385Z" }, + { url = "https://files.pythonhosted.org/packages/c8/ce/3cb22b07c29938f97ff5f5bb27521f95e2ebec399b882392deb68d6c440e/pydantic_core-2.33.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:1293d7febb995e9d3ec3ea09caf1a26214eec45b0f29f6074abb004723fc1de8", size = 2026640, upload_time = "2025-04-02T09:47:25.394Z" }, + { url = "https://files.pythonhosted.org/packages/19/78/f381d643b12378fee782a72126ec5d793081ef03791c28a0fd542a5bee64/pydantic_core-2.33.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:99b56acd433386c8f20be5c4000786d1e7ca0523c8eefc995d14d79c7a081498", size = 1852649, upload_time = "2025-04-02T09:47:27.417Z" }, + { url = "https://files.pythonhosted.org/packages/9d/2b/98a37b80b15aac9eb2c6cfc6dbd35e5058a352891c5cce3a8472d77665a6/pydantic_core-2.33.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:35a5ec3fa8c2fe6c53e1b2ccc2454398f95d5393ab398478f53e1afbbeb4d939", size = 1892472, upload_time = "2025-04-02T09:47:29.006Z" }, + { url = "https://files.pythonhosted.org/packages/4e/d4/3c59514e0f55a161004792b9ff3039da52448f43f5834f905abef9db6e4a/pydantic_core-2.33.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b172f7b9d2f3abc0efd12e3386f7e48b576ef309544ac3a63e5e9cdd2e24585d", size = 1977509, upload_time = "2025-04-02T09:47:33.464Z" }, + { url = "https://files.pythonhosted.org/packages/a9/b6/c2c7946ef70576f79a25db59a576bce088bdc5952d1b93c9789b091df716/pydantic_core-2.33.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9097b9f17f91eea659b9ec58148c0747ec354a42f7389b9d50701610d86f812e", size = 2128702, upload_time = "2025-04-02T09:47:34.812Z" }, + { url = "https://files.pythonhosted.org/packages/88/fe/65a880f81e3f2a974312b61f82a03d85528f89a010ce21ad92f109d94deb/pydantic_core-2.33.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc77ec5b7e2118b152b0d886c7514a4653bcb58c6b1d760134a9fab915f777b3", size = 2679428, upload_time = "2025-04-02T09:47:37.315Z" }, + { url = "https://files.pythonhosted.org/packages/6f/ff/4459e4146afd0462fb483bb98aa2436d69c484737feaceba1341615fb0ac/pydantic_core-2.33.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5e3d15245b08fa4a84cefc6c9222e6f37c98111c8679fbd94aa145f9a0ae23d", size = 2008753, upload_time = "2025-04-02T09:47:39.013Z" }, + { url = "https://files.pythonhosted.org/packages/7c/76/1c42e384e8d78452ededac8b583fe2550c84abfef83a0552e0e7478ccbc3/pydantic_core-2.33.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ef99779001d7ac2e2461d8ab55d3373fe7315caefdbecd8ced75304ae5a6fc6b", size = 2114849, upload_time = "2025-04-02T09:47:40.427Z" }, + { url = "https://files.pythonhosted.org/packages/00/72/7d0cf05095c15f7ffe0eb78914b166d591c0eed72f294da68378da205101/pydantic_core-2.33.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:fc6bf8869e193855e8d91d91f6bf59699a5cdfaa47a404e278e776dd7f168b39", size = 2069541, upload_time = "2025-04-02T09:47:42.01Z" }, + { url = "https://files.pythonhosted.org/packages/b3/69/94a514066bb7d8be499aa764926937409d2389c09be0b5107a970286ef81/pydantic_core-2.33.1-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:b1caa0bc2741b043db7823843e1bde8aaa58a55a58fda06083b0569f8b45693a", size = 2239225, upload_time = "2025-04-02T09:47:43.425Z" }, + { url = "https://files.pythonhosted.org/packages/84/b0/e390071eadb44b41f4f54c3cef64d8bf5f9612c92686c9299eaa09e267e2/pydantic_core-2.33.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ec259f62538e8bf364903a7d0d0239447059f9434b284f5536e8402b7dd198db", size = 2248373, upload_time = "2025-04-02T09:47:44.979Z" }, + { url = "https://files.pythonhosted.org/packages/d6/b2/288b3579ffc07e92af66e2f1a11be3b056fe1214aab314748461f21a31c3/pydantic_core-2.33.1-cp312-cp312-win32.whl", hash = "sha256:e14f369c98a7c15772b9da98987f58e2b509a93235582838bd0d1d8c08b68fda", size = 1907034, upload_time = "2025-04-02T09:47:46.843Z" }, + { url = "https://files.pythonhosted.org/packages/02/28/58442ad1c22b5b6742b992ba9518420235adced665513868f99a1c2638a5/pydantic_core-2.33.1-cp312-cp312-win_amd64.whl", hash = "sha256:1c607801d85e2e123357b3893f82c97a42856192997b95b4d8325deb1cd0c5f4", size = 1956848, upload_time = "2025-04-02T09:47:48.404Z" }, + { url = 
"https://files.pythonhosted.org/packages/a1/eb/f54809b51c7e2a1d9f439f158b8dd94359321abcc98767e16fc48ae5a77e/pydantic_core-2.33.1-cp312-cp312-win_arm64.whl", hash = "sha256:8d13f0276806ee722e70a1c93da19748594f19ac4299c7e41237fc791d1861ea", size = 1903986, upload_time = "2025-04-02T09:47:49.839Z" }, + { url = "https://files.pythonhosted.org/packages/7a/24/eed3466a4308d79155f1cdd5c7432c80ddcc4530ba8623b79d5ced021641/pydantic_core-2.33.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:70af6a21237b53d1fe7b9325b20e65cbf2f0a848cf77bed492b029139701e66a", size = 2033551, upload_time = "2025-04-02T09:47:51.648Z" }, + { url = "https://files.pythonhosted.org/packages/ab/14/df54b1a0bc9b6ded9b758b73139d2c11b4e8eb43e8ab9c5847c0a2913ada/pydantic_core-2.33.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:282b3fe1bbbe5ae35224a0dbd05aed9ccabccd241e8e6b60370484234b456266", size = 1852785, upload_time = "2025-04-02T09:47:53.149Z" }, + { url = "https://files.pythonhosted.org/packages/fa/96/e275f15ff3d34bb04b0125d9bc8848bf69f25d784d92a63676112451bfb9/pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b315e596282bbb5822d0c7ee9d255595bd7506d1cb20c2911a4da0b970187d3", size = 1897758, upload_time = "2025-04-02T09:47:55.006Z" }, + { url = "https://files.pythonhosted.org/packages/b7/d8/96bc536e975b69e3a924b507d2a19aedbf50b24e08c80fb00e35f9baaed8/pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1dfae24cf9921875ca0ca6a8ecb4bb2f13c855794ed0d468d6abbec6e6dcd44a", size = 1986109, upload_time = "2025-04-02T09:47:56.532Z" }, + { url = "https://files.pythonhosted.org/packages/90/72/ab58e43ce7e900b88cb571ed057b2fcd0e95b708a2e0bed475b10130393e/pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6dd8ecfde08d8bfadaea669e83c63939af76f4cf5538a72597016edfa3fad516", size = 2129159, upload_time = "2025-04-02T09:47:58.088Z" }, + { url = "https://files.pythonhosted.org/packages/dc/3f/52d85781406886c6870ac995ec0ba7ccc028b530b0798c9080531b409fdb/pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2f593494876eae852dc98c43c6f260f45abdbfeec9e4324e31a481d948214764", size = 2680222, upload_time = "2025-04-02T09:47:59.591Z" }, + { url = "https://files.pythonhosted.org/packages/f4/56/6e2ef42f363a0eec0fd92f74a91e0ac48cd2e49b695aac1509ad81eee86a/pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:948b73114f47fd7016088e5186d13faf5e1b2fe83f5e320e371f035557fd264d", size = 2006980, upload_time = "2025-04-02T09:48:01.397Z" }, + { url = "https://files.pythonhosted.org/packages/4c/c0/604536c4379cc78359f9ee0aa319f4aedf6b652ec2854953f5a14fc38c5a/pydantic_core-2.33.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e11f3864eb516af21b01e25fac915a82e9ddad3bb0fb9e95a246067398b435a4", size = 2120840, upload_time = "2025-04-02T09:48:03.056Z" }, + { url = "https://files.pythonhosted.org/packages/1f/46/9eb764814f508f0edfb291a0f75d10854d78113fa13900ce13729aaec3ae/pydantic_core-2.33.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:549150be302428b56fdad0c23c2741dcdb5572413776826c965619a25d9c6bde", size = 2072518, upload_time = "2025-04-02T09:48:04.662Z" }, + { url = "https://files.pythonhosted.org/packages/42/e3/fb6b2a732b82d1666fa6bf53e3627867ea3131c5f39f98ce92141e3e3dc1/pydantic_core-2.33.1-cp313-cp313-musllinux_1_1_armv7l.whl", hash = 
"sha256:495bc156026efafd9ef2d82372bd38afce78ddd82bf28ef5276c469e57c0c83e", size = 2248025, upload_time = "2025-04-02T09:48:06.226Z" }, + { url = "https://files.pythonhosted.org/packages/5c/9d/fbe8fe9d1aa4dac88723f10a921bc7418bd3378a567cb5e21193a3c48b43/pydantic_core-2.33.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ec79de2a8680b1a67a07490bddf9636d5c2fab609ba8c57597e855fa5fa4dacd", size = 2254991, upload_time = "2025-04-02T09:48:08.114Z" }, + { url = "https://files.pythonhosted.org/packages/aa/99/07e2237b8a66438d9b26482332cda99a9acccb58d284af7bc7c946a42fd3/pydantic_core-2.33.1-cp313-cp313-win32.whl", hash = "sha256:ee12a7be1742f81b8a65b36c6921022301d466b82d80315d215c4c691724986f", size = 1915262, upload_time = "2025-04-02T09:48:09.708Z" }, + { url = "https://files.pythonhosted.org/packages/8a/f4/e457a7849beeed1e5defbcf5051c6f7b3c91a0624dd31543a64fc9adcf52/pydantic_core-2.33.1-cp313-cp313-win_amd64.whl", hash = "sha256:ede9b407e39949d2afc46385ce6bd6e11588660c26f80576c11c958e6647bc40", size = 1956626, upload_time = "2025-04-02T09:48:11.288Z" }, + { url = "https://files.pythonhosted.org/packages/20/d0/e8d567a7cff7b04e017ae164d98011f1e1894269fe8e90ea187a3cbfb562/pydantic_core-2.33.1-cp313-cp313-win_arm64.whl", hash = "sha256:aa687a23d4b7871a00e03ca96a09cad0f28f443690d300500603bd0adba4b523", size = 1909590, upload_time = "2025-04-02T09:48:12.861Z" }, + { url = "https://files.pythonhosted.org/packages/ef/fd/24ea4302d7a527d672c5be06e17df16aabfb4e9fdc6e0b345c21580f3d2a/pydantic_core-2.33.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:401d7b76e1000d0dd5538e6381d28febdcacb097c8d340dde7d7fc6e13e9f95d", size = 1812963, upload_time = "2025-04-02T09:48:14.553Z" }, + { url = "https://files.pythonhosted.org/packages/5f/95/4fbc2ecdeb5c1c53f1175a32d870250194eb2fdf6291b795ab08c8646d5d/pydantic_core-2.33.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7aeb055a42d734c0255c9e489ac67e75397d59c6fbe60d155851e9782f276a9c", size = 1986896, upload_time = "2025-04-02T09:48:16.222Z" }, + { url = "https://files.pythonhosted.org/packages/71/ae/fe31e7f4a62431222d8f65a3bd02e3fa7e6026d154a00818e6d30520ea77/pydantic_core-2.33.1-cp313-cp313t-win_amd64.whl", hash = "sha256:338ea9b73e6e109f15ab439e62cb3b78aa752c7fd9536794112e14bee02c8d18", size = 1931810, upload_time = "2025-04-02T09:48:17.97Z" }, + { url = "https://files.pythonhosted.org/packages/0b/76/1794e440c1801ed35415238d2c728f26cd12695df9057154ad768b7b991c/pydantic_core-2.33.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3a371dc00282c4b84246509a5ddc808e61b9864aa1eae9ecc92bb1268b82db4a", size = 2042858, upload_time = "2025-04-02T09:49:03.419Z" }, + { url = "https://files.pythonhosted.org/packages/73/b4/9cd7b081fb0b1b4f8150507cd59d27b275c3e22ad60b35cb19ea0977d9b9/pydantic_core-2.33.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:f59295ecc75a1788af8ba92f2e8c6eeaa5a94c22fc4d151e8d9638814f85c8fc", size = 1873745, upload_time = "2025-04-02T09:49:05.391Z" }, + { url = "https://files.pythonhosted.org/packages/e1/d7/9ddb7575d4321e40d0363903c2576c8c0c3280ebea137777e5ab58d723e3/pydantic_core-2.33.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08530b8ac922003033f399128505f513e30ca770527cc8bbacf75a84fcc2c74b", size = 1904188, upload_time = "2025-04-02T09:49:07.352Z" }, + { url = 
"https://files.pythonhosted.org/packages/d1/a8/3194ccfe461bb08da19377ebec8cb4f13c9bd82e13baebc53c5c7c39a029/pydantic_core-2.33.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bae370459da6a5466978c0eacf90690cb57ec9d533f8e63e564ef3822bfa04fe", size = 2083479, upload_time = "2025-04-02T09:49:09.304Z" }, + { url = "https://files.pythonhosted.org/packages/42/c7/84cb569555d7179ca0b3f838cef08f66f7089b54432f5b8599aac6e9533e/pydantic_core-2.33.1-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e3de2777e3b9f4d603112f78006f4ae0acb936e95f06da6cb1a45fbad6bdb4b5", size = 2118415, upload_time = "2025-04-02T09:49:11.25Z" }, + { url = "https://files.pythonhosted.org/packages/3b/67/72abb8c73e0837716afbb58a59cc9e3ae43d1aa8677f3b4bc72c16142716/pydantic_core-2.33.1-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:3a64e81e8cba118e108d7126362ea30e021291b7805d47e4896e52c791be2761", size = 2079623, upload_time = "2025-04-02T09:49:13.292Z" }, + { url = "https://files.pythonhosted.org/packages/0b/cd/c59707e35a47ba4cbbf153c3f7c56420c58653b5801b055dc52cccc8e2dc/pydantic_core-2.33.1-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:52928d8c1b6bda03cc6d811e8923dffc87a2d3c8b3bfd2ce16471c7147a24850", size = 2250175, upload_time = "2025-04-02T09:49:15.597Z" }, + { url = "https://files.pythonhosted.org/packages/84/32/e4325a6676b0bed32d5b084566ec86ed7fd1e9bcbfc49c578b1755bde920/pydantic_core-2.33.1-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:1b30d92c9412beb5ac6b10a3eb7ef92ccb14e3f2a8d7732e2d739f58b3aa7544", size = 2254674, upload_time = "2025-04-02T09:49:17.61Z" }, + { url = "https://files.pythonhosted.org/packages/12/6f/5596dc418f2e292ffc661d21931ab34591952e2843e7168ea5a52591f6ff/pydantic_core-2.33.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:f995719707e0e29f0f41a8aa3bcea6e761a36c9136104d3189eafb83f5cec5e5", size = 2080951, upload_time = "2025-04-02T09:49:19.559Z" }, +] + +[[package]] +name = "pyee" +version = "12.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d2/a7/8faaa62a488a2a1e0d56969757f087cbd2729e9bcfa508c230299f366b4c/pyee-12.0.0.tar.gz", hash = "sha256:c480603f4aa2927d4766eb41fa82793fe60a82cbfdb8d688e0d08c55a534e145", size = 29675, upload_time = "2024-08-30T19:40:43.555Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1d/0d/95993c08c721ec68892547f2117e8f9dfbcef2ca71e098533541b4a54d5f/pyee-12.0.0-py3-none-any.whl", hash = "sha256:7b14b74320600049ccc7d0e0b1becd3b4bd0a03c745758225e31a59f4095c990", size = 14831, upload_time = "2024-08-30T19:40:42.132Z" }, +] + +[[package]] +name = "pygments" +version = "2.19.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7c/2d/c3338d48ea6cc0feb8446d8e6937e1408088a72a39937982cc6111d17f84/pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f", size = 4968581, upload_time = "2025-01-06T17:26:30.443Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293, upload_time = "2025-01-06T17:26:25.553Z" }, +] + +[[package]] +name = "pyjwt" +version = "2.10.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/e7/46/bd74733ff231675599650d3e47f361794b22ef3e3770998dda30d3b63726/pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", size = 87785, upload_time = "2024-11-28T03:43:29.933Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997, upload_time = "2024-11-28T03:43:27.893Z" }, +] + +[[package]] +name = "pyparsing" +version = "3.2.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/bb/22/f1129e69d94ffff626bdb5c835506b3a5b4f3d070f17ea295e12c2c6f60f/pyparsing-3.2.3.tar.gz", hash = "sha256:b9c13f1ab8b3b542f72e28f634bad4de758ab3ce4546e4301970ad6fa77c38be", size = 1088608, upload_time = "2025-03-25T05:01:28.114Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/05/e7/df2285f3d08fee213f2d041540fa4fc9ca6c2d44cf36d3a035bf2a8d2bcc/pyparsing-3.2.3-py3-none-any.whl", hash = "sha256:a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf", size = 111120, upload_time = "2025-03-25T05:01:24.908Z" }, +] + +[[package]] +name = "pytest" +version = "8.3.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ae/3c/c9d525a414d506893f0cd8a8d0de7706446213181570cdbd766691164e40/pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845", size = 1450891, upload_time = "2025-03-02T12:54:54.503Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/30/3d/64ad57c803f1fa1e963a7946b6e0fea4a70df53c1a7fed304586539c2bac/pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820", size = 343634, upload_time = "2025-03-02T12:54:52.069Z" }, +] + +[[package]] +name = "pytest-asyncio" +version = "0.25.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f2/a8/ecbc8ede70921dd2f544ab1cadd3ff3bf842af27f87bbdea774c7baa1d38/pytest_asyncio-0.25.3.tar.gz", hash = "sha256:fc1da2cf9f125ada7e710b4ddad05518d4cee187ae9412e9ac9271003497f07a", size = 54239, upload_time = "2025-01-28T18:37:58.729Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/67/17/3493c5624e48fd97156ebaec380dcaafee9506d7e2c46218ceebbb57d7de/pytest_asyncio-0.25.3-py3-none-any.whl", hash = "sha256:9e89518e0f9bd08928f97a3482fdc4e244df17529460bc038291ccaf8f85c7c3", size = 19467, upload_time = "2025-01-28T18:37:56.798Z" }, +] + +[[package]] +name = "pytest-mock" +version = "3.12.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/96/25588c55fbe330b751bd7c7d723c3544957566bc090f6d506551b514f488/pytest-mock-3.12.0.tar.gz", hash = "sha256:31a40f038c22cad32287bb43932054451ff5583ff094bca6f675df2f8bc1a6e9", size = 32139, upload_time = "2023-10-19T16:25:57.7Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b9/25/b29fd10dd062cf41e66787a7951b3842881a2a2d7e3a41fcbb58a8466046/pytest_mock-3.12.0-py3-none-any.whl", hash = 
"sha256:0972719a7263072da3a21c7f4773069bcc7486027d7e8e1f81d98a47e701bc4f", size = 9771, upload_time = "2023-10-19T16:25:55.764Z" }, +] + +[[package]] +name = "pytest-timeout" +version = "2.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/93/0d/04719abc7a4bdb3a7a1f968f24b0f5253d698c9cc94975330e9d3145befb/pytest-timeout-2.3.1.tar.gz", hash = "sha256:12397729125c6ecbdaca01035b9e5239d4db97352320af155b3f5de1ba5165d9", size = 17697, upload_time = "2024-03-07T21:04:01.069Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/03/27/14af9ef8321f5edc7527e47def2a21d8118c6f329a9342cc61387a0c0599/pytest_timeout-2.3.1-py3-none-any.whl", hash = "sha256:68188cb703edfc6a18fad98dc25a3c61e9f24d644b0b70f33af545219fc7813e", size = 14148, upload_time = "2024-03-07T21:03:58.764Z" }, +] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload_time = "2024-03-01T18:36:20.211Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload_time = "2024-03-01T18:36:18.57Z" }, +] + +[[package]] +name = "python-dotenv" +version = "1.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/bc/57/e84d88dfe0aec03b7a2d4327012c1627ab5f03652216c63d49846d7a6c58/python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca", size = 39115, upload_time = "2024-01-23T06:33:00.505Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6a/3e/b68c118422ec867fa7ab88444e1274aa40681c606d59ac27de5a5588f082/python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a", size = 19863, upload_time = "2024-01-23T06:32:58.246Z" }, +] + +[[package]] +name = "python-multipart" +version = "0.0.20" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f3/87/f44d7c9f274c7ee665a29b885ec97089ec5dc034c7f3fafa03da9e39a09e/python_multipart-0.0.20.tar.gz", hash = "sha256:8dd0cab45b8e23064ae09147625994d090fa46f5b0d1e13af944c331a7fa9d13", size = 37158, upload_time = "2024-12-16T19:45:46.972Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/45/58/38b5afbc1a800eeea951b9285d3912613f2603bdf897a4ab0f4bd7f405fc/python_multipart-0.0.20-py3-none-any.whl", hash = "sha256:8a62d3a8335e06589fe01f2a3e178cdcc632f3fbe0d492ad9ee0ec35aab1f104", size = 24546, upload_time = "2024-12-16T19:45:44.423Z" }, +] + +[[package]] +name = "python-slugify" +version = "8.0.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "text-unidecode" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/87/c7/5e1547c44e31da50a460df93af11a535ace568ef89d7a811069ead340c4a/python-slugify-8.0.4.tar.gz", hash = "sha256:59202371d1d05b54a9e7720c5e038f928f45daaffe41dd10822f3907b937c856", size = 10921, upload_time = 
"2024-02-08T18:32:45.488Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/62/02da182e544a51a5c3ccf4b03ab79df279f9c60c5e82d5e8bec7ca26ac11/python_slugify-8.0.4-py2.py3-none-any.whl", hash = "sha256:276540b79961052b66b7d116620b36518847f52d5fd9e3a70164fc8c50faa6b8", size = 10051, upload_time = "2024-02-08T18:32:43.911Z" }, +] + +[[package]] +name = "pytz" +version = "2025.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884, upload_time = "2025-03-25T02:25:00.538Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload_time = "2025-03-25T02:24:58.468Z" }, +] + +[[package]] +name = "pywin32" +version = "310" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f7/b1/68aa2986129fb1011dabbe95f0136f44509afaf072b12b8f815905a39f33/pywin32-310-cp311-cp311-win32.whl", hash = "sha256:1e765f9564e83011a63321bb9d27ec456a0ed90d3732c4b2e312b855365ed8bd", size = 8784284, upload_time = "2025-03-17T00:55:53.124Z" }, + { url = "https://files.pythonhosted.org/packages/b3/bd/d1592635992dd8db5bb8ace0551bc3a769de1ac8850200cfa517e72739fb/pywin32-310-cp311-cp311-win_amd64.whl", hash = "sha256:126298077a9d7c95c53823934f000599f66ec9296b09167810eb24875f32689c", size = 9520748, upload_time = "2025-03-17T00:55:55.203Z" }, + { url = "https://files.pythonhosted.org/packages/90/b1/ac8b1ffce6603849eb45a91cf126c0fa5431f186c2e768bf56889c46f51c/pywin32-310-cp311-cp311-win_arm64.whl", hash = "sha256:19ec5fc9b1d51c4350be7bb00760ffce46e6c95eaf2f0b2f1150657b1a43c582", size = 8455941, upload_time = "2025-03-17T00:55:57.048Z" }, + { url = "https://files.pythonhosted.org/packages/6b/ec/4fdbe47932f671d6e348474ea35ed94227fb5df56a7c30cbbb42cd396ed0/pywin32-310-cp312-cp312-win32.whl", hash = "sha256:8a75a5cc3893e83a108c05d82198880704c44bbaee4d06e442e471d3c9ea4f3d", size = 8796239, upload_time = "2025-03-17T00:55:58.807Z" }, + { url = "https://files.pythonhosted.org/packages/e3/e5/b0627f8bb84e06991bea89ad8153a9e50ace40b2e1195d68e9dff6b03d0f/pywin32-310-cp312-cp312-win_amd64.whl", hash = "sha256:bf5c397c9a9a19a6f62f3fb821fbf36cac08f03770056711f765ec1503972060", size = 9503839, upload_time = "2025-03-17T00:56:00.8Z" }, + { url = "https://files.pythonhosted.org/packages/1f/32/9ccf53748df72301a89713936645a664ec001abd35ecc8578beda593d37d/pywin32-310-cp312-cp312-win_arm64.whl", hash = "sha256:2349cc906eae872d0663d4d6290d13b90621eaf78964bb1578632ff20e152966", size = 8459470, upload_time = "2025-03-17T00:56:02.601Z" }, + { url = "https://files.pythonhosted.org/packages/1c/09/9c1b978ffc4ae53999e89c19c77ba882d9fce476729f23ef55211ea1c034/pywin32-310-cp313-cp313-win32.whl", hash = "sha256:5d241a659c496ada3253cd01cfaa779b048e90ce4b2b38cd44168ad555ce74ab", size = 8794384, upload_time = "2025-03-17T00:56:04.383Z" }, + { url = "https://files.pythonhosted.org/packages/45/3c/b4640f740ffebadd5d34df35fecba0e1cfef8fde9f3e594df91c28ad9b50/pywin32-310-cp313-cp313-win_amd64.whl", hash = "sha256:667827eb3a90208ddbdcc9e860c81bde63a135710e21e4cb3348968e4bd5249e", size = 9503039, upload_time = "2025-03-17T00:56:06.207Z" }, + { url = 
"https://files.pythonhosted.org/packages/b4/f4/f785020090fb050e7fb6d34b780f2231f302609dc964672f72bfaeb59a28/pywin32-310-cp313-cp313-win_arm64.whl", hash = "sha256:e308f831de771482b7cf692a1f308f8fca701b2d8f9dde6cc440c7da17e47b33", size = 8458152, upload_time = "2025-03-17T00:56:07.819Z" }, +] + +[[package]] +name = "pyyaml" +version = "6.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/54/ed/79a089b6be93607fa5cdaedf301d7dfb23af5f25c398d5ead2525b063e17/pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e", size = 130631, upload_time = "2024-08-06T20:33:50.674Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f8/aa/7af4e81f7acba21a4c6be026da38fd2b872ca46226673c89a758ebdc4fd2/PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774", size = 184612, upload_time = "2024-08-06T20:32:03.408Z" }, + { url = "https://files.pythonhosted.org/packages/8b/62/b9faa998fd185f65c1371643678e4d58254add437edb764a08c5a98fb986/PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee", size = 172040, upload_time = "2024-08-06T20:32:04.926Z" }, + { url = "https://files.pythonhosted.org/packages/ad/0c/c804f5f922a9a6563bab712d8dcc70251e8af811fce4524d57c2c0fd49a4/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c", size = 736829, upload_time = "2024-08-06T20:32:06.459Z" }, + { url = "https://files.pythonhosted.org/packages/51/16/6af8d6a6b210c8e54f1406a6b9481febf9c64a3109c541567e35a49aa2e7/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317", size = 764167, upload_time = "2024-08-06T20:32:08.338Z" }, + { url = "https://files.pythonhosted.org/packages/75/e4/2c27590dfc9992f73aabbeb9241ae20220bd9452df27483b6e56d3975cc5/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85", size = 762952, upload_time = "2024-08-06T20:32:14.124Z" }, + { url = "https://files.pythonhosted.org/packages/9b/97/ecc1abf4a823f5ac61941a9c00fe501b02ac3ab0e373c3857f7d4b83e2b6/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4", size = 735301, upload_time = "2024-08-06T20:32:16.17Z" }, + { url = "https://files.pythonhosted.org/packages/45/73/0f49dacd6e82c9430e46f4a027baa4ca205e8b0a9dce1397f44edc23559d/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e", size = 756638, upload_time = "2024-08-06T20:32:18.555Z" }, + { url = "https://files.pythonhosted.org/packages/22/5f/956f0f9fc65223a58fbc14459bf34b4cc48dec52e00535c79b8db361aabd/PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5", size = 143850, upload_time = "2024-08-06T20:32:19.889Z" }, + { url = "https://files.pythonhosted.org/packages/ed/23/8da0bbe2ab9dcdd11f4f4557ccaf95c10b9811b13ecced089d43ce59c3c8/PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44", size = 161980, upload_time = "2024-08-06T20:32:21.273Z" }, + { url = 
"https://files.pythonhosted.org/packages/86/0c/c581167fc46d6d6d7ddcfb8c843a4de25bdd27e4466938109ca68492292c/PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab", size = 183873, upload_time = "2024-08-06T20:32:25.131Z" }, + { url = "https://files.pythonhosted.org/packages/a8/0c/38374f5bb272c051e2a69281d71cba6fdb983413e6758b84482905e29a5d/PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725", size = 173302, upload_time = "2024-08-06T20:32:26.511Z" }, + { url = "https://files.pythonhosted.org/packages/c3/93/9916574aa8c00aa06bbac729972eb1071d002b8e158bd0e83a3b9a20a1f7/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5", size = 739154, upload_time = "2024-08-06T20:32:28.363Z" }, + { url = "https://files.pythonhosted.org/packages/95/0f/b8938f1cbd09739c6da569d172531567dbcc9789e0029aa070856f123984/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425", size = 766223, upload_time = "2024-08-06T20:32:30.058Z" }, + { url = "https://files.pythonhosted.org/packages/b9/2b/614b4752f2e127db5cc206abc23a8c19678e92b23c3db30fc86ab731d3bd/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476", size = 767542, upload_time = "2024-08-06T20:32:31.881Z" }, + { url = "https://files.pythonhosted.org/packages/d4/00/dd137d5bcc7efea1836d6264f049359861cf548469d18da90cd8216cf05f/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48", size = 731164, upload_time = "2024-08-06T20:32:37.083Z" }, + { url = "https://files.pythonhosted.org/packages/c9/1f/4f998c900485e5c0ef43838363ba4a9723ac0ad73a9dc42068b12aaba4e4/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b", size = 756611, upload_time = "2024-08-06T20:32:38.898Z" }, + { url = "https://files.pythonhosted.org/packages/df/d1/f5a275fdb252768b7a11ec63585bc38d0e87c9e05668a139fea92b80634c/PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4", size = 140591, upload_time = "2024-08-06T20:32:40.241Z" }, + { url = "https://files.pythonhosted.org/packages/0c/e8/4f648c598b17c3d06e8753d7d13d57542b30d56e6c2dedf9c331ae56312e/PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8", size = 156338, upload_time = "2024-08-06T20:32:41.93Z" }, + { url = "https://files.pythonhosted.org/packages/ef/e3/3af305b830494fa85d95f6d95ef7fa73f2ee1cc8ef5b495c7c3269fb835f/PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba", size = 181309, upload_time = "2024-08-06T20:32:43.4Z" }, + { url = "https://files.pythonhosted.org/packages/45/9f/3b1c20a0b7a3200524eb0076cc027a970d320bd3a6592873c85c92a08731/PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1", size = 171679, upload_time = "2024-08-06T20:32:44.801Z" }, + { url = 
"https://files.pythonhosted.org/packages/7c/9a/337322f27005c33bcb656c655fa78325b730324c78620e8328ae28b64d0c/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133", size = 733428, upload_time = "2024-08-06T20:32:46.432Z" }, + { url = "https://files.pythonhosted.org/packages/a3/69/864fbe19e6c18ea3cc196cbe5d392175b4cf3d5d0ac1403ec3f2d237ebb5/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484", size = 763361, upload_time = "2024-08-06T20:32:51.188Z" }, + { url = "https://files.pythonhosted.org/packages/04/24/b7721e4845c2f162d26f50521b825fb061bc0a5afcf9a386840f23ea19fa/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5", size = 759523, upload_time = "2024-08-06T20:32:53.019Z" }, + { url = "https://files.pythonhosted.org/packages/2b/b2/e3234f59ba06559c6ff63c4e10baea10e5e7df868092bf9ab40e5b9c56b6/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc", size = 726660, upload_time = "2024-08-06T20:32:54.708Z" }, + { url = "https://files.pythonhosted.org/packages/fe/0f/25911a9f080464c59fab9027482f822b86bf0608957a5fcc6eaac85aa515/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652", size = 751597, upload_time = "2024-08-06T20:32:56.985Z" }, + { url = "https://files.pythonhosted.org/packages/14/0d/e2c3b43bbce3cf6bd97c840b46088a3031085179e596d4929729d8d68270/PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183", size = 140527, upload_time = "2024-08-06T20:33:03.001Z" }, + { url = "https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446, upload_time = "2024-08-06T20:33:04.33Z" }, +] + +[[package]] +name = "regex" +version = "2024.11.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8e/5f/bd69653fbfb76cf8604468d3b4ec4c403197144c7bfe0e6a5fc9e02a07cb/regex-2024.11.6.tar.gz", hash = "sha256:7ab159b063c52a0333c884e4679f8d7a85112ee3078fe3d9004b2dd875585519", size = 399494, upload_time = "2024-11-06T20:12:31.635Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/58/58/7e4d9493a66c88a7da6d205768119f51af0f684fe7be7bac8328e217a52c/regex-2024.11.6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5478c6962ad548b54a591778e93cd7c456a7a29f8eca9c49e4f9a806dcc5d638", size = 482669, upload_time = "2024-11-06T20:09:31.064Z" }, + { url = "https://files.pythonhosted.org/packages/34/4c/8f8e631fcdc2ff978609eaeef1d6994bf2f028b59d9ac67640ed051f1218/regex-2024.11.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2c89a8cc122b25ce6945f0423dc1352cb9593c68abd19223eebbd4e56612c5b7", size = 287684, upload_time = "2024-11-06T20:09:32.915Z" }, + { url = "https://files.pythonhosted.org/packages/c5/1b/f0e4d13e6adf866ce9b069e191f303a30ab1277e037037a365c3aad5cc9c/regex-2024.11.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:94d87b689cdd831934fa3ce16cc15cd65748e6d689f5d2b8f4f4df2065c9fa20", size = 284589, upload_time = "2024-11-06T20:09:35.504Z" 
}, + { url = "https://files.pythonhosted.org/packages/25/4d/ab21047f446693887f25510887e6820b93f791992994f6498b0318904d4a/regex-2024.11.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1062b39a0a2b75a9c694f7a08e7183a80c63c0d62b301418ffd9c35f55aaa114", size = 792121, upload_time = "2024-11-06T20:09:37.701Z" }, + { url = "https://files.pythonhosted.org/packages/45/ee/c867e15cd894985cb32b731d89576c41a4642a57850c162490ea34b78c3b/regex-2024.11.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:167ed4852351d8a750da48712c3930b031f6efdaa0f22fa1933716bfcd6bf4a3", size = 831275, upload_time = "2024-11-06T20:09:40.371Z" }, + { url = "https://files.pythonhosted.org/packages/b3/12/b0f480726cf1c60f6536fa5e1c95275a77624f3ac8fdccf79e6727499e28/regex-2024.11.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d548dafee61f06ebdb584080621f3e0c23fff312f0de1afc776e2a2ba99a74f", size = 818257, upload_time = "2024-11-06T20:09:43.059Z" }, + { url = "https://files.pythonhosted.org/packages/bf/ce/0d0e61429f603bac433910d99ef1a02ce45a8967ffbe3cbee48599e62d88/regex-2024.11.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a19f302cd1ce5dd01a9099aaa19cae6173306d1302a43b627f62e21cf18ac0", size = 792727, upload_time = "2024-11-06T20:09:48.19Z" }, + { url = "https://files.pythonhosted.org/packages/e4/c1/243c83c53d4a419c1556f43777ccb552bccdf79d08fda3980e4e77dd9137/regex-2024.11.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bec9931dfb61ddd8ef2ebc05646293812cb6b16b60cf7c9511a832b6f1854b55", size = 780667, upload_time = "2024-11-06T20:09:49.828Z" }, + { url = "https://files.pythonhosted.org/packages/c5/f4/75eb0dd4ce4b37f04928987f1d22547ddaf6c4bae697623c1b05da67a8aa/regex-2024.11.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9714398225f299aa85267fd222f7142fcb5c769e73d7733344efc46f2ef5cf89", size = 776963, upload_time = "2024-11-06T20:09:51.819Z" }, + { url = "https://files.pythonhosted.org/packages/16/5d/95c568574e630e141a69ff8a254c2f188b4398e813c40d49228c9bbd9875/regex-2024.11.6-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:202eb32e89f60fc147a41e55cb086db2a3f8cb82f9a9a88440dcfc5d37faae8d", size = 784700, upload_time = "2024-11-06T20:09:53.982Z" }, + { url = "https://files.pythonhosted.org/packages/8e/b5/f8495c7917f15cc6fee1e7f395e324ec3e00ab3c665a7dc9d27562fd5290/regex-2024.11.6-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:4181b814e56078e9b00427ca358ec44333765f5ca1b45597ec7446d3a1ef6e34", size = 848592, upload_time = "2024-11-06T20:09:56.222Z" }, + { url = "https://files.pythonhosted.org/packages/1c/80/6dd7118e8cb212c3c60b191b932dc57db93fb2e36fb9e0e92f72a5909af9/regex-2024.11.6-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:068376da5a7e4da51968ce4c122a7cd31afaaec4fccc7856c92f63876e57b51d", size = 852929, upload_time = "2024-11-06T20:09:58.642Z" }, + { url = "https://files.pythonhosted.org/packages/11/9b/5a05d2040297d2d254baf95eeeb6df83554e5e1df03bc1a6687fc4ba1f66/regex-2024.11.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ac10f2c4184420d881a3475fb2c6f4d95d53a8d50209a2500723d831036f7c45", size = 781213, upload_time = "2024-11-06T20:10:00.867Z" }, + { url = "https://files.pythonhosted.org/packages/26/b7/b14e2440156ab39e0177506c08c18accaf2b8932e39fb092074de733d868/regex-2024.11.6-cp311-cp311-win32.whl", hash = "sha256:c36f9b6f5f8649bb251a5f3f66564438977b7ef8386a52460ae77e6070d309d9", size = 261734, upload_time = 
"2024-11-06T20:10:03.361Z" }, + { url = "https://files.pythonhosted.org/packages/80/32/763a6cc01d21fb3819227a1cc3f60fd251c13c37c27a73b8ff4315433a8e/regex-2024.11.6-cp311-cp311-win_amd64.whl", hash = "sha256:02e28184be537f0e75c1f9b2f8847dc51e08e6e171c6bde130b2687e0c33cf60", size = 274052, upload_time = "2024-11-06T20:10:05.179Z" }, + { url = "https://files.pythonhosted.org/packages/ba/30/9a87ce8336b172cc232a0db89a3af97929d06c11ceaa19d97d84fa90a8f8/regex-2024.11.6-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:52fb28f528778f184f870b7cf8f225f5eef0a8f6e3778529bdd40c7b3920796a", size = 483781, upload_time = "2024-11-06T20:10:07.07Z" }, + { url = "https://files.pythonhosted.org/packages/01/e8/00008ad4ff4be8b1844786ba6636035f7ef926db5686e4c0f98093612add/regex-2024.11.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdd6028445d2460f33136c55eeb1f601ab06d74cb3347132e1c24250187500d9", size = 288455, upload_time = "2024-11-06T20:10:09.117Z" }, + { url = "https://files.pythonhosted.org/packages/60/85/cebcc0aff603ea0a201667b203f13ba75d9fc8668fab917ac5b2de3967bc/regex-2024.11.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:805e6b60c54bf766b251e94526ebad60b7de0c70f70a4e6210ee2891acb70bf2", size = 284759, upload_time = "2024-11-06T20:10:11.155Z" }, + { url = "https://files.pythonhosted.org/packages/94/2b/701a4b0585cb05472a4da28ee28fdfe155f3638f5e1ec92306d924e5faf0/regex-2024.11.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b85c2530be953a890eaffde05485238f07029600e8f098cdf1848d414a8b45e4", size = 794976, upload_time = "2024-11-06T20:10:13.24Z" }, + { url = "https://files.pythonhosted.org/packages/4b/bf/fa87e563bf5fee75db8915f7352e1887b1249126a1be4813837f5dbec965/regex-2024.11.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb26437975da7dc36b7efad18aa9dd4ea569d2357ae6b783bf1118dabd9ea577", size = 833077, upload_time = "2024-11-06T20:10:15.37Z" }, + { url = "https://files.pythonhosted.org/packages/a1/56/7295e6bad94b047f4d0834e4779491b81216583c00c288252ef625c01d23/regex-2024.11.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:abfa5080c374a76a251ba60683242bc17eeb2c9818d0d30117b4486be10c59d3", size = 823160, upload_time = "2024-11-06T20:10:19.027Z" }, + { url = "https://files.pythonhosted.org/packages/fb/13/e3b075031a738c9598c51cfbc4c7879e26729c53aa9cca59211c44235314/regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b7fa6606c2881c1db9479b0eaa11ed5dfa11c8d60a474ff0e095099f39d98e", size = 796896, upload_time = "2024-11-06T20:10:21.85Z" }, + { url = "https://files.pythonhosted.org/packages/24/56/0b3f1b66d592be6efec23a795b37732682520b47c53da5a32c33ed7d84e3/regex-2024.11.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c32f75920cf99fe6b6c539c399a4a128452eaf1af27f39bce8909c9a3fd8cbe", size = 783997, upload_time = "2024-11-06T20:10:24.329Z" }, + { url = "https://files.pythonhosted.org/packages/f9/a1/eb378dada8b91c0e4c5f08ffb56f25fcae47bf52ad18f9b2f33b83e6d498/regex-2024.11.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:982e6d21414e78e1f51cf595d7f321dcd14de1f2881c5dc6a6e23bbbbd68435e", size = 781725, upload_time = "2024-11-06T20:10:28.067Z" }, + { url = "https://files.pythonhosted.org/packages/83/f2/033e7dec0cfd6dda93390089864732a3409246ffe8b042e9554afa9bff4e/regex-2024.11.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a7c2155f790e2fb448faed6dd241386719802296ec588a8b9051c1f5c481bc29", 
size = 789481, upload_time = "2024-11-06T20:10:31.612Z" }, + { url = "https://files.pythonhosted.org/packages/83/23/15d4552ea28990a74e7696780c438aadd73a20318c47e527b47a4a5a596d/regex-2024.11.6-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:149f5008d286636e48cd0b1dd65018548944e495b0265b45e1bffecce1ef7f39", size = 852896, upload_time = "2024-11-06T20:10:34.054Z" }, + { url = "https://files.pythonhosted.org/packages/e3/39/ed4416bc90deedbfdada2568b2cb0bc1fdb98efe11f5378d9892b2a88f8f/regex-2024.11.6-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:e5364a4502efca094731680e80009632ad6624084aff9a23ce8c8c6820de3e51", size = 860138, upload_time = "2024-11-06T20:10:36.142Z" }, + { url = "https://files.pythonhosted.org/packages/93/2d/dd56bb76bd8e95bbce684326302f287455b56242a4f9c61f1bc76e28360e/regex-2024.11.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0a86e7eeca091c09e021db8eb72d54751e527fa47b8d5787caf96d9831bd02ad", size = 787692, upload_time = "2024-11-06T20:10:38.394Z" }, + { url = "https://files.pythonhosted.org/packages/0b/55/31877a249ab7a5156758246b9c59539abbeba22461b7d8adc9e8475ff73e/regex-2024.11.6-cp312-cp312-win32.whl", hash = "sha256:32f9a4c643baad4efa81d549c2aadefaeba12249b2adc5af541759237eee1c54", size = 262135, upload_time = "2024-11-06T20:10:40.367Z" }, + { url = "https://files.pythonhosted.org/packages/38/ec/ad2d7de49a600cdb8dd78434a1aeffe28b9d6fc42eb36afab4a27ad23384/regex-2024.11.6-cp312-cp312-win_amd64.whl", hash = "sha256:a93c194e2df18f7d264092dc8539b8ffb86b45b899ab976aa15d48214138e81b", size = 273567, upload_time = "2024-11-06T20:10:43.467Z" }, + { url = "https://files.pythonhosted.org/packages/90/73/bcb0e36614601016552fa9344544a3a2ae1809dc1401b100eab02e772e1f/regex-2024.11.6-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a6ba92c0bcdf96cbf43a12c717eae4bc98325ca3730f6b130ffa2e3c3c723d84", size = 483525, upload_time = "2024-11-06T20:10:45.19Z" }, + { url = "https://files.pythonhosted.org/packages/0f/3f/f1a082a46b31e25291d830b369b6b0c5576a6f7fb89d3053a354c24b8a83/regex-2024.11.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:525eab0b789891ac3be914d36893bdf972d483fe66551f79d3e27146191a37d4", size = 288324, upload_time = "2024-11-06T20:10:47.177Z" }, + { url = "https://files.pythonhosted.org/packages/09/c9/4e68181a4a652fb3ef5099e077faf4fd2a694ea6e0f806a7737aff9e758a/regex-2024.11.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:086a27a0b4ca227941700e0b31425e7a28ef1ae8e5e05a33826e17e47fbfdba0", size = 284617, upload_time = "2024-11-06T20:10:49.312Z" }, + { url = "https://files.pythonhosted.org/packages/fc/fd/37868b75eaf63843165f1d2122ca6cb94bfc0271e4428cf58c0616786dce/regex-2024.11.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bde01f35767c4a7899b7eb6e823b125a64de314a8ee9791367c9a34d56af18d0", size = 795023, upload_time = "2024-11-06T20:10:51.102Z" }, + { url = "https://files.pythonhosted.org/packages/c4/7c/d4cd9c528502a3dedb5c13c146e7a7a539a3853dc20209c8e75d9ba9d1b2/regex-2024.11.6-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b583904576650166b3d920d2bcce13971f6f9e9a396c673187f49811b2769dc7", size = 833072, upload_time = "2024-11-06T20:10:52.926Z" }, + { url = "https://files.pythonhosted.org/packages/4f/db/46f563a08f969159c5a0f0e722260568425363bea43bb7ae370becb66a67/regex-2024.11.6-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c4de13f06a0d54fa0d5ab1b7138bfa0d883220965a29616e3ea61b35d5f5fc7", size = 823130, upload_time = "2024-11-06T20:10:54.828Z" }, + 
{ url = "https://files.pythonhosted.org/packages/db/60/1eeca2074f5b87df394fccaa432ae3fc06c9c9bfa97c5051aed70e6e00c2/regex-2024.11.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3cde6e9f2580eb1665965ce9bf17ff4952f34f5b126beb509fee8f4e994f143c", size = 796857, upload_time = "2024-11-06T20:10:56.634Z" }, + { url = "https://files.pythonhosted.org/packages/10/db/ac718a08fcee981554d2f7bb8402f1faa7e868c1345c16ab1ebec54b0d7b/regex-2024.11.6-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0d7f453dca13f40a02b79636a339c5b62b670141e63efd511d3f8f73fba162b3", size = 784006, upload_time = "2024-11-06T20:10:59.369Z" }, + { url = "https://files.pythonhosted.org/packages/c2/41/7da3fe70216cea93144bf12da2b87367590bcf07db97604edeea55dac9ad/regex-2024.11.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:59dfe1ed21aea057a65c6b586afd2a945de04fc7db3de0a6e3ed5397ad491b07", size = 781650, upload_time = "2024-11-06T20:11:02.042Z" }, + { url = "https://files.pythonhosted.org/packages/a7/d5/880921ee4eec393a4752e6ab9f0fe28009435417c3102fc413f3fe81c4e5/regex-2024.11.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b97c1e0bd37c5cd7902e65f410779d39eeda155800b65fc4d04cc432efa9bc6e", size = 789545, upload_time = "2024-11-06T20:11:03.933Z" }, + { url = "https://files.pythonhosted.org/packages/dc/96/53770115e507081122beca8899ab7f5ae28ae790bfcc82b5e38976df6a77/regex-2024.11.6-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f9d1e379028e0fc2ae3654bac3cbbef81bf3fd571272a42d56c24007979bafb6", size = 853045, upload_time = "2024-11-06T20:11:06.497Z" }, + { url = "https://files.pythonhosted.org/packages/31/d3/1372add5251cc2d44b451bd94f43b2ec78e15a6e82bff6a290ef9fd8f00a/regex-2024.11.6-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:13291b39131e2d002a7940fb176e120bec5145f3aeb7621be6534e46251912c4", size = 860182, upload_time = "2024-11-06T20:11:09.06Z" }, + { url = "https://files.pythonhosted.org/packages/ed/e3/c446a64984ea9f69982ba1a69d4658d5014bc7a0ea468a07e1a1265db6e2/regex-2024.11.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f51f88c126370dcec4908576c5a627220da6c09d0bff31cfa89f2523843316d", size = 787733, upload_time = "2024-11-06T20:11:11.256Z" }, + { url = "https://files.pythonhosted.org/packages/2b/f1/e40c8373e3480e4f29f2692bd21b3e05f296d3afebc7e5dcf21b9756ca1c/regex-2024.11.6-cp313-cp313-win32.whl", hash = "sha256:63b13cfd72e9601125027202cad74995ab26921d8cd935c25f09c630436348ff", size = 262122, upload_time = "2024-11-06T20:11:13.161Z" }, + { url = "https://files.pythonhosted.org/packages/45/94/bc295babb3062a731f52621cdc992d123111282e291abaf23faa413443ea/regex-2024.11.6-cp313-cp313-win_amd64.whl", hash = "sha256:2b3361af3198667e99927da8b84c1b010752fa4b1115ee30beaa332cabc3ef1a", size = 273545, upload_time = "2024-11-06T20:11:15Z" }, +] + +[[package]] +name = "requests" +version = "2.32.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218, upload_time = "2024-05-29T15:37:49.536Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", 
hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928, upload_time = "2024-05-29T15:37:47.027Z" }, +] + +[[package]] +name = "requests-oauthlib" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "oauthlib" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/42/f2/05f29bc3913aea15eb670be136045bf5c5bbf4b99ecb839da9b422bb2c85/requests-oauthlib-2.0.0.tar.gz", hash = "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9", size = 55650, upload_time = "2024-03-22T20:32:29.939Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/5d/63d4ae3b9daea098d5d6f5da83984853c1bbacd5dc826764b249fe119d24/requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36", size = 24179, upload_time = "2024-03-22T20:32:28.055Z" }, +] + +[[package]] +name = "rich" +version = "14.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a1/53/830aa4c3066a8ab0ae9a9955976fb770fe9c6102117c8ec4ab3ea62d89e8/rich-14.0.0.tar.gz", hash = "sha256:82f1bc23a6a21ebca4ae0c45af9bdbc492ed20231dcb63f297d6d1021a9d5725", size = 224078, upload_time = "2025-03-30T14:15:14.23Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0d/9b/63f4c7ebc259242c89b3acafdb37b41d1185c07ff0011164674e9076b491/rich-14.0.0-py3-none-any.whl", hash = "sha256:1c9491e1951aac09caffd42f448ee3d04e58923ffe14993f6e83068dc395d7e0", size = 243229, upload_time = "2025-03-30T14:15:12.283Z" }, +] + +[[package]] +name = "rich-toolkit" +version = "0.14.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "rich" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f6/31/b6d055f291a660a7bcaec4bcc9457b9fef8ecb6293e527b1eef1840aefd4/rich_toolkit-0.14.6.tar.gz", hash = "sha256:9dbd40e83414b84e828bf899115fff8877ce5951b73175f44db142902f07645d", size = 110805, upload_time = "2025-05-12T19:19:15.284Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2e/3c/7a824c0514e87c61000583ac22c8321da6dc8e58a93d5f56e583482a2ee0/rich_toolkit-0.14.6-py3-none-any.whl", hash = "sha256:764f3a5f9e4b539ce805596863299e8982599514906dc5e3ccc2d390ef74c301", size = 24815, upload_time = "2025-05-12T19:19:13.713Z" }, +] + +[[package]] +name = "rsa" +version = "4.9.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyasn1" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/da/8a/22b7beea3ee0d44b1916c0c1cb0ee3af23b700b6da9f04991899d0c555d4/rsa-4.9.1.tar.gz", hash = "sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75", size = 29034, upload_time = "2025-04-16T09:51:18.218Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762", size = 34696, upload_time = "2025-04-16T09:51:17.142Z" }, +] + +[[package]] +name = "safetensors" +version = "0.5.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/71/7e/2d5d6ee7b40c0682315367ec7475693d110f512922d582fef1bd4a63adc3/safetensors-0.5.3.tar.gz", hash = 
"sha256:b6b0d6ecacec39a4fdd99cc19f4576f5219ce858e6fd8dbe7609df0b8dc56965", size = 67210, upload_time = "2025-02-26T09:15:13.155Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/ae/88f6c49dbd0cc4da0e08610019a3c78a7d390879a919411a410a1876d03a/safetensors-0.5.3-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:bd20eb133db8ed15b40110b7c00c6df51655a2998132193de2f75f72d99c7073", size = 436917, upload_time = "2025-02-26T09:15:03.702Z" }, + { url = "https://files.pythonhosted.org/packages/b8/3b/11f1b4a2f5d2ab7da34ecc062b0bc301f2be024d110a6466726bec8c055c/safetensors-0.5.3-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:21d01c14ff6c415c485616b8b0bf961c46b3b343ca59110d38d744e577f9cce7", size = 418419, upload_time = "2025-02-26T09:15:01.765Z" }, + { url = "https://files.pythonhosted.org/packages/5d/9a/add3e6fef267658075c5a41573c26d42d80c935cdc992384dfae435feaef/safetensors-0.5.3-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:11bce6164887cd491ca75c2326a113ba934be596e22b28b1742ce27b1d076467", size = 459493, upload_time = "2025-02-26T09:14:51.812Z" }, + { url = "https://files.pythonhosted.org/packages/df/5c/bf2cae92222513cc23b3ff85c4a1bb2811a2c3583ac0f8e8d502751de934/safetensors-0.5.3-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4a243be3590bc3301c821da7a18d87224ef35cbd3e5f5727e4e0728b8172411e", size = 472400, upload_time = "2025-02-26T09:14:53.549Z" }, + { url = "https://files.pythonhosted.org/packages/58/11/7456afb740bd45782d0f4c8e8e1bb9e572f1bf82899fb6ace58af47b4282/safetensors-0.5.3-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8bd84b12b1670a6f8e50f01e28156422a2bc07fb16fc4e98bded13039d688a0d", size = 522891, upload_time = "2025-02-26T09:14:55.717Z" }, + { url = "https://files.pythonhosted.org/packages/57/3d/fe73a9d2ace487e7285f6e157afee2383bd1ddb911b7cb44a55cf812eae3/safetensors-0.5.3-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:391ac8cab7c829452175f871fcaf414aa1e292b5448bd02620f675a7f3e7abb9", size = 537694, upload_time = "2025-02-26T09:14:57.036Z" }, + { url = "https://files.pythonhosted.org/packages/a6/f8/dae3421624fcc87a89d42e1898a798bc7ff72c61f38973a65d60df8f124c/safetensors-0.5.3-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cead1fa41fc54b1e61089fa57452e8834f798cb1dc7a09ba3524f1eb08e0317a", size = 471642, upload_time = "2025-02-26T09:15:00.544Z" }, + { url = "https://files.pythonhosted.org/packages/ce/20/1fbe16f9b815f6c5a672f5b760951e20e17e43f67f231428f871909a37f6/safetensors-0.5.3-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1077f3e94182d72618357b04b5ced540ceb71c8a813d3319f1aba448e68a770d", size = 502241, upload_time = "2025-02-26T09:14:58.303Z" }, + { url = "https://files.pythonhosted.org/packages/5f/18/8e108846b506487aa4629fe4116b27db65c3dde922de2c8e0cc1133f3f29/safetensors-0.5.3-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:799021e78287bac619c7b3f3606730a22da4cda27759ddf55d37c8db7511c74b", size = 638001, upload_time = "2025-02-26T09:15:05.79Z" }, + { url = "https://files.pythonhosted.org/packages/82/5a/c116111d8291af6c8c8a8b40628fe833b9db97d8141c2a82359d14d9e078/safetensors-0.5.3-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:df26da01aaac504334644e1b7642fa000bfec820e7cef83aeac4e355e03195ff", size = 734013, upload_time = "2025-02-26T09:15:07.892Z" }, + { url = 
"https://files.pythonhosted.org/packages/7d/ff/41fcc4d3b7de837963622e8610d998710705bbde9a8a17221d85e5d0baad/safetensors-0.5.3-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:32c3ef2d7af8b9f52ff685ed0bc43913cdcde135089ae322ee576de93eae5135", size = 670687, upload_time = "2025-02-26T09:15:09.979Z" }, + { url = "https://files.pythonhosted.org/packages/40/ad/2b113098e69c985a3d8fbda4b902778eae4a35b7d5188859b4a63d30c161/safetensors-0.5.3-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:37f1521be045e56fc2b54c606d4455573e717b2d887c579ee1dbba5f868ece04", size = 643147, upload_time = "2025-02-26T09:15:11.185Z" }, + { url = "https://files.pythonhosted.org/packages/0a/0c/95aeb51d4246bd9a3242d3d8349c1112b4ee7611a4b40f0c5c93b05f001d/safetensors-0.5.3-cp38-abi3-win32.whl", hash = "sha256:cfc0ec0846dcf6763b0ed3d1846ff36008c6e7290683b61616c4b040f6a54ace", size = 296677, upload_time = "2025-02-26T09:15:16.554Z" }, + { url = "https://files.pythonhosted.org/packages/69/e2/b011c38e5394c4c18fb5500778a55ec43ad6106126e74723ffaee246f56e/safetensors-0.5.3-cp38-abi3-win_amd64.whl", hash = "sha256:836cbbc320b47e80acd40e44c8682db0e8ad7123209f69b093def21ec7cafd11", size = 308878, upload_time = "2025-02-26T09:15:14.99Z" }, +] + +[[package]] +name = "setuptools" +version = "80.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/95/32/0cc40fe41fd2adb80a2f388987f4f8db3c866c69e33e0b4c8b093fdf700e/setuptools-80.4.0.tar.gz", hash = "sha256:5a78f61820bc088c8e4add52932ae6b8cf423da2aff268c23f813cfbb13b4006", size = 1315008, upload_time = "2025-05-09T20:42:27.972Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b1/93/dba5ed08c2e31ec7cdc2ce75705a484ef0be1a2fecac8a58272489349de8/setuptools-80.4.0-py3-none-any.whl", hash = "sha256:6cdc8cb9a7d590b237dbe4493614a9b75d0559b888047c1f67d49ba50fc3edb2", size = 1200812, upload_time = "2025-05-09T20:42:25.325Z" }, +] + +[[package]] +name = "shellingham" +version = "1.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload_time = "2023-10-24T04:13:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload_time = "2023-10-24T04:13:38.866Z" }, +] + +[[package]] +name = "simplejson" +version = "3.20.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/af/92/51b417685abd96b31308b61b9acce7ec50d8e1de8fbc39a7fd4962c60689/simplejson-3.20.1.tar.gz", hash = "sha256:e64139b4ec4f1f24c142ff7dcafe55a22b811a74d86d66560c8815687143037d", size = 85591, upload_time = "2025-02-15T05:18:53.15Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/76/59/74bc90d1c051bc2432c96b34bd4e8036875ab58b4fcbe4d6a5a76985f853/simplejson-3.20.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:325b8c107253d3217e89d7b50c71015b5b31e2433e6c5bf38967b2f80630a8ca", size = 92132, upload_time = "2025-02-15T05:16:15.743Z" }, + { url = "https://files.pythonhosted.org/packages/71/c7/1970916e0c51794fff89f76da2f632aaf0b259b87753c88a8c409623d3e1/simplejson-3.20.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:88a7baa8211089b9e58d78fbc1b0b322103f3f3d459ff16f03a36cece0d0fcf0", size = 74956, upload_time = "2025-02-15T05:16:17.062Z" }, + { url = "https://files.pythonhosted.org/packages/c8/0d/98cc5909180463f1d75fac7180de62d4cdb4e82c4fef276b9e591979372c/simplejson-3.20.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:299b1007b8101d50d95bc0db1bf5c38dc372e85b504cf77f596462083ee77e3f", size = 74772, upload_time = "2025-02-15T05:16:19.204Z" }, + { url = "https://files.pythonhosted.org/packages/e1/94/a30a5211a90d67725a3e8fcc1c788189f2ae2ed2b96b63ed15d0b7f5d6bb/simplejson-3.20.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03ec618ed65caab48e81e3ed29586236a8e57daef792f1f3bb59504a7e98cd10", size = 143575, upload_time = "2025-02-15T05:16:21.337Z" }, + { url = "https://files.pythonhosted.org/packages/ee/08/cdb6821f1058eb5db46d252de69ff7e6c53f05f1bae6368fe20d5b51d37e/simplejson-3.20.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cd2cdead1d3197f0ff43373cf4730213420523ba48697743e135e26f3d179f38", size = 153241, upload_time = "2025-02-15T05:16:22.859Z" }, + { url = "https://files.pythonhosted.org/packages/4c/2d/ca3caeea0bdc5efc5503d5f57a2dfb56804898fb196dfada121323ee0ccb/simplejson-3.20.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3466d2839fdc83e1af42e07b90bc8ff361c4e8796cd66722a40ba14e458faddd", size = 141500, upload_time = "2025-02-15T05:16:25.068Z" }, + { url = "https://files.pythonhosted.org/packages/e1/33/d3e0779d5c58245e7370c98eb969275af6b7a4a5aec3b97cbf85f09ad328/simplejson-3.20.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d492ed8e92f3a9f9be829205f44b1d0a89af6582f0cf43e0d129fa477b93fe0c", size = 144757, upload_time = "2025-02-15T05:16:28.301Z" }, + { url = "https://files.pythonhosted.org/packages/54/53/2d93128bb55861b2fa36c5944f38da51a0bc6d83e513afc6f7838440dd15/simplejson-3.20.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:f924b485537b640dc69434565463fd6fc0c68c65a8c6e01a823dd26c9983cf79", size = 144409, upload_time = "2025-02-15T05:16:29.687Z" }, + { url = "https://files.pythonhosted.org/packages/99/4c/dac310a98f897ad3435b4bdc836d92e78f09e38c5dbf28211ed21dc59fa2/simplejson-3.20.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:9e8eacf6a3491bf76ea91a8d46726368a6be0eb94993f60b8583550baae9439e", size = 146082, upload_time = "2025-02-15T05:16:31.064Z" }, + { url = "https://files.pythonhosted.org/packages/ee/22/d7ba958cfed39827335b82656b1c46f89678faecda9a7677b47e87b48ee6/simplejson-3.20.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:d34d04bf90b4cea7c22d8b19091633908f14a096caa301b24c2f3d85b5068fb8", size = 154339, upload_time = "2025-02-15T05:16:32.719Z" }, + { url = "https://files.pythonhosted.org/packages/b8/c8/b072b741129406a7086a0799c6f5d13096231bf35fdd87a0cffa789687fc/simplejson-3.20.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:69dd28d4ce38390ea4aaf212902712c0fd1093dc4c1ff67e09687c3c3e15a749", size = 147915, upload_time = "2025-02-15T05:16:34.291Z" }, + { url = "https://files.pythonhosted.org/packages/6c/46/8347e61e9cf3db5342a42f7fd30a81b4f5cf85977f916852d7674a540907/simplejson-3.20.1-cp311-cp311-win32.whl", hash = "sha256:dfe7a9da5fd2a3499436cd350f31539e0a6ded5da6b5b3d422df016444d65e43", size = 73972, upload_time = "2025-02-15T05:16:35.712Z" }, + { url = 
"https://files.pythonhosted.org/packages/01/85/b52f24859237b4e9d523d5655796d911ba3d46e242eb1959c45b6af5aedd/simplejson-3.20.1-cp311-cp311-win_amd64.whl", hash = "sha256:896a6c04d7861d507d800da7642479c3547060bf97419d9ef73d98ced8258766", size = 75595, upload_time = "2025-02-15T05:16:36.957Z" }, + { url = "https://files.pythonhosted.org/packages/8d/eb/34c16a1ac9ba265d024dc977ad84e1659d931c0a700967c3e59a98ed7514/simplejson-3.20.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f31c4a3a7ab18467ee73a27f3e59158255d1520f3aad74315edde7a940f1be23", size = 93100, upload_time = "2025-02-15T05:16:38.801Z" }, + { url = "https://files.pythonhosted.org/packages/41/fc/2c2c007d135894971e6814e7c0806936e5bade28f8db4dd7e2a58b50debd/simplejson-3.20.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:884e6183d16b725e113b83a6fc0230152ab6627d4d36cb05c89c2c5bccfa7bc6", size = 75464, upload_time = "2025-02-15T05:16:40.905Z" }, + { url = "https://files.pythonhosted.org/packages/0f/05/2b5ecb33b776c34bb5cace5de5d7669f9b60e3ca13c113037b2ca86edfbd/simplejson-3.20.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03d7a426e416fe0d3337115f04164cd9427eb4256e843a6b8751cacf70abc832", size = 75112, upload_time = "2025-02-15T05:16:42.246Z" }, + { url = "https://files.pythonhosted.org/packages/fe/36/1f3609a2792f06cd4b71030485f78e91eb09cfd57bebf3116bf2980a8bac/simplejson-3.20.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:000602141d0bddfcff60ea6a6e97d5e10c9db6b17fd2d6c66199fa481b6214bb", size = 150182, upload_time = "2025-02-15T05:16:43.557Z" }, + { url = "https://files.pythonhosted.org/packages/2f/b0/053fbda38b8b602a77a4f7829def1b4f316cd8deb5440a6d3ee90790d2a4/simplejson-3.20.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:af8377a8af78226e82e3a4349efdde59ffa421ae88be67e18cef915e4023a595", size = 158363, upload_time = "2025-02-15T05:16:45.748Z" }, + { url = "https://files.pythonhosted.org/packages/d1/4b/2eb84ae867539a80822e92f9be4a7200dffba609275faf99b24141839110/simplejson-3.20.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:15c7de4c88ab2fbcb8781a3b982ef883696736134e20b1210bca43fb42ff1acf", size = 148415, upload_time = "2025-02-15T05:16:47.861Z" }, + { url = "https://files.pythonhosted.org/packages/e0/bd/400b0bd372a5666addf2540c7358bfc3841b9ce5cdbc5cc4ad2f61627ad8/simplejson-3.20.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:455a882ff3f97d810709f7b620007d4e0aca8da71d06fc5c18ba11daf1c4df49", size = 152213, upload_time = "2025-02-15T05:16:49.25Z" }, + { url = "https://files.pythonhosted.org/packages/50/12/143f447bf6a827ee9472693768dc1a5eb96154f8feb140a88ce6973a3cfa/simplejson-3.20.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:fc0f523ce923e7f38eb67804bc80e0a028c76d7868500aa3f59225574b5d0453", size = 150048, upload_time = "2025-02-15T05:16:51.5Z" }, + { url = "https://files.pythonhosted.org/packages/5e/ea/dd9b3e8e8ed710a66f24a22c16a907c9b539b6f5f45fd8586bd5c231444e/simplejson-3.20.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:76461ec929282dde4a08061071a47281ad939d0202dc4e63cdd135844e162fbc", size = 151668, upload_time = "2025-02-15T05:16:53Z" }, + { url = "https://files.pythonhosted.org/packages/99/af/ee52a8045426a0c5b89d755a5a70cc821815ef3c333b56fbcad33c4435c0/simplejson-3.20.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ab19c2da8c043607bde4d4ef3a6b633e668a7d2e3d56f40a476a74c5ea71949f", size = 
158840, upload_time = "2025-02-15T05:16:54.851Z" }, + { url = "https://files.pythonhosted.org/packages/68/db/ab32869acea6b5de7d75fa0dac07a112ded795d41eaa7e66c7813b17be95/simplejson-3.20.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b2578bedaedf6294415197b267d4ef678fea336dd78ee2a6d2f4b028e9d07be3", size = 154212, upload_time = "2025-02-15T05:16:56.318Z" }, + { url = "https://files.pythonhosted.org/packages/fa/7a/e3132d454977d75a3bf9a6d541d730f76462ebf42a96fea2621498166f41/simplejson-3.20.1-cp312-cp312-win32.whl", hash = "sha256:339f407373325a36b7fd744b688ba5bae0666b5d340ec6d98aebc3014bf3d8ea", size = 74101, upload_time = "2025-02-15T05:16:57.746Z" }, + { url = "https://files.pythonhosted.org/packages/bc/5d/4e243e937fa3560107c69f6f7c2eed8589163f5ed14324e864871daa2dd9/simplejson-3.20.1-cp312-cp312-win_amd64.whl", hash = "sha256:627d4486a1ea7edf1f66bb044ace1ce6b4c1698acd1b05353c97ba4864ea2e17", size = 75736, upload_time = "2025-02-15T05:16:59.017Z" }, + { url = "https://files.pythonhosted.org/packages/c4/03/0f453a27877cb5a5fff16a975925f4119102cc8552f52536b9a98ef0431e/simplejson-3.20.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:71e849e7ceb2178344998cbe5ade101f1b329460243c79c27fbfc51c0447a7c3", size = 93109, upload_time = "2025-02-15T05:17:00.377Z" }, + { url = "https://files.pythonhosted.org/packages/74/1f/a729f4026850cabeaff23e134646c3f455e86925d2533463420635ae54de/simplejson-3.20.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b63fdbab29dc3868d6f009a59797cefaba315fd43cd32ddd998ee1da28e50e29", size = 75475, upload_time = "2025-02-15T05:17:02.544Z" }, + { url = "https://files.pythonhosted.org/packages/e2/14/50a2713fee8ff1f8d655b1a14f4a0f1c0c7246768a1b3b3d12964a4ed5aa/simplejson-3.20.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1190f9a3ce644fd50ec277ac4a98c0517f532cfebdcc4bd975c0979a9f05e1fb", size = 75112, upload_time = "2025-02-15T05:17:03.875Z" }, + { url = "https://files.pythonhosted.org/packages/45/86/ea9835abb646755140e2d482edc9bc1e91997ed19a59fd77ae4c6a0facea/simplejson-3.20.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1336ba7bcb722ad487cd265701ff0583c0bb6de638364ca947bb84ecc0015d1", size = 150245, upload_time = "2025-02-15T05:17:06.899Z" }, + { url = "https://files.pythonhosted.org/packages/12/b4/53084809faede45da829fe571c65fbda8479d2a5b9c633f46b74124d56f5/simplejson-3.20.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e975aac6a5acd8b510eba58d5591e10a03e3d16c1cf8a8624ca177491f7230f0", size = 158465, upload_time = "2025-02-15T05:17:08.707Z" }, + { url = "https://files.pythonhosted.org/packages/a9/7d/d56579468d1660b3841e1f21c14490d103e33cf911886b22652d6e9683ec/simplejson-3.20.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6a6dd11ee282937ad749da6f3b8d87952ad585b26e5edfa10da3ae2536c73078", size = 148514, upload_time = "2025-02-15T05:17:11.323Z" }, + { url = "https://files.pythonhosted.org/packages/19/e3/874b1cca3d3897b486d3afdccc475eb3a09815bf1015b01cf7fcb52a55f0/simplejson-3.20.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab980fcc446ab87ea0879edad41a5c28f2d86020014eb035cf5161e8de4474c6", size = 152262, upload_time = "2025-02-15T05:17:13.543Z" }, + { url = "https://files.pythonhosted.org/packages/32/84/f0fdb3625292d945c2bd13a814584603aebdb38cfbe5fe9be6b46fe598c4/simplejson-3.20.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:f5aee2a4cb6b146bd17333ac623610f069f34e8f31d2f4f0c1a2186e50c594f0", size = 150164, upload_time = "2025-02-15T05:17:15.021Z" }, + { url = "https://files.pythonhosted.org/packages/95/51/6d625247224f01eaaeabace9aec75ac5603a42f8ebcce02c486fbda8b428/simplejson-3.20.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:652d8eecbb9a3b6461b21ec7cf11fd0acbab144e45e600c817ecf18e4580b99e", size = 151795, upload_time = "2025-02-15T05:17:16.542Z" }, + { url = "https://files.pythonhosted.org/packages/7f/d9/bb921df6b35be8412f519e58e86d1060fddf3ad401b783e4862e0a74c4c1/simplejson-3.20.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:8c09948f1a486a89251ee3a67c9f8c969b379f6ffff1a6064b41fea3bce0a112", size = 159027, upload_time = "2025-02-15T05:17:18.083Z" }, + { url = "https://files.pythonhosted.org/packages/03/c5/5950605e4ad023a6621cf4c931b29fd3d2a9c1f36be937230bfc83d7271d/simplejson-3.20.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:cbbd7b215ad4fc6f058b5dd4c26ee5c59f72e031dfda3ac183d7968a99e4ca3a", size = 154380, upload_time = "2025-02-15T05:17:20.334Z" }, + { url = "https://files.pythonhosted.org/packages/66/ad/b74149557c5ec1e4e4d55758bda426f5d2ec0123cd01a53ae63b8de51fa3/simplejson-3.20.1-cp313-cp313-win32.whl", hash = "sha256:ae81e482476eaa088ef9d0120ae5345de924f23962c0c1e20abbdff597631f87", size = 74102, upload_time = "2025-02-15T05:17:22.475Z" }, + { url = "https://files.pythonhosted.org/packages/db/a9/25282fdd24493e1022f30b7f5cdf804255c007218b2bfaa655bd7ad34b2d/simplejson-3.20.1-cp313-cp313-win_amd64.whl", hash = "sha256:1b9fd15853b90aec3b1739f4471efbf1ac05066a2c7041bf8db821bb73cd2ddc", size = 75736, upload_time = "2025-02-15T05:17:24.122Z" }, + { url = "https://files.pythonhosted.org/packages/4b/30/00f02a0a921556dd5a6db1ef2926a1bc7a8bbbfb1c49cfed68a275b8ab2b/simplejson-3.20.1-py3-none-any.whl", hash = "sha256:8a6c1bbac39fa4a79f83cbf1df6ccd8ff7069582a9fd8db1e52cea073bc2c697", size = 57121, upload_time = "2025-02-15T05:18:51.243Z" }, +] + +[[package]] +name = "six" +version = "1.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload_time = "2024-12-04T17:35:28.174Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload_time = "2024-12-04T17:35:26.475Z" }, +] + +[[package]] +name = "sniffio" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload_time = "2024-02-25T23:20:04.057Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload_time = "2024-02-25T23:20:01.196Z" }, +] + +[[package]] +name = "soupsieve" +version = "2.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/3f/f4/4a80cd6ef364b2e8b65b15816a843c0980f7a5a2b4dc701fc574952aa19f/soupsieve-2.7.tar.gz", hash = "sha256:ad282f9b6926286d2ead4750552c8a6142bc4c783fd66b0293547c8fe6ae126a", size = 103418, upload_time = "2025-04-20T18:50:08.518Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e7/9c/0e6afc12c269578be5c0c1c9f4b49a8d32770a080260c333ac04cc1c832d/soupsieve-2.7-py3-none-any.whl", hash = "sha256:6e60cc5c1ffaf1cebcc12e8188320b72071e922c2e897f737cadce79ad5d30c4", size = 36677, upload_time = "2025-04-20T18:50:07.196Z" }, +] + +[[package]] +name = "sqlalchemy" +version = "2.0.40" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "greenlet", marker = "(python_full_version < '3.14' and platform_machine == 'AMD64') or (python_full_version < '3.14' and platform_machine == 'WIN32') or (python_full_version < '3.14' and platform_machine == 'aarch64') or (python_full_version < '3.14' and platform_machine == 'amd64') or (python_full_version < '3.14' and platform_machine == 'ppc64le') or (python_full_version < '3.14' and platform_machine == 'win32') or (python_full_version < '3.14' and platform_machine == 'x86_64')" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/68/c3/3f2bfa5e4dcd9938405fe2fab5b6ab94a9248a4f9536ea2fd497da20525f/sqlalchemy-2.0.40.tar.gz", hash = "sha256:d827099289c64589418ebbcaead0145cd19f4e3e8a93919a0100247af245fa00", size = 9664299, upload_time = "2025-03-27T17:52:31.876Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/77/7e/55044a9ec48c3249bb38d5faae93f09579c35e862bb318ebd1ed7a1994a5/sqlalchemy-2.0.40-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f6bacab7514de6146a1976bc56e1545bee247242fab030b89e5f70336fc0003e", size = 2114025, upload_time = "2025-03-27T18:49:29.456Z" }, + { url = "https://files.pythonhosted.org/packages/77/0f/dcf7bba95f847aec72f638750747b12d37914f71c8cc7c133cf326ab945c/sqlalchemy-2.0.40-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5654d1ac34e922b6c5711631f2da497d3a7bffd6f9f87ac23b35feea56098011", size = 2104419, upload_time = "2025-03-27T18:49:30.75Z" }, + { url = "https://files.pythonhosted.org/packages/75/70/c86a5c20715e4fe903dde4c2fd44fc7e7a0d5fb52c1b954d98526f65a3ea/sqlalchemy-2.0.40-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35904d63412db21088739510216e9349e335f142ce4a04b69e2528020ee19ed4", size = 3222720, upload_time = "2025-03-27T18:44:29.871Z" }, + { url = "https://files.pythonhosted.org/packages/12/cf/b891a8c1d0c27ce9163361664c2128c7a57de3f35000ea5202eb3a2917b7/sqlalchemy-2.0.40-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c7a80ed86d6aaacb8160a1caef6680d4ddd03c944d985aecee940d168c411d1", size = 3222682, upload_time = "2025-03-27T18:55:20.097Z" }, + { url = "https://files.pythonhosted.org/packages/15/3f/7709d8c8266953d945435a96b7f425ae4172a336963756b58e996fbef7f3/sqlalchemy-2.0.40-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:519624685a51525ddaa7d8ba8265a1540442a2ec71476f0e75241eb8263d6f51", size = 3159542, upload_time = "2025-03-27T18:44:31.333Z" }, + { url = "https://files.pythonhosted.org/packages/85/7e/717eaabaf0f80a0132dc2032ea8f745b7a0914451c984821a7c8737fb75a/sqlalchemy-2.0.40-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:2ee5f9999a5b0e9689bed96e60ee53c3384f1a05c2dd8068cc2e8361b0df5b7a", size = 3179864, upload_time = "2025-03-27T18:55:21.784Z" }, + { url = 
"https://files.pythonhosted.org/packages/e4/cc/03eb5dfcdb575cbecd2bd82487b9848f250a4b6ecfb4707e834b4ce4ec07/sqlalchemy-2.0.40-cp311-cp311-win32.whl", hash = "sha256:c0cae71e20e3c02c52f6b9e9722bca70e4a90a466d59477822739dc31ac18b4b", size = 2084675, upload_time = "2025-03-27T18:48:55.915Z" }, + { url = "https://files.pythonhosted.org/packages/9a/48/440946bf9dc4dc231f4f31ef0d316f7135bf41d4b86aaba0c0655150d370/sqlalchemy-2.0.40-cp311-cp311-win_amd64.whl", hash = "sha256:574aea2c54d8f1dd1699449f332c7d9b71c339e04ae50163a3eb5ce4c4325ee4", size = 2110099, upload_time = "2025-03-27T18:48:57.45Z" }, + { url = "https://files.pythonhosted.org/packages/92/06/552c1f92e880b57d8b92ce6619bd569b25cead492389b1d84904b55989d8/sqlalchemy-2.0.40-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9d3b31d0a1c44b74d3ae27a3de422dfccd2b8f0b75e51ecb2faa2bf65ab1ba0d", size = 2112620, upload_time = "2025-03-27T18:40:00.071Z" }, + { url = "https://files.pythonhosted.org/packages/01/72/a5bc6e76c34cebc071f758161dbe1453de8815ae6e662393910d3be6d70d/sqlalchemy-2.0.40-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:37f7a0f506cf78c80450ed1e816978643d3969f99c4ac6b01104a6fe95c5490a", size = 2103004, upload_time = "2025-03-27T18:40:04.204Z" }, + { url = "https://files.pythonhosted.org/packages/bf/fd/0e96c8e6767618ed1a06e4d7a167fe13734c2f8113c4cb704443e6783038/sqlalchemy-2.0.40-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0bb933a650323e476a2e4fbef8997a10d0003d4da996aad3fd7873e962fdde4d", size = 3252440, upload_time = "2025-03-27T18:51:25.624Z" }, + { url = "https://files.pythonhosted.org/packages/cd/6a/eb82e45b15a64266a2917a6833b51a334ea3c1991728fd905bfccbf5cf63/sqlalchemy-2.0.40-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6959738971b4745eea16f818a2cd086fb35081383b078272c35ece2b07012716", size = 3263277, upload_time = "2025-03-27T18:50:28.142Z" }, + { url = "https://files.pythonhosted.org/packages/45/97/ebe41ab4530f50af99e3995ebd4e0204bf1b0dc0930f32250dde19c389fe/sqlalchemy-2.0.40-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:110179728e442dae85dd39591beb74072ae4ad55a44eda2acc6ec98ead80d5f2", size = 3198591, upload_time = "2025-03-27T18:51:27.543Z" }, + { url = "https://files.pythonhosted.org/packages/e6/1c/a569c1b2b2f5ac20ba6846a1321a2bf52e9a4061001f282bf1c5528dcd69/sqlalchemy-2.0.40-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e8040680eaacdce4d635f12c55c714f3d4c7f57da2bc47a01229d115bd319191", size = 3225199, upload_time = "2025-03-27T18:50:30.069Z" }, + { url = "https://files.pythonhosted.org/packages/8f/91/87cc71a6b10065ca0209d19a4bb575378abda6085e72fa0b61ffb2201b84/sqlalchemy-2.0.40-cp312-cp312-win32.whl", hash = "sha256:650490653b110905c10adac69408380688cefc1f536a137d0d69aca1069dc1d1", size = 2082959, upload_time = "2025-03-27T18:45:57.574Z" }, + { url = "https://files.pythonhosted.org/packages/2a/9f/14c511cda174aa1ad9b0e42b64ff5a71db35d08b0d80dc044dae958921e5/sqlalchemy-2.0.40-cp312-cp312-win_amd64.whl", hash = "sha256:2be94d75ee06548d2fc591a3513422b873490efb124048f50556369a834853b0", size = 2108526, upload_time = "2025-03-27T18:45:58.965Z" }, + { url = "https://files.pythonhosted.org/packages/8c/18/4e3a86cc0232377bc48c373a9ba6a1b3fb79ba32dbb4eda0b357f5a2c59d/sqlalchemy-2.0.40-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:915866fd50dd868fdcc18d61d8258db1bf9ed7fbd6dfec960ba43365952f3b01", size = 2107887, upload_time = "2025-03-27T18:40:05.461Z" }, + { url = 
"https://files.pythonhosted.org/packages/cb/60/9fa692b1d2ffc4cbd5f47753731fd332afed30137115d862d6e9a1e962c7/sqlalchemy-2.0.40-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4a4c5a2905a9ccdc67a8963e24abd2f7afcd4348829412483695c59e0af9a705", size = 2098367, upload_time = "2025-03-27T18:40:07.182Z" }, + { url = "https://files.pythonhosted.org/packages/4c/9f/84b78357ca641714a439eb3fbbddb17297dacfa05d951dbf24f28d7b5c08/sqlalchemy-2.0.40-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55028d7a3ebdf7ace492fab9895cbc5270153f75442a0472d8516e03159ab364", size = 3184806, upload_time = "2025-03-27T18:51:29.356Z" }, + { url = "https://files.pythonhosted.org/packages/4b/7d/e06164161b6bfce04c01bfa01518a20cccbd4100d5c951e5a7422189191a/sqlalchemy-2.0.40-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6cfedff6878b0e0d1d0a50666a817ecd85051d12d56b43d9d425455e608b5ba0", size = 3198131, upload_time = "2025-03-27T18:50:31.616Z" }, + { url = "https://files.pythonhosted.org/packages/6d/51/354af20da42d7ec7b5c9de99edafbb7663a1d75686d1999ceb2c15811302/sqlalchemy-2.0.40-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bb19e30fdae77d357ce92192a3504579abe48a66877f476880238a962e5b96db", size = 3131364, upload_time = "2025-03-27T18:51:31.336Z" }, + { url = "https://files.pythonhosted.org/packages/7a/2f/48a41ff4e6e10549d83fcc551ab85c268bde7c03cf77afb36303c6594d11/sqlalchemy-2.0.40-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:16d325ea898f74b26ffcd1cf8c593b0beed8714f0317df2bed0d8d1de05a8f26", size = 3159482, upload_time = "2025-03-27T18:50:33.201Z" }, + { url = "https://files.pythonhosted.org/packages/33/ac/e5e0a807163652a35be878c0ad5cfd8b1d29605edcadfb5df3c512cdf9f3/sqlalchemy-2.0.40-cp313-cp313-win32.whl", hash = "sha256:a669cbe5be3c63f75bcbee0b266779706f1a54bcb1000f302685b87d1b8c1500", size = 2080704, upload_time = "2025-03-27T18:46:00.193Z" }, + { url = "https://files.pythonhosted.org/packages/1c/cb/f38c61f7f2fd4d10494c1c135ff6a6ddb63508d0b47bccccd93670637309/sqlalchemy-2.0.40-cp313-cp313-win_amd64.whl", hash = "sha256:641ee2e0834812d657862f3a7de95e0048bdcb6c55496f39c6fa3d435f6ac6ad", size = 2104564, upload_time = "2025-03-27T18:46:01.442Z" }, + { url = "https://files.pythonhosted.org/packages/d1/7c/5fc8e802e7506fe8b55a03a2e1dab156eae205c91bee46305755e086d2e2/sqlalchemy-2.0.40-py3-none-any.whl", hash = "sha256:32587e2e1e359276957e6fe5dad089758bc042a971a8a09ae8ecf7a8fe23d07a", size = 1903894, upload_time = "2025-03-27T18:40:43.796Z" }, +] + +[[package]] +name = "starlette" +version = "0.45.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ff/fb/2984a686808b89a6781526129a4b51266f678b2d2b97ab2d325e56116df8/starlette-0.45.3.tar.gz", hash = "sha256:2cbcba2a75806f8a41c722141486f37c28e30a0921c5f6fe4346cb0dcee1302f", size = 2574076, upload_time = "2025-01-24T11:17:36.535Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d9/61/f2b52e107b1fc8944b33ef56bf6ac4ebbe16d91b94d2b87ce013bf63fb84/starlette-0.45.3-py3-none-any.whl", hash = "sha256:dfb6d332576f136ec740296c7e8bb8c8a7125044e7c6da30744718880cdd059d", size = 71507, upload_time = "2025-01-24T11:17:34.182Z" }, +] + +[[package]] +name = "tensorboard" +version = "2.15.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "absl-py" }, + { name = "google-auth" }, + { name = "google-auth-oauthlib" }, + { name = "grpcio" }, + { name = "markdown" }, + { name = 
"numpy" }, + { name = "protobuf" }, + { name = "requests" }, + { name = "setuptools" }, + { name = "six" }, + { name = "tensorboard-data-server" }, + { name = "werkzeug" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/37/12/f6e9b9dcc310263cbd3948274e286538bd6800fd0c268850788f14a0c6d0/tensorboard-2.15.2-py3-none-any.whl", hash = "sha256:a6f6443728064d962caea6d34653e220e34ef8df764cb06a8212c17e1a8f0622", size = 5539713, upload_time = "2024-02-09T10:39:25.636Z" }, +] + +[[package]] +name = "tensorboard-data-server" +version = "0.7.2" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7a/13/e503968fefabd4c6b2650af21e110aa8466fe21432cd7c43a84577a89438/tensorboard_data_server-0.7.2-py3-none-any.whl", hash = "sha256:7e0610d205889588983836ec05dc098e80f97b7e7bbff7e994ebb78f578d0ddb", size = 2356, upload_time = "2023-10-23T21:23:32.16Z" }, + { url = "https://files.pythonhosted.org/packages/b7/85/dabeaf902892922777492e1d253bb7e1264cadce3cea932f7ff599e53fea/tensorboard_data_server-0.7.2-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:9fe5d24221b29625dbc7328b0436ca7fc1c23de4acf4d272f1180856e32f9f60", size = 4823598, upload_time = "2023-10-23T21:23:33.714Z" }, + { url = "https://files.pythonhosted.org/packages/73/c6/825dab04195756cf8ff2e12698f22513b3db2f64925bdd41671bfb33aaa5/tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl", hash = "sha256:ef687163c24185ae9754ed5650eb5bc4d84ff257aabdc33f0cc6f74d8ba54530", size = 6590363, upload_time = "2023-10-23T21:23:35.583Z" }, +] + +[[package]] +name = "tensorflow-cpu" +version = "2.15.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "absl-py" }, + { name = "astunparse" }, + { name = "flatbuffers" }, + { name = "gast" }, + { name = "google-pasta" }, + { name = "grpcio" }, + { name = "h5py" }, + { name = "keras" }, + { name = "libclang" }, + { name = "ml-dtypes" }, + { name = "numpy" }, + { name = "opt-einsum" }, + { name = "packaging" }, + { name = "protobuf" }, + { name = "setuptools" }, + { name = "six" }, + { name = "tensorboard" }, + { name = "tensorflow-estimator" }, + { name = "tensorflow-io-gcs-filesystem" }, + { name = "termcolor" }, + { name = "typing-extensions" }, + { name = "wrapt" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/d8/6c/dc0642ce2656637d8f31ba9c618a41bf14e38428ba77e4e0a9359be39436/tensorflow_cpu-2.15.1-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:ee3bb114c6031d471d891c761e7eda2c80bea19bb318abcd3d5bab92ccfaf9aa", size = 236482774, upload_time = "2024-03-08T23:52:40.963Z" }, + { url = "https://files.pythonhosted.org/packages/5b/00/af89cb211fc96ffdebb52a687dad7f83b0b1d82bc057f65309fa03a89911/tensorflow_cpu-2.15.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54660c074d7241a503e81edfd9f5ef5af88f64051b72e2945f26318c790f2d26", size = 207208420, upload_time = "2024-03-08T23:48:30.479Z" }, + { url = "https://files.pythonhosted.org/packages/51/8a/ff2fc9bad8edc68ef4cd63963c10b320de03d3496def83d2a9b1635c6831/tensorflow_cpu-2.15.1-cp311-cp311-win_amd64.whl", hash = "sha256:dc75baf4c08a6e8ab7ceec97f002bb993508a5b58f13fac5283ee976a71a3c67", size = 2133, upload_time = "2024-03-08T23:53:47.249Z" }, +] + +[[package]] +name = "tensorflow-estimator" +version = "2.15.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/b6/c8/2f823c8958d5342eafc6dd3e922f0cc4fcf8c2e0460284cc462dae3b60a0/tensorflow_estimator-2.15.0-py2.py3-none-any.whl", hash = "sha256:aedf21eec7fb2dc91150fc91a1ce12bc44dbb72278a08b58e79ff87c9e28f153", size = 441974, upload_time = "2023-11-07T01:10:10.812Z" }, +] + +[[package]] +name = "tensorflow-io-gcs-filesystem" +version = "0.31.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/84/00/900ca310ff2e46eb3127f8f54af0b0000a6cc786be6a54dc2cfe841f4683/tensorflow_io_gcs_filesystem-0.31.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:8909c4344b0e96aa356230ab460ffafe5900c33c1aaced65fafae71d177a1966", size = 1642401, upload_time = "2023-02-25T19:31:40.204Z" }, + { url = "https://files.pythonhosted.org/packages/e7/c4/0d44ef93add3432ce43f37fe0c205cc7b6fd685fca80054fb4a646a9dbe3/tensorflow_io_gcs_filesystem-0.31.0-cp311-cp311-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e417faf8755aafe52d8f8c6b5ae5bae6e4fae8326ee3acd5e9181b83bbfbae87", size = 2381673, upload_time = "2023-02-25T19:31:41.992Z" }, + { url = "https://files.pythonhosted.org/packages/a8/2b/3064195efa016fff942009fe965ecbbbbd7d70bf34ee22d4ff31a0f3443a/tensorflow_io_gcs_filesystem-0.31.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37c40e3c4ee1f8dda3b545deea6b8839192c82037d8021db9f589908034ad975", size = 2572150, upload_time = "2023-02-25T19:31:43.874Z" }, + { url = "https://files.pythonhosted.org/packages/ac/4e/9566a313927be582ca99455a9523a097c7888fc819695bdc08415432b202/tensorflow_io_gcs_filesystem-0.31.0-cp311-cp311-win_amd64.whl", hash = "sha256:4bb37d23f21c434687b11059cb7ffd094d52a7813368915ba1b7057e3c16e414", size = 1486315, upload_time = "2023-02-25T19:31:45.641Z" }, +] + +[[package]] +name = "termcolor" +version = "3.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ca/6c/3d75c196ac07ac8749600b60b03f4f6094d54e132c4d94ebac6ee0e0add0/termcolor-3.1.0.tar.gz", hash = "sha256:6a6dd7fbee581909eeec6a756cff1d7f7c376063b14e4a298dc4980309e55970", size = 14324, upload_time = "2025-04-30T11:37:53.791Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4f/bd/de8d508070629b6d84a30d01d57e4a65c69aa7f5abe7560b8fad3b50ea59/termcolor-3.1.0-py3-none-any.whl", hash = "sha256:591dd26b5c2ce03b9e43f391264626557873ce1d379019786f99b0c2bee140aa", size = 7684, upload_time = "2025-04-30T11:37:52.382Z" }, +] + +[[package]] +name = "text-unidecode" +version = "1.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ab/e2/e9a00f0ccb71718418230718b3d900e71a5d16e701a3dae079a21e9cd8f8/text-unidecode-1.3.tar.gz", hash = "sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93", size = 76885, upload_time = "2019-08-30T21:36:45.405Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a6/a5/c0b6468d3824fe3fde30dbb5e1f687b291608f9473681bbf7dabbf5a87d7/text_unidecode-1.3-py2.py3-none-any.whl", hash = "sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8", size = 78154, upload_time = "2019-08-30T21:37:03.543Z" }, +] + +[[package]] +name = "tokenizers" +version = "0.19.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "huggingface-hub" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/48/04/2071c150f374aab6d5e92aaec38d0f3c368d227dd9e0469a1f0966ac68d1/tokenizers-0.19.1.tar.gz", hash = 
"sha256:ee59e6680ed0fdbe6b724cf38bd70400a0c1dd623b07ac729087270caeac88e3", size = 321039, upload_time = "2024-04-17T21:40:41.849Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/d6/6e1d728d765eb4102767f071bf7f6439ab10d7f4a975c9217db65715207a/tokenizers-0.19.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:5c88d1481f1882c2e53e6bb06491e474e420d9ac7bdff172610c4f9ad3898059", size = 2533448, upload_time = "2024-04-17T21:36:38.61Z" }, + { url = "https://files.pythonhosted.org/packages/90/79/d17a0f491d10817cd30f1121a07aa09c8e97a81114b116e473baf1577f09/tokenizers-0.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ddf672ed719b4ed82b51499100f5417d7d9f6fb05a65e232249268f35de5ed14", size = 2440254, upload_time = "2024-04-17T21:36:40.398Z" }, + { url = "https://files.pythonhosted.org/packages/c7/28/2d11c3ff94f9d42eceb2ea549a06e3f166fe391c5a025e5d96fac898a3ac/tokenizers-0.19.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:dadc509cc8a9fe460bd274c0e16ac4184d0958117cf026e0ea8b32b438171594", size = 3684971, upload_time = "2024-04-17T21:36:43.115Z" }, + { url = "https://files.pythonhosted.org/packages/36/c6/537f22b57e6003904d35d07962dbde2f2e9bdd791d0241da976a4c7f8194/tokenizers-0.19.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfedf31824ca4915b511b03441784ff640378191918264268e6923da48104acc", size = 3568894, upload_time = "2024-04-17T21:36:45.011Z" }, + { url = "https://files.pythonhosted.org/packages/af/ef/3c1deed14ec59b2c8e7e2fa27b2a53f7d101181277a43b89ab17d891ef2e/tokenizers-0.19.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ac11016d0a04aa6487b1513a3a36e7bee7eec0e5d30057c9c0408067345c48d2", size = 3426873, upload_time = "2024-04-17T21:36:47.001Z" }, + { url = "https://files.pythonhosted.org/packages/06/db/c0320c4798ac6bd12d2ef895bec9d10d216a3b4d6fff10e9d68883ea7edc/tokenizers-0.19.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:76951121890fea8330d3a0df9a954b3f2a37e3ec20e5b0530e9a0044ca2e11fe", size = 3965050, upload_time = "2024-04-17T21:36:49.202Z" }, + { url = "https://files.pythonhosted.org/packages/4c/8a/a166888d6cb14db55f5eb7ce0b1d4777d145aa27cbf4f945712cf6c29935/tokenizers-0.19.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b342d2ce8fc8d00f376af068e3274e2e8649562e3bc6ae4a67784ded6b99428d", size = 4047855, upload_time = "2024-04-17T21:36:52.864Z" }, + { url = "https://files.pythonhosted.org/packages/a7/03/fb50fc03f86016b227a967c8d474f90230c885c0d18f78acdfda7a96ce56/tokenizers-0.19.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d16ff18907f4909dca9b076b9c2d899114dd6abceeb074eca0c93e2353f943aa", size = 3608228, upload_time = "2024-04-17T21:36:55.7Z" }, + { url = "https://files.pythonhosted.org/packages/5b/cd/0385e1026e1e03732fd398e964792a3a8433918b166748c82507e014d748/tokenizers-0.19.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:706a37cc5332f85f26efbe2bdc9ef8a9b372b77e4645331a405073e4b3a8c1c6", size = 9633115, upload_time = "2024-04-17T21:36:58.299Z" }, + { url = "https://files.pythonhosted.org/packages/25/50/8f8ad0bbdaf09d04b15e6502d1fa1c653754ed7e016e4ae009726aa1a4e4/tokenizers-0.19.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:16baac68651701364b0289979ecec728546133e8e8fe38f66fe48ad07996b88b", size = 9949062, upload_time = "2024-04-17T21:37:01.947Z" }, + { url = 
"https://files.pythonhosted.org/packages/db/11/31be66710f1d14526f3588a441efadeb184e1e68458067007b20ead03c59/tokenizers-0.19.1-cp311-none-win32.whl", hash = "sha256:9ed240c56b4403e22b9584ee37d87b8bfa14865134e3e1c3fb4b2c42fafd3256", size = 2041039, upload_time = "2024-04-17T21:37:05.607Z" }, + { url = "https://files.pythonhosted.org/packages/65/8e/6d7d72b28f22c422cff8beae10ac3c2e4376b9be721ef8167b7eecd1da62/tokenizers-0.19.1-cp311-none-win_amd64.whl", hash = "sha256:ad57d59341710b94a7d9dbea13f5c1e7d76fd8d9bcd944a7a6ab0b0da6e0cc66", size = 2220386, upload_time = "2024-04-17T21:37:08.295Z" }, + { url = "https://files.pythonhosted.org/packages/63/90/2890cd096898dcdb596ee172cde40c0f54a9cf43b0736aa260a5501252af/tokenizers-0.19.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:621d670e1b1c281a1c9698ed89451395d318802ff88d1fc1accff0867a06f153", size = 2530580, upload_time = "2024-04-17T21:37:10.688Z" }, + { url = "https://files.pythonhosted.org/packages/74/d1/f4e1e950adb36675dfd8f9d0f4be644f3f3aaf22a5677a4f5c81282b662e/tokenizers-0.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d924204a3dbe50b75630bd16f821ebda6a5f729928df30f582fb5aade90c818a", size = 2436682, upload_time = "2024-04-17T21:37:12.966Z" }, + { url = "https://files.pythonhosted.org/packages/ed/30/89b321a16c58d233e301ec15072c0d3ed5014825e72da98604cd3ab2fba1/tokenizers-0.19.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:4f3fefdc0446b1a1e6d81cd4c07088ac015665d2e812f6dbba4a06267d1a2c95", size = 3693494, upload_time = "2024-04-17T21:37:14.755Z" }, + { url = "https://files.pythonhosted.org/packages/05/40/fa899f32de483500fbc78befd378fd7afba4270f17db707d1a78c0a4ddc3/tokenizers-0.19.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9620b78e0b2d52ef07b0d428323fb34e8ea1219c5eac98c2596311f20f1f9266", size = 3566541, upload_time = "2024-04-17T21:37:17.067Z" }, + { url = "https://files.pythonhosted.org/packages/67/14/e7da32ae5fb4971830f1ef335932fae3fa57e76b537e852f146c850aefdf/tokenizers-0.19.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:04ce49e82d100594715ac1b2ce87d1a36e61891a91de774755f743babcd0dd52", size = 3430792, upload_time = "2024-04-17T21:37:19.055Z" }, + { url = "https://files.pythonhosted.org/packages/f2/4b/aae61bdb6ab584d2612170801703982ee0e35f8b6adacbeefe5a3b277621/tokenizers-0.19.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c5c2ff13d157afe413bf7e25789879dd463e5a4abfb529a2d8f8473d8042e28f", size = 3962812, upload_time = "2024-04-17T21:37:21.008Z" }, + { url = "https://files.pythonhosted.org/packages/0a/b6/f7b7ef89c4da7b20256e6eab23d3835f05d1ca8f451d31c16cbfe3cd9eb6/tokenizers-0.19.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3174c76efd9d08f836bfccaca7cfec3f4d1c0a4cf3acbc7236ad577cc423c840", size = 4024688, upload_time = "2024-04-17T21:37:23.659Z" }, + { url = "https://files.pythonhosted.org/packages/80/54/12047a69f5b382d7ee72044dc89151a2dd0d13b2c9bdcc22654883704d31/tokenizers-0.19.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c9d5b6c0e7a1e979bec10ff960fae925e947aab95619a6fdb4c1d8ff3708ce3", size = 3610961, upload_time = "2024-04-17T21:37:26.234Z" }, + { url = "https://files.pythonhosted.org/packages/52/b7/1e8a913d18ac28feeda42d4d2d51781874398fb59cd1c1e2653a4b5742ed/tokenizers-0.19.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a179856d1caee06577220ebcfa332af046d576fb73454b8f4d4b0ba8324423ea", size = 9631367, upload_time = 
"2024-04-17T21:37:28.752Z" }, + { url = "https://files.pythonhosted.org/packages/ac/3d/2284f6d99f8f21d09352b88b8cfefa24ab88468d962aeb0aa15c20d76b32/tokenizers-0.19.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:952b80dac1a6492170f8c2429bd11fcaa14377e097d12a1dbe0ef2fb2241e16c", size = 9950121, upload_time = "2024-04-17T21:37:31.741Z" }, + { url = "https://files.pythonhosted.org/packages/2a/94/ec3369dbc9b7200c14c8c7a1a04c78b7a7398d0c001e1b7d1ffe30eb93a0/tokenizers-0.19.1-cp312-none-win32.whl", hash = "sha256:01d62812454c188306755c94755465505836fd616f75067abcae529c35edeb57", size = 2044069, upload_time = "2024-04-17T21:37:35.672Z" }, + { url = "https://files.pythonhosted.org/packages/0c/97/80bff6937e0c67d30c0facacd4f0bcf4254e581aa4995c73cef8c8640e56/tokenizers-0.19.1-cp312-none-win_amd64.whl", hash = "sha256:b70bfbe3a82d3e3fb2a5e9b22a39f8d1740c96c68b6ace0086b39074f08ab89a", size = 2214527, upload_time = "2024-04-17T21:37:39.19Z" }, +] + +[[package]] +name = "tqdm" +version = "4.67.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737, upload_time = "2024-11-24T20:12:22.481Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload_time = "2024-11-24T20:12:19.698Z" }, +] + +[[package]] +name = "transformers" +version = "4.40.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "huggingface-hub" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "pyyaml" }, + { name = "regex" }, + { name = "requests" }, + { name = "safetensors" }, + { name = "tokenizers" }, + { name = "tqdm" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/73/ef/d877998c9ab04ecb8eeda495e1c64f2f6bb6724b0634f7d0d6aca2cdc6af/transformers-4.40.2.tar.gz", hash = "sha256:657b6054a2097671398d976ad46e60836e7e15f9ea9551631a96e33cb9240649", size = 7797669, upload_time = "2024-05-06T16:08:02.166Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/05/23/ba02efa28518557e0cfe0ce5c1170000dd7501ed02ac865fc90cbe3daa93/transformers-4.40.2-py3-none-any.whl", hash = "sha256:71cb94301ec211a2e1d4b8c8d18dcfaa902dfa00a089dceca167a8aa265d6f2d", size = 8999918, upload_time = "2024-05-06T16:07:56.121Z" }, +] + +[[package]] +name = "typer" +version = "0.15.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "rich" }, + { name = "shellingham" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/98/1a/5f36851f439884bcfe8539f6a20ff7516e7b60f319bbaf69a90dc35cc2eb/typer-0.15.3.tar.gz", hash = "sha256:818873625d0569653438316567861899f7e9972f2e6e0c16dab608345ced713c", size = 101641, upload_time = "2025-04-28T21:40:59.204Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/48/20/9d953de6f4367163d23ec823200eb3ecb0050a2609691e512c8b95827a9b/typer-0.15.3-py3-none-any.whl", hash = "sha256:c86a65ad77ca531f03de08d1b9cb67cd09ad02ddddf4b34745b5008f43b239bd", size = 45253, upload_time = "2025-04-28T21:40:56.269Z" }, +] + +[[package]] 
+name = "typing-extensions" +version = "4.13.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f6/37/23083fcd6e35492953e8d2aaaa68b860eb422b34627b13f2ce3eb6106061/typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef", size = 106967, upload_time = "2025-04-10T14:19:05.416Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8b/54/b1ae86c0973cc6f0210b53d508ca3641fb6d0c56823f288d108bc7ab3cc8/typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c", size = 45806, upload_time = "2025-04-10T14:19:03.967Z" }, +] + +[[package]] +name = "typing-inspection" +version = "0.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/82/5c/e6082df02e215b846b4b8c0b887a64d7d08ffaba30605502639d44c06b82/typing_inspection-0.4.0.tar.gz", hash = "sha256:9765c87de36671694a67904bf2c96e395be9c6439bb6c87b5142569dcdd65122", size = 76222, upload_time = "2025-02-25T17:27:59.638Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/31/08/aa4fdfb71f7de5176385bd9e90852eaf6b5d622735020ad600f2bab54385/typing_inspection-0.4.0-py3-none-any.whl", hash = "sha256:50e72559fcd2a6367a19f7a7e610e6afcb9fac940c650290eed893d61386832f", size = 14125, upload_time = "2025-02-25T17:27:57.754Z" }, +] + +[[package]] +name = "tzdata" +version = "2025.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/95/32/1a225d6164441be760d75c2c42e2780dc0873fe382da3e98a2e1e48361e5/tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9", size = 196380, upload_time = "2025-03-23T13:54:43.652Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839, upload_time = "2025-03-23T13:54:41.845Z" }, +] + +[[package]] +name = "tzlocal" +version = "5.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "tzdata", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8b/2e/c14812d3d4d9cd1773c6be938f89e5735a1f11a9f184ac3639b93cef35d5/tzlocal-5.3.1.tar.gz", hash = "sha256:cceffc7edecefea1f595541dbd6e990cb1ea3d19bf01b2809f362a03dd7921fd", size = 30761, upload_time = "2025-03-05T21:17:41.549Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c2/14/e2a54fabd4f08cd7af1c07030603c3356b74da07f7cc056e600436edfa17/tzlocal-5.3.1-py3-none-any.whl", hash = "sha256:eb1a66c3ef5847adf7a834f1be0800581b683b5608e74f86ecbcef8ab91bb85d", size = 18026, upload_time = "2025-03-05T21:17:39.857Z" }, +] + +[[package]] +name = "uritemplate" +version = "4.1.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d2/5a/4742fdba39cd02a56226815abfa72fe0aa81c33bed16ed045647d6000eba/uritemplate-4.1.1.tar.gz", hash = "sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0", size = 273898, upload_time = "2021-10-13T11:15:14.84Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/c0/7461b49cd25aeece13766f02ee576d1db528f1c37ce69aee300e075b485b/uritemplate-4.1.1-py2.py3-none-any.whl", hash = 
"sha256:830c08b8d99bdd312ea4ead05994a38e8936266f84b9a7878232db50b044e02e", size = 10356, upload_time = "2021-10-13T11:15:12.316Z" }, +] + +[[package]] +name = "urllib3" +version = "1.26.20" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e4/e8/6ff5e6bc22095cfc59b6ea711b687e2b7ed4bdb373f7eeec370a97d7392f/urllib3-1.26.20.tar.gz", hash = "sha256:40c2dc0c681e47eb8f90e7e27bf6ff7df2e677421fd46756da1161c39ca70d32", size = 307380, upload_time = "2024-08-29T15:43:11.37Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/33/cf/8435d5a7159e2a9c83a95896ed596f68cf798005fe107cc655b5c5c14704/urllib3-1.26.20-py2.py3-none-any.whl", hash = "sha256:0ed14ccfbf1c30a9072c7ca157e4319b70d65f623e91e7b32fadb2853431016e", size = 144225, upload_time = "2024-08-29T15:43:08.921Z" }, +] + +[[package]] +name = "uvicorn" +version = "0.34.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a6/ae/9bbb19b9e1c450cf9ecaef06463e40234d98d95bf572fab11b4f19ae5ded/uvicorn-0.34.2.tar.gz", hash = "sha256:0e929828f6186353a80b58ea719861d2629d766293b6d19baf086ba31d4f3328", size = 76815, upload_time = "2025-04-19T06:02:50.101Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b1/4b/4cef6ce21a2aaca9d852a6e84ef4f135d99fcd74fa75105e2fc0c8308acd/uvicorn-0.34.2-py3-none-any.whl", hash = "sha256:deb49af569084536d269fe0a6d67e3754f104cf03aba7c11c40f01aadf33c403", size = 62483, upload_time = "2025-04-19T06:02:48.42Z" }, +] + +[package.optional-dependencies] +standard = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "httptools" }, + { name = "python-dotenv" }, + { name = "pyyaml" }, + { name = "uvloop", marker = "platform_python_implementation != 'PyPy' and sys_platform != 'cygwin' and sys_platform != 'win32'" }, + { name = "watchfiles" }, + { name = "websockets" }, +] + +[[package]] +name = "uvloop" +version = "0.21.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/af/c0/854216d09d33c543f12a44b393c402e89a920b1a0a7dc634c42de91b9cf6/uvloop-0.21.0.tar.gz", hash = "sha256:3bf12b0fda68447806a7ad847bfa591613177275d35b6724b1ee573faa3704e3", size = 2492741, upload_time = "2024-10-14T23:38:35.489Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/57/a7/4cf0334105c1160dd6819f3297f8700fda7fc30ab4f61fbf3e725acbc7cc/uvloop-0.21.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c0f3fa6200b3108919f8bdabb9a7f87f20e7097ea3c543754cabc7d717d95cf8", size = 1447410, upload_time = "2024-10-14T23:37:33.612Z" }, + { url = "https://files.pythonhosted.org/packages/8c/7c/1517b0bbc2dbe784b563d6ab54f2ef88c890fdad77232c98ed490aa07132/uvloop-0.21.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0878c2640cf341b269b7e128b1a5fed890adc4455513ca710d77d5e93aa6d6a0", size = 805476, upload_time = "2024-10-14T23:37:36.11Z" }, + { url = "https://files.pythonhosted.org/packages/ee/ea/0bfae1aceb82a503f358d8d2fa126ca9dbdb2ba9c7866974faec1cb5875c/uvloop-0.21.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9fb766bb57b7388745d8bcc53a359b116b8a04c83a2288069809d2b3466c37e", size = 3960855, upload_time = "2024-10-14T23:37:37.683Z" }, + { url = "https://files.pythonhosted.org/packages/8a/ca/0864176a649838b838f36d44bf31c451597ab363b60dc9e09c9630619d41/uvloop-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:8a375441696e2eda1c43c44ccb66e04d61ceeffcd76e4929e527b7fa401b90fb", size = 3973185, upload_time = "2024-10-14T23:37:40.226Z" }, + { url = "https://files.pythonhosted.org/packages/30/bf/08ad29979a936d63787ba47a540de2132169f140d54aa25bc8c3df3e67f4/uvloop-0.21.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:baa0e6291d91649c6ba4ed4b2f982f9fa165b5bbd50a9e203c416a2797bab3c6", size = 3820256, upload_time = "2024-10-14T23:37:42.839Z" }, + { url = "https://files.pythonhosted.org/packages/da/e2/5cf6ef37e3daf2f06e651aae5ea108ad30df3cb269102678b61ebf1fdf42/uvloop-0.21.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4509360fcc4c3bd2c70d87573ad472de40c13387f5fda8cb58350a1d7475e58d", size = 3937323, upload_time = "2024-10-14T23:37:45.337Z" }, + { url = "https://files.pythonhosted.org/packages/8c/4c/03f93178830dc7ce8b4cdee1d36770d2f5ebb6f3d37d354e061eefc73545/uvloop-0.21.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:359ec2c888397b9e592a889c4d72ba3d6befba8b2bb01743f72fffbde663b59c", size = 1471284, upload_time = "2024-10-14T23:37:47.833Z" }, + { url = "https://files.pythonhosted.org/packages/43/3e/92c03f4d05e50f09251bd8b2b2b584a2a7f8fe600008bcc4523337abe676/uvloop-0.21.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f7089d2dc73179ce5ac255bdf37c236a9f914b264825fdaacaded6990a7fb4c2", size = 821349, upload_time = "2024-10-14T23:37:50.149Z" }, + { url = "https://files.pythonhosted.org/packages/a6/ef/a02ec5da49909dbbfb1fd205a9a1ac4e88ea92dcae885e7c961847cd51e2/uvloop-0.21.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:baa4dcdbd9ae0a372f2167a207cd98c9f9a1ea1188a8a526431eef2f8116cc8d", size = 4580089, upload_time = "2024-10-14T23:37:51.703Z" }, + { url = "https://files.pythonhosted.org/packages/06/a7/b4e6a19925c900be9f98bec0a75e6e8f79bb53bdeb891916609ab3958967/uvloop-0.21.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86975dca1c773a2c9864f4c52c5a55631038e387b47eaf56210f873887b6c8dc", size = 4693770, upload_time = "2024-10-14T23:37:54.122Z" }, + { url = "https://files.pythonhosted.org/packages/ce/0c/f07435a18a4b94ce6bd0677d8319cd3de61f3a9eeb1e5f8ab4e8b5edfcb3/uvloop-0.21.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:461d9ae6660fbbafedd07559c6a2e57cd553b34b0065b6550685f6653a98c1cb", size = 4451321, upload_time = "2024-10-14T23:37:55.766Z" }, + { url = "https://files.pythonhosted.org/packages/8f/eb/f7032be105877bcf924709c97b1bf3b90255b4ec251f9340cef912559f28/uvloop-0.21.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:183aef7c8730e54c9a3ee3227464daed66e37ba13040bb3f350bc2ddc040f22f", size = 4659022, upload_time = "2024-10-14T23:37:58.195Z" }, + { url = "https://files.pythonhosted.org/packages/3f/8d/2cbef610ca21539f0f36e2b34da49302029e7c9f09acef0b1c3b5839412b/uvloop-0.21.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:bfd55dfcc2a512316e65f16e503e9e450cab148ef11df4e4e679b5e8253a5281", size = 1468123, upload_time = "2024-10-14T23:38:00.688Z" }, + { url = "https://files.pythonhosted.org/packages/93/0d/b0038d5a469f94ed8f2b2fce2434a18396d8fbfb5da85a0a9781ebbdec14/uvloop-0.21.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:787ae31ad8a2856fc4e7c095341cccc7209bd657d0e71ad0dc2ea83c4a6fa8af", size = 819325, upload_time = "2024-10-14T23:38:02.309Z" }, + { url = "https://files.pythonhosted.org/packages/50/94/0a687f39e78c4c1e02e3272c6b2ccdb4e0085fda3b8352fecd0410ccf915/uvloop-0.21.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:5ee4d4ef48036ff6e5cfffb09dd192c7a5027153948d85b8da7ff705065bacc6", size = 4582806, upload_time = "2024-10-14T23:38:04.711Z" }, + { url = "https://files.pythonhosted.org/packages/d2/19/f5b78616566ea68edd42aacaf645adbf71fbd83fc52281fba555dc27e3f1/uvloop-0.21.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3df876acd7ec037a3d005b3ab85a7e4110422e4d9c1571d4fc89b0fc41b6816", size = 4701068, upload_time = "2024-10-14T23:38:06.385Z" }, + { url = "https://files.pythonhosted.org/packages/47/57/66f061ee118f413cd22a656de622925097170b9380b30091b78ea0c6ea75/uvloop-0.21.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bd53ecc9a0f3d87ab847503c2e1552b690362e005ab54e8a48ba97da3924c0dc", size = 4454428, upload_time = "2024-10-14T23:38:08.416Z" }, + { url = "https://files.pythonhosted.org/packages/63/9a/0962b05b308494e3202d3f794a6e85abe471fe3cafdbcf95c2e8c713aabd/uvloop-0.21.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a5c39f217ab3c663dc699c04cbd50c13813e31d917642d459fdcec07555cc553", size = 4660018, upload_time = "2024-10-14T23:38:10.888Z" }, +] + +[[package]] +name = "watchfiles" +version = "1.0.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/03/e2/8ed598c42057de7aa5d97c472254af4906ff0a59a66699d426fc9ef795d7/watchfiles-1.0.5.tar.gz", hash = "sha256:b7529b5dcc114679d43827d8c35a07c493ad6f083633d573d81c660abc5979e9", size = 94537, upload_time = "2025-04-08T10:36:26.722Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/39/f4/41b591f59021786ef517e1cdc3b510383551846703e03f204827854a96f8/watchfiles-1.0.5-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:237f9be419e977a0f8f6b2e7b0475ababe78ff1ab06822df95d914a945eac827", size = 405336, upload_time = "2025-04-08T10:34:59.359Z" }, + { url = "https://files.pythonhosted.org/packages/ae/06/93789c135be4d6d0e4f63e96eea56dc54050b243eacc28439a26482b5235/watchfiles-1.0.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e0da39ff917af8b27a4bdc5a97ac577552a38aac0d260a859c1517ea3dc1a7c4", size = 395977, upload_time = "2025-04-08T10:35:00.522Z" }, + { url = "https://files.pythonhosted.org/packages/d2/db/1cd89bd83728ca37054512d4d35ab69b5f12b8aa2ac9be3b0276b3bf06cc/watchfiles-1.0.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cfcb3952350e95603f232a7a15f6c5f86c5375e46f0bd4ae70d43e3e063c13d", size = 455232, upload_time = "2025-04-08T10:35:01.698Z" }, + { url = "https://files.pythonhosted.org/packages/40/90/d8a4d44ffe960517e487c9c04f77b06b8abf05eb680bed71c82b5f2cad62/watchfiles-1.0.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:68b2dddba7a4e6151384e252a5632efcaa9bc5d1c4b567f3cb621306b2ca9f63", size = 459151, upload_time = "2025-04-08T10:35:03.358Z" }, + { url = "https://files.pythonhosted.org/packages/6c/da/267a1546f26465dead1719caaba3ce660657f83c9d9c052ba98fb8856e13/watchfiles-1.0.5-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:95cf944fcfc394c5f9de794ce581914900f82ff1f855326f25ebcf24d5397418", size = 489054, upload_time = "2025-04-08T10:35:04.561Z" }, + { url = "https://files.pythonhosted.org/packages/b1/31/33850dfd5c6efb6f27d2465cc4c6b27c5a6f5ed53c6fa63b7263cf5f60f6/watchfiles-1.0.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ecf6cd9f83d7c023b1aba15d13f705ca7b7d38675c121f3cc4a6e25bd0857ee9", size = 523955, upload_time = "2025-04-08T10:35:05.786Z" }, + { url = 
"https://files.pythonhosted.org/packages/09/84/b7d7b67856efb183a421f1416b44ca975cb2ea6c4544827955dfb01f7dc2/watchfiles-1.0.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:852de68acd6212cd6d33edf21e6f9e56e5d98c6add46f48244bd479d97c967c6", size = 502234, upload_time = "2025-04-08T10:35:07.187Z" }, + { url = "https://files.pythonhosted.org/packages/71/87/6dc5ec6882a2254cfdd8b0718b684504e737273903b65d7338efaba08b52/watchfiles-1.0.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5730f3aa35e646103b53389d5bc77edfbf578ab6dab2e005142b5b80a35ef25", size = 454750, upload_time = "2025-04-08T10:35:08.859Z" }, + { url = "https://files.pythonhosted.org/packages/3d/6c/3786c50213451a0ad15170d091570d4a6554976cf0df19878002fc96075a/watchfiles-1.0.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:18b3bd29954bc4abeeb4e9d9cf0b30227f0f206c86657674f544cb032296acd5", size = 631591, upload_time = "2025-04-08T10:35:10.64Z" }, + { url = "https://files.pythonhosted.org/packages/1b/b3/1427425ade4e359a0deacce01a47a26024b2ccdb53098f9d64d497f6684c/watchfiles-1.0.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:ba5552a1b07c8edbf197055bc9d518b8f0d98a1c6a73a293bc0726dce068ed01", size = 625370, upload_time = "2025-04-08T10:35:12.412Z" }, + { url = "https://files.pythonhosted.org/packages/15/ba/f60e053b0b5b8145d682672024aa91370a29c5c921a88977eb565de34086/watchfiles-1.0.5-cp311-cp311-win32.whl", hash = "sha256:2f1fefb2e90e89959447bc0420fddd1e76f625784340d64a2f7d5983ef9ad246", size = 277791, upload_time = "2025-04-08T10:35:13.719Z" }, + { url = "https://files.pythonhosted.org/packages/50/ed/7603c4e164225c12c0d4e8700b64bb00e01a6c4eeea372292a3856be33a4/watchfiles-1.0.5-cp311-cp311-win_amd64.whl", hash = "sha256:b6e76ceb1dd18c8e29c73f47d41866972e891fc4cc7ba014f487def72c1cf096", size = 291622, upload_time = "2025-04-08T10:35:15.071Z" }, + { url = "https://files.pythonhosted.org/packages/a2/c2/99bb7c96b4450e36877fde33690ded286ff555b5a5c1d925855d556968a1/watchfiles-1.0.5-cp311-cp311-win_arm64.whl", hash = "sha256:266710eb6fddc1f5e51843c70e3bebfb0f5e77cf4f27129278c70554104d19ed", size = 283699, upload_time = "2025-04-08T10:35:16.732Z" }, + { url = "https://files.pythonhosted.org/packages/2a/8c/4f0b9bdb75a1bfbd9c78fad7d8854369283f74fe7cf03eb16be77054536d/watchfiles-1.0.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:b5eb568c2aa6018e26da9e6c86f3ec3fd958cee7f0311b35c2630fa4217d17f2", size = 401511, upload_time = "2025-04-08T10:35:17.956Z" }, + { url = "https://files.pythonhosted.org/packages/dc/4e/7e15825def77f8bd359b6d3f379f0c9dac4eb09dd4ddd58fd7d14127179c/watchfiles-1.0.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0a04059f4923ce4e856b4b4e5e783a70f49d9663d22a4c3b3298165996d1377f", size = 392715, upload_time = "2025-04-08T10:35:19.202Z" }, + { url = "https://files.pythonhosted.org/packages/58/65/b72fb817518728e08de5840d5d38571466c1b4a3f724d190cec909ee6f3f/watchfiles-1.0.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e380c89983ce6e6fe2dd1e1921b9952fb4e6da882931abd1824c092ed495dec", size = 454138, upload_time = "2025-04-08T10:35:20.586Z" }, + { url = "https://files.pythonhosted.org/packages/3e/a4/86833fd2ea2e50ae28989f5950b5c3f91022d67092bfec08f8300d8b347b/watchfiles-1.0.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fe43139b2c0fdc4a14d4f8d5b5d967f7a2777fd3d38ecf5b1ec669b0d7e43c21", size = 458592, upload_time = "2025-04-08T10:35:21.87Z" }, + { url = 
"https://files.pythonhosted.org/packages/38/7e/42cb8df8be9a37e50dd3a818816501cf7a20d635d76d6bd65aae3dbbff68/watchfiles-1.0.5-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee0822ce1b8a14fe5a066f93edd20aada932acfe348bede8aa2149f1a4489512", size = 487532, upload_time = "2025-04-08T10:35:23.143Z" }, + { url = "https://files.pythonhosted.org/packages/fc/fd/13d26721c85d7f3df6169d8b495fcac8ab0dc8f0945ebea8845de4681dab/watchfiles-1.0.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a0dbcb1c2d8f2ab6e0a81c6699b236932bd264d4cef1ac475858d16c403de74d", size = 522865, upload_time = "2025-04-08T10:35:24.702Z" }, + { url = "https://files.pythonhosted.org/packages/a1/0d/7f9ae243c04e96c5455d111e21b09087d0eeaf9a1369e13a01c7d3d82478/watchfiles-1.0.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a2014a2b18ad3ca53b1f6c23f8cd94a18ce930c1837bd891262c182640eb40a6", size = 499887, upload_time = "2025-04-08T10:35:25.969Z" }, + { url = "https://files.pythonhosted.org/packages/8e/0f/a257766998e26aca4b3acf2ae97dff04b57071e991a510857d3799247c67/watchfiles-1.0.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10f6ae86d5cb647bf58f9f655fcf577f713915a5d69057a0371bc257e2553234", size = 454498, upload_time = "2025-04-08T10:35:27.353Z" }, + { url = "https://files.pythonhosted.org/packages/81/79/8bf142575a03e0af9c3d5f8bcae911ee6683ae93a625d349d4ecf4c8f7df/watchfiles-1.0.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1a7bac2bde1d661fb31f4d4e8e539e178774b76db3c2c17c4bb3e960a5de07a2", size = 630663, upload_time = "2025-04-08T10:35:28.685Z" }, + { url = "https://files.pythonhosted.org/packages/f1/80/abe2e79f610e45c63a70d271caea90c49bbf93eb00fa947fa9b803a1d51f/watchfiles-1.0.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ab626da2fc1ac277bbf752446470b367f84b50295264d2d313e28dc4405d663", size = 625410, upload_time = "2025-04-08T10:35:30.42Z" }, + { url = "https://files.pythonhosted.org/packages/91/6f/bc7fbecb84a41a9069c2c6eb6319f7f7df113adf113e358c57fc1aff7ff5/watchfiles-1.0.5-cp312-cp312-win32.whl", hash = "sha256:9f4571a783914feda92018ef3901dab8caf5b029325b5fe4558c074582815249", size = 277965, upload_time = "2025-04-08T10:35:32.023Z" }, + { url = "https://files.pythonhosted.org/packages/99/a5/bf1c297ea6649ec59e935ab311f63d8af5faa8f0b86993e3282b984263e3/watchfiles-1.0.5-cp312-cp312-win_amd64.whl", hash = "sha256:360a398c3a19672cf93527f7e8d8b60d8275119c5d900f2e184d32483117a705", size = 291693, upload_time = "2025-04-08T10:35:33.225Z" }, + { url = "https://files.pythonhosted.org/packages/7f/7b/fd01087cc21db5c47e5beae507b87965db341cce8a86f9eb12bf5219d4e0/watchfiles-1.0.5-cp312-cp312-win_arm64.whl", hash = "sha256:1a2902ede862969077b97523987c38db28abbe09fb19866e711485d9fbf0d417", size = 283287, upload_time = "2025-04-08T10:35:34.568Z" }, + { url = "https://files.pythonhosted.org/packages/c7/62/435766874b704f39b2fecd8395a29042db2b5ec4005bd34523415e9bd2e0/watchfiles-1.0.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:0b289572c33a0deae62daa57e44a25b99b783e5f7aed81b314232b3d3c81a11d", size = 401531, upload_time = "2025-04-08T10:35:35.792Z" }, + { url = "https://files.pythonhosted.org/packages/6e/a6/e52a02c05411b9cb02823e6797ef9bbba0bfaf1bb627da1634d44d8af833/watchfiles-1.0.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a056c2f692d65bf1e99c41045e3bdcaea3cb9e6b5a53dcaf60a5f3bd95fc9763", size = 392417, upload_time = "2025-04-08T10:35:37.048Z" }, + { url = 
"https://files.pythonhosted.org/packages/3f/53/c4af6819770455932144e0109d4854437769672d7ad897e76e8e1673435d/watchfiles-1.0.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9dca99744991fc9850d18015c4f0438865414e50069670f5f7eee08340d8b40", size = 453423, upload_time = "2025-04-08T10:35:38.357Z" }, + { url = "https://files.pythonhosted.org/packages/cb/d1/8e88df58bbbf819b8bc5cfbacd3c79e01b40261cad0fc84d1e1ebd778a07/watchfiles-1.0.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:894342d61d355446d02cd3988a7326af344143eb33a2fd5d38482a92072d9563", size = 458185, upload_time = "2025-04-08T10:35:39.708Z" }, + { url = "https://files.pythonhosted.org/packages/ff/70/fffaa11962dd5429e47e478a18736d4e42bec42404f5ee3b92ef1b87ad60/watchfiles-1.0.5-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ab44e1580924d1ffd7b3938e02716d5ad190441965138b4aa1d1f31ea0877f04", size = 486696, upload_time = "2025-04-08T10:35:41.469Z" }, + { url = "https://files.pythonhosted.org/packages/39/db/723c0328e8b3692d53eb273797d9a08be6ffb1d16f1c0ba2bdbdc2a3852c/watchfiles-1.0.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d6f9367b132078b2ceb8d066ff6c93a970a18c3029cea37bfd7b2d3dd2e5db8f", size = 522327, upload_time = "2025-04-08T10:35:43.289Z" }, + { url = "https://files.pythonhosted.org/packages/cd/05/9fccc43c50c39a76b68343484b9da7b12d42d0859c37c61aec018c967a32/watchfiles-1.0.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f2e55a9b162e06e3f862fb61e399fe9f05d908d019d87bf5b496a04ef18a970a", size = 499741, upload_time = "2025-04-08T10:35:44.574Z" }, + { url = "https://files.pythonhosted.org/packages/23/14/499e90c37fa518976782b10a18b18db9f55ea73ca14641615056f8194bb3/watchfiles-1.0.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0125f91f70e0732a9f8ee01e49515c35d38ba48db507a50c5bdcad9503af5827", size = 453995, upload_time = "2025-04-08T10:35:46.336Z" }, + { url = "https://files.pythonhosted.org/packages/61/d9/f75d6840059320df5adecd2c687fbc18960a7f97b55c300d20f207d48aef/watchfiles-1.0.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:13bb21f8ba3248386337c9fa51c528868e6c34a707f729ab041c846d52a0c69a", size = 629693, upload_time = "2025-04-08T10:35:48.161Z" }, + { url = "https://files.pythonhosted.org/packages/fc/17/180ca383f5061b61406477218c55d66ec118e6c0c51f02d8142895fcf0a9/watchfiles-1.0.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:839ebd0df4a18c5b3c1b890145b5a3f5f64063c2a0d02b13c76d78fe5de34936", size = 624677, upload_time = "2025-04-08T10:35:49.65Z" }, + { url = "https://files.pythonhosted.org/packages/bf/15/714d6ef307f803f236d69ee9d421763707899d6298d9f3183e55e366d9af/watchfiles-1.0.5-cp313-cp313-win32.whl", hash = "sha256:4a8ec1e4e16e2d5bafc9ba82f7aaecfeec990ca7cd27e84fb6f191804ed2fcfc", size = 277804, upload_time = "2025-04-08T10:35:51.093Z" }, + { url = "https://files.pythonhosted.org/packages/a8/b4/c57b99518fadf431f3ef47a610839e46e5f8abf9814f969859d1c65c02c7/watchfiles-1.0.5-cp313-cp313-win_amd64.whl", hash = "sha256:f436601594f15bf406518af922a89dcaab416568edb6f65c4e5bbbad1ea45c11", size = 291087, upload_time = "2025-04-08T10:35:52.458Z" }, +] + +[[package]] +name = "websockets" +version = "15.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/21/e6/26d09fab466b7ca9c7737474c52be4f76a40301b08362eb2dbc19dcc16c1/websockets-15.0.1.tar.gz", hash = 
"sha256:82544de02076bafba038ce055ee6412d68da13ab47f0c60cab827346de828dee", size = 177016, upload_time = "2025-03-05T20:03:41.606Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9f/32/18fcd5919c293a398db67443acd33fde142f283853076049824fc58e6f75/websockets-15.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:823c248b690b2fd9303ba00c4f66cd5e2d8c3ba4aa968b2779be9532a4dad431", size = 175423, upload_time = "2025-03-05T20:01:56.276Z" }, + { url = "https://files.pythonhosted.org/packages/76/70/ba1ad96b07869275ef42e2ce21f07a5b0148936688c2baf7e4a1f60d5058/websockets-15.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678999709e68425ae2593acf2e3ebcbcf2e69885a5ee78f9eb80e6e371f1bf57", size = 173082, upload_time = "2025-03-05T20:01:57.563Z" }, + { url = "https://files.pythonhosted.org/packages/86/f2/10b55821dd40eb696ce4704a87d57774696f9451108cff0d2824c97e0f97/websockets-15.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d50fd1ee42388dcfb2b3676132c78116490976f1300da28eb629272d5d93e905", size = 173330, upload_time = "2025-03-05T20:01:59.063Z" }, + { url = "https://files.pythonhosted.org/packages/a5/90/1c37ae8b8a113d3daf1065222b6af61cc44102da95388ac0018fcb7d93d9/websockets-15.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d99e5546bf73dbad5bf3547174cd6cb8ba7273062a23808ffea025ecb1cf8562", size = 182878, upload_time = "2025-03-05T20:02:00.305Z" }, + { url = "https://files.pythonhosted.org/packages/8e/8d/96e8e288b2a41dffafb78e8904ea7367ee4f891dafc2ab8d87e2124cb3d3/websockets-15.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:66dd88c918e3287efc22409d426c8f729688d89a0c587c88971a0faa2c2f3792", size = 181883, upload_time = "2025-03-05T20:02:03.148Z" }, + { url = "https://files.pythonhosted.org/packages/93/1f/5d6dbf551766308f6f50f8baf8e9860be6182911e8106da7a7f73785f4c4/websockets-15.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8dd8327c795b3e3f219760fa603dcae1dcc148172290a8ab15158cf85a953413", size = 182252, upload_time = "2025-03-05T20:02:05.29Z" }, + { url = "https://files.pythonhosted.org/packages/d4/78/2d4fed9123e6620cbf1706c0de8a1632e1a28e7774d94346d7de1bba2ca3/websockets-15.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8fdc51055e6ff4adeb88d58a11042ec9a5eae317a0a53d12c062c8a8865909e8", size = 182521, upload_time = "2025-03-05T20:02:07.458Z" }, + { url = "https://files.pythonhosted.org/packages/e7/3b/66d4c1b444dd1a9823c4a81f50231b921bab54eee2f69e70319b4e21f1ca/websockets-15.0.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:693f0192126df6c2327cce3baa7c06f2a117575e32ab2308f7f8216c29d9e2e3", size = 181958, upload_time = "2025-03-05T20:02:09.842Z" }, + { url = "https://files.pythonhosted.org/packages/08/ff/e9eed2ee5fed6f76fdd6032ca5cd38c57ca9661430bb3d5fb2872dc8703c/websockets-15.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:54479983bd5fb469c38f2f5c7e3a24f9a4e70594cd68cd1fa6b9340dadaff7cf", size = 181918, upload_time = "2025-03-05T20:02:11.968Z" }, + { url = "https://files.pythonhosted.org/packages/d8/75/994634a49b7e12532be6a42103597b71098fd25900f7437d6055ed39930a/websockets-15.0.1-cp311-cp311-win32.whl", hash = "sha256:16b6c1b3e57799b9d38427dda63edcbe4926352c47cf88588c0be4ace18dac85", size = 176388, upload_time = "2025-03-05T20:02:13.32Z" }, + { url = 
"https://files.pythonhosted.org/packages/98/93/e36c73f78400a65f5e236cd376713c34182e6663f6889cd45a4a04d8f203/websockets-15.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:27ccee0071a0e75d22cb35849b1db43f2ecd3e161041ac1ee9d2352ddf72f065", size = 176828, upload_time = "2025-03-05T20:02:14.585Z" }, + { url = "https://files.pythonhosted.org/packages/51/6b/4545a0d843594f5d0771e86463606a3988b5a09ca5123136f8a76580dd63/websockets-15.0.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:3e90baa811a5d73f3ca0bcbf32064d663ed81318ab225ee4f427ad4e26e5aff3", size = 175437, upload_time = "2025-03-05T20:02:16.706Z" }, + { url = "https://files.pythonhosted.org/packages/f4/71/809a0f5f6a06522af902e0f2ea2757f71ead94610010cf570ab5c98e99ed/websockets-15.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:592f1a9fe869c778694f0aa806ba0374e97648ab57936f092fd9d87f8bc03665", size = 173096, upload_time = "2025-03-05T20:02:18.832Z" }, + { url = "https://files.pythonhosted.org/packages/3d/69/1a681dd6f02180916f116894181eab8b2e25b31e484c5d0eae637ec01f7c/websockets-15.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0701bc3cfcb9164d04a14b149fd74be7347a530ad3bbf15ab2c678a2cd3dd9a2", size = 173332, upload_time = "2025-03-05T20:02:20.187Z" }, + { url = "https://files.pythonhosted.org/packages/a6/02/0073b3952f5bce97eafbb35757f8d0d54812b6174ed8dd952aa08429bcc3/websockets-15.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8b56bdcdb4505c8078cb6c7157d9811a85790f2f2b3632c7d1462ab5783d215", size = 183152, upload_time = "2025-03-05T20:02:22.286Z" }, + { url = "https://files.pythonhosted.org/packages/74/45/c205c8480eafd114b428284840da0b1be9ffd0e4f87338dc95dc6ff961a1/websockets-15.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0af68c55afbd5f07986df82831c7bff04846928ea8d1fd7f30052638788bc9b5", size = 182096, upload_time = "2025-03-05T20:02:24.368Z" }, + { url = "https://files.pythonhosted.org/packages/14/8f/aa61f528fba38578ec553c145857a181384c72b98156f858ca5c8e82d9d3/websockets-15.0.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64dee438fed052b52e4f98f76c5790513235efaa1ef7f3f2192c392cd7c91b65", size = 182523, upload_time = "2025-03-05T20:02:25.669Z" }, + { url = "https://files.pythonhosted.org/packages/ec/6d/0267396610add5bc0d0d3e77f546d4cd287200804fe02323797de77dbce9/websockets-15.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d5f6b181bb38171a8ad1d6aa58a67a6aa9d4b38d0f8c5f496b9e42561dfc62fe", size = 182790, upload_time = "2025-03-05T20:02:26.99Z" }, + { url = "https://files.pythonhosted.org/packages/02/05/c68c5adbf679cf610ae2f74a9b871ae84564462955d991178f95a1ddb7dd/websockets-15.0.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5d54b09eba2bada6011aea5375542a157637b91029687eb4fdb2dab11059c1b4", size = 182165, upload_time = "2025-03-05T20:02:30.291Z" }, + { url = "https://files.pythonhosted.org/packages/29/93/bb672df7b2f5faac89761cb5fa34f5cec45a4026c383a4b5761c6cea5c16/websockets-15.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3be571a8b5afed347da347bfcf27ba12b069d9d7f42cb8c7028b5e98bbb12597", size = 182160, upload_time = "2025-03-05T20:02:31.634Z" }, + { url = "https://files.pythonhosted.org/packages/ff/83/de1f7709376dc3ca9b7eeb4b9a07b4526b14876b6d372a4dc62312bebee0/websockets-15.0.1-cp312-cp312-win32.whl", hash = "sha256:c338ffa0520bdb12fbc527265235639fb76e7bc7faafbb93f6ba80d9c06578a9", size = 176395, upload_time = 
"2025-03-05T20:02:33.017Z" }, + { url = "https://files.pythonhosted.org/packages/7d/71/abf2ebc3bbfa40f391ce1428c7168fb20582d0ff57019b69ea20fa698043/websockets-15.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:fcd5cf9e305d7b8338754470cf69cf81f420459dbae8a3b40cee57417f4614a7", size = 176841, upload_time = "2025-03-05T20:02:34.498Z" }, + { url = "https://files.pythonhosted.org/packages/cb/9f/51f0cf64471a9d2b4d0fc6c534f323b664e7095640c34562f5182e5a7195/websockets-15.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ee443ef070bb3b6ed74514f5efaa37a252af57c90eb33b956d35c8e9c10a1931", size = 175440, upload_time = "2025-03-05T20:02:36.695Z" }, + { url = "https://files.pythonhosted.org/packages/8a/05/aa116ec9943c718905997412c5989f7ed671bc0188ee2ba89520e8765d7b/websockets-15.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5a939de6b7b4e18ca683218320fc67ea886038265fd1ed30173f5ce3f8e85675", size = 173098, upload_time = "2025-03-05T20:02:37.985Z" }, + { url = "https://files.pythonhosted.org/packages/ff/0b/33cef55ff24f2d92924923c99926dcce78e7bd922d649467f0eda8368923/websockets-15.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:746ee8dba912cd6fc889a8147168991d50ed70447bf18bcda7039f7d2e3d9151", size = 173329, upload_time = "2025-03-05T20:02:39.298Z" }, + { url = "https://files.pythonhosted.org/packages/31/1d/063b25dcc01faa8fada1469bdf769de3768b7044eac9d41f734fd7b6ad6d/websockets-15.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:595b6c3969023ecf9041b2936ac3827e4623bfa3ccf007575f04c5a6aa318c22", size = 183111, upload_time = "2025-03-05T20:02:40.595Z" }, + { url = "https://files.pythonhosted.org/packages/93/53/9a87ee494a51bf63e4ec9241c1ccc4f7c2f45fff85d5bde2ff74fcb68b9e/websockets-15.0.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c714d2fc58b5ca3e285461a4cc0c9a66bd0e24c5da9911e30158286c9b5be7f", size = 182054, upload_time = "2025-03-05T20:02:41.926Z" }, + { url = "https://files.pythonhosted.org/packages/ff/b2/83a6ddf56cdcbad4e3d841fcc55d6ba7d19aeb89c50f24dd7e859ec0805f/websockets-15.0.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f3c1e2ab208db911594ae5b4f79addeb3501604a165019dd221c0bdcabe4db8", size = 182496, upload_time = "2025-03-05T20:02:43.304Z" }, + { url = "https://files.pythonhosted.org/packages/98/41/e7038944ed0abf34c45aa4635ba28136f06052e08fc2168520bb8b25149f/websockets-15.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:229cf1d3ca6c1804400b0a9790dc66528e08a6a1feec0d5040e8b9eb14422375", size = 182829, upload_time = "2025-03-05T20:02:48.812Z" }, + { url = "https://files.pythonhosted.org/packages/e0/17/de15b6158680c7623c6ef0db361da965ab25d813ae54fcfeae2e5b9ef910/websockets-15.0.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:756c56e867a90fb00177d530dca4b097dd753cde348448a1012ed6c5131f8b7d", size = 182217, upload_time = "2025-03-05T20:02:50.14Z" }, + { url = "https://files.pythonhosted.org/packages/33/2b/1f168cb6041853eef0362fb9554c3824367c5560cbdaad89ac40f8c2edfc/websockets-15.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:558d023b3df0bffe50a04e710bc87742de35060580a293c2a984299ed83bc4e4", size = 182195, upload_time = "2025-03-05T20:02:51.561Z" }, + { url = "https://files.pythonhosted.org/packages/86/eb/20b6cdf273913d0ad05a6a14aed4b9a85591c18a987a3d47f20fa13dcc47/websockets-15.0.1-cp313-cp313-win32.whl", hash = 
"sha256:ba9e56e8ceeeedb2e080147ba85ffcd5cd0711b89576b83784d8605a7df455fa", size = 176393, upload_time = "2025-03-05T20:02:53.814Z" }, + { url = "https://files.pythonhosted.org/packages/1b/6c/c65773d6cab416a64d191d6ee8a8b1c68a09970ea6909d16965d26bfed1e/websockets-15.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:e09473f095a819042ecb2ab9465aee615bd9c2028e4ef7d933600a8401c79561", size = 176837, upload_time = "2025-03-05T20:02:55.237Z" }, + { url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743, upload_time = "2025-03-05T20:03:39.41Z" }, +] + +[[package]] +name = "werkzeug" +version = "3.1.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9f/69/83029f1f6300c5fb2471d621ab06f6ec6b3324685a2ce0f9777fd4a8b71e/werkzeug-3.1.3.tar.gz", hash = "sha256:60723ce945c19328679790e3282cc758aa4a6040e4bb330f53d30fa546d44746", size = 806925, upload_time = "2024-11-08T15:52:18.093Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/52/24/ab44c871b0f07f491e5d2ad12c9bd7358e527510618cb1b803a88e986db1/werkzeug-3.1.3-py3-none-any.whl", hash = "sha256:54b78bf3716d19a65be4fceccc0d1d7b89e608834989dfae50ea87564639213e", size = 224498, upload_time = "2024-11-08T15:52:16.132Z" }, +] + +[[package]] +name = "wheel" +version = "0.45.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8a/98/2d9906746cdc6a6ef809ae6338005b3f21bb568bea3165cfc6a243fdc25c/wheel-0.45.1.tar.gz", hash = "sha256:661e1abd9198507b1409a20c02106d9670b2576e916d58f520316666abca6729", size = 107545, upload_time = "2024-11-23T00:18:23.513Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0b/2c/87f3254fd8ffd29e4c02732eee68a83a1d3c346ae39bc6822dcbcb697f2b/wheel-0.45.1-py3-none-any.whl", hash = "sha256:708e7481cc80179af0e556bbf0cc00b8444c7321e2700b8d8580231d13017248", size = 72494, upload_time = "2024-11-23T00:18:21.207Z" }, +] + +[[package]] +name = "wrapt" +version = "1.14.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/11/eb/e06e77394d6cf09977d92bff310cb0392930c08a338f99af6066a5a98f92/wrapt-1.14.1.tar.gz", hash = "sha256:380a85cf89e0e69b7cfbe2ea9f765f004ff419f34194018a6827ac0e3edfed4d", size = 50890, upload_time = "2022-05-02T05:28:31.026Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e7/f9/8c078b4973604cd968b23eb3dff52028b5c48f2a02c4f1f975f4d5e344d1/wrapt-1.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ecee4132c6cd2ce5308e21672015ddfed1ff975ad0ac8d27168ea82e71413f55", size = 35432, upload_time = "2023-10-07T08:29:58.387Z" }, + { url = "https://files.pythonhosted.org/packages/6e/79/aec8185eefe20e8f49e5adeb0c2e20e016d5916d10872c17705ddac41be2/wrapt-1.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2020f391008ef874c6d9e208b24f28e31bcb85ccff4f335f15a3251d222b92d9", size = 36219, upload_time = "2023-10-07T08:30:01.249Z" }, + { url = "https://files.pythonhosted.org/packages/d1/71/8d68004e5d5a676177342a56808af51e1df3b0e54b203e3295a8cd96b53b/wrapt-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2feecf86e1f7a86517cab34ae6c2f081fd2d0dac860cb0c0ded96d799d20b335", size = 78509, upload_time = "2023-10-07T08:30:03.544Z" }, + { url = 
"https://files.pythonhosted.org/packages/5a/27/604d6ad71fe5935446df1b7512d491b47fe2aef8c95e9813d03d78024a28/wrapt-1.14.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:240b1686f38ae665d1b15475966fe0472f78e71b1b4903c143a842659c8e4cb9", size = 70972, upload_time = "2023-10-07T08:30:05.619Z" }, + { url = "https://files.pythonhosted.org/packages/7f/1b/e0439eec0db6520968c751bc7e12480bb80bb8d939190e0e55ed762f3c7a/wrapt-1.14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9008dad07d71f68487c91e96579c8567c98ca4c3881b9b113bc7b33e9fd78b8", size = 78402, upload_time = "2023-10-07T08:30:07.408Z" }, + { url = "https://files.pythonhosted.org/packages/b9/45/2cc612ff64061d4416baf8d0daf27bea7f79f0097638ddc2af51a3e647f3/wrapt-1.14.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6447e9f3ba72f8e2b985a1da758767698efa72723d5b59accefd716e9e8272bf", size = 83373, upload_time = "2023-10-07T08:30:09.317Z" }, + { url = "https://files.pythonhosted.org/packages/ad/b7/332692b8d0387922da0f1323ad36a14e365911def3c78ea0d102f83ac592/wrapt-1.14.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:acae32e13a4153809db37405f5eba5bac5fbe2e2ba61ab227926a22901051c0a", size = 76299, upload_time = "2023-10-07T08:30:10.723Z" }, + { url = "https://files.pythonhosted.org/packages/f2/31/cbce966b6760e62d005c237961e839a755bf0c907199248394e2ee03ab05/wrapt-1.14.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49ef582b7a1152ae2766557f0550a9fcbf7bbd76f43fbdc94dd3bf07cc7168be", size = 83361, upload_time = "2023-10-07T08:30:11.98Z" }, + { url = "https://files.pythonhosted.org/packages/9a/aa/ab46fb18072b86e87e0965a402f8723217e8c0312d1b3e2a91308df924ab/wrapt-1.14.1-cp311-cp311-win32.whl", hash = "sha256:358fe87cc899c6bb0ddc185bf3dbfa4ba646f05b1b0b9b5a27c2cb92c2cea204", size = 33454, upload_time = "2023-10-07T08:30:13.513Z" }, + { url = "https://files.pythonhosted.org/packages/ba/7e/14113996bc6ee68eb987773b4139c87afd3ceff60e27e37648aa5eb2798a/wrapt-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:26046cd03936ae745a502abf44dac702a5e6880b2b01c29aea8ddf3353b68224", size = 35616, upload_time = "2023-10-07T08:30:14.868Z" }, +] + +[[package]] +name = "xxhash" +version = "3.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/00/5e/d6e5258d69df8b4ed8c83b6664f2b47d30d2dec551a29ad72a6c69eafd31/xxhash-3.5.0.tar.gz", hash = "sha256:84f2caddf951c9cbf8dc2e22a89d4ccf5d86391ac6418fe81e3c67d0cf60b45f", size = 84241, upload_time = "2024-08-17T09:20:38.972Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b8/c7/afed0f131fbda960ff15eee7f304fa0eeb2d58770fade99897984852ef23/xxhash-3.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:02c2e816896dc6f85922ced60097bcf6f008dedfc5073dcba32f9c8dd786f3c1", size = 31969, upload_time = "2024-08-17T09:18:00.852Z" }, + { url = "https://files.pythonhosted.org/packages/8c/0c/7c3bc6d87e5235672fcc2fb42fd5ad79fe1033925f71bf549ee068c7d1ca/xxhash-3.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6027dcd885e21581e46d3c7f682cfb2b870942feeed58a21c29583512c3f09f8", size = 30800, upload_time = "2024-08-17T09:18:01.863Z" }, + { url = "https://files.pythonhosted.org/packages/04/9e/01067981d98069eec1c20201f8c145367698e9056f8bc295346e4ea32dd1/xxhash-3.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1308fa542bbdbf2fa85e9e66b1077eea3a88bef38ee8a06270b4298a7a62a166", size = 221566, 
upload_time = "2024-08-17T09:18:03.461Z" }, + { url = "https://files.pythonhosted.org/packages/d4/09/d4996de4059c3ce5342b6e1e6a77c9d6c91acce31f6ed979891872dd162b/xxhash-3.5.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c28b2fdcee797e1c1961cd3bcd3d545cab22ad202c846235197935e1df2f8ef7", size = 201214, upload_time = "2024-08-17T09:18:05.616Z" }, + { url = "https://files.pythonhosted.org/packages/62/f5/6d2dc9f8d55a7ce0f5e7bfef916e67536f01b85d32a9fbf137d4cadbee38/xxhash-3.5.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:924361811732ddad75ff23e90efd9ccfda4f664132feecb90895bade6a1b4623", size = 429433, upload_time = "2024-08-17T09:18:06.957Z" }, + { url = "https://files.pythonhosted.org/packages/d9/72/9256303f10e41ab004799a4aa74b80b3c5977d6383ae4550548b24bd1971/xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89997aa1c4b6a5b1e5b588979d1da048a3c6f15e55c11d117a56b75c84531f5a", size = 194822, upload_time = "2024-08-17T09:18:08.331Z" }, + { url = "https://files.pythonhosted.org/packages/34/92/1a3a29acd08248a34b0e6a94f4e0ed9b8379a4ff471f1668e4dce7bdbaa8/xxhash-3.5.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:685c4f4e8c59837de103344eb1c8a3851f670309eb5c361f746805c5471b8c88", size = 208538, upload_time = "2024-08-17T09:18:10.332Z" }, + { url = "https://files.pythonhosted.org/packages/53/ad/7fa1a109663366de42f724a1cdb8e796a260dbac45047bce153bc1e18abf/xxhash-3.5.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:dbd2ecfbfee70bc1a4acb7461fa6af7748ec2ab08ac0fa298f281c51518f982c", size = 216953, upload_time = "2024-08-17T09:18:11.707Z" }, + { url = "https://files.pythonhosted.org/packages/35/02/137300e24203bf2b2a49b48ce898ecce6fd01789c0fcd9c686c0a002d129/xxhash-3.5.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:25b5a51dc3dfb20a10833c8eee25903fd2e14059e9afcd329c9da20609a307b2", size = 203594, upload_time = "2024-08-17T09:18:13.799Z" }, + { url = "https://files.pythonhosted.org/packages/23/03/aeceb273933d7eee248c4322b98b8e971f06cc3880e5f7602c94e5578af5/xxhash-3.5.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:a8fb786fb754ef6ff8c120cb96629fb518f8eb5a61a16aac3a979a9dbd40a084", size = 210971, upload_time = "2024-08-17T09:18:15.824Z" }, + { url = "https://files.pythonhosted.org/packages/e3/64/ed82ec09489474cbb35c716b189ddc1521d8b3de12b1b5ab41ce7f70253c/xxhash-3.5.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:a905ad00ad1e1c34fe4e9d7c1d949ab09c6fa90c919860c1534ff479f40fd12d", size = 415050, upload_time = "2024-08-17T09:18:17.142Z" }, + { url = "https://files.pythonhosted.org/packages/71/43/6db4c02dcb488ad4e03bc86d70506c3d40a384ee73c9b5c93338eb1f3c23/xxhash-3.5.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:963be41bcd49f53af6d795f65c0da9b4cc518c0dd9c47145c98f61cb464f4839", size = 192216, upload_time = "2024-08-17T09:18:18.779Z" }, + { url = "https://files.pythonhosted.org/packages/22/6d/db4abec29e7a567455344433d095fdb39c97db6955bb4a2c432e486b4d28/xxhash-3.5.0-cp311-cp311-win32.whl", hash = "sha256:109b436096d0a2dd039c355fa3414160ec4d843dfecc64a14077332a00aeb7da", size = 30120, upload_time = "2024-08-17T09:18:20.009Z" }, + { url = "https://files.pythonhosted.org/packages/52/1c/fa3b61c0cf03e1da4767213672efe186b1dfa4fc901a4a694fb184a513d1/xxhash-3.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:b702f806693201ad6c0a05ddbbe4c8f359626d0b3305f766077d51388a6bac58", size = 30003, upload_time = "2024-08-17T09:18:21.052Z" }, 
+ { url = "https://files.pythonhosted.org/packages/6b/8e/9e6fc572acf6e1cc7ccb01973c213f895cb8668a9d4c2b58a99350da14b7/xxhash-3.5.0-cp311-cp311-win_arm64.whl", hash = "sha256:c4dcb4120d0cc3cc448624147dba64e9021b278c63e34a38789b688fd0da9bf3", size = 26777, upload_time = "2024-08-17T09:18:22.809Z" }, + { url = "https://files.pythonhosted.org/packages/07/0e/1bfce2502c57d7e2e787600b31c83535af83746885aa1a5f153d8c8059d6/xxhash-3.5.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:14470ace8bd3b5d51318782cd94e6f94431974f16cb3b8dc15d52f3b69df8e00", size = 31969, upload_time = "2024-08-17T09:18:24.025Z" }, + { url = "https://files.pythonhosted.org/packages/3f/d6/8ca450d6fe5b71ce521b4e5db69622383d039e2b253e9b2f24f93265b52c/xxhash-3.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:59aa1203de1cb96dbeab595ded0ad0c0056bb2245ae11fac11c0ceea861382b9", size = 30787, upload_time = "2024-08-17T09:18:25.318Z" }, + { url = "https://files.pythonhosted.org/packages/5b/84/de7c89bc6ef63d750159086a6ada6416cc4349eab23f76ab870407178b93/xxhash-3.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08424f6648526076e28fae6ea2806c0a7d504b9ef05ae61d196d571e5c879c84", size = 220959, upload_time = "2024-08-17T09:18:26.518Z" }, + { url = "https://files.pythonhosted.org/packages/fe/86/51258d3e8a8545ff26468c977101964c14d56a8a37f5835bc0082426c672/xxhash-3.5.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:61a1ff00674879725b194695e17f23d3248998b843eb5e933007ca743310f793", size = 200006, upload_time = "2024-08-17T09:18:27.905Z" }, + { url = "https://files.pythonhosted.org/packages/02/0a/96973bd325412feccf23cf3680fd2246aebf4b789122f938d5557c54a6b2/xxhash-3.5.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f2f2c61bee5844d41c3eb015ac652a0229e901074951ae48581d58bfb2ba01be", size = 428326, upload_time = "2024-08-17T09:18:29.335Z" }, + { url = "https://files.pythonhosted.org/packages/11/a7/81dba5010f7e733de88af9555725146fc133be97ce36533867f4c7e75066/xxhash-3.5.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d32a592cac88d18cc09a89172e1c32d7f2a6e516c3dfde1b9adb90ab5df54a6", size = 194380, upload_time = "2024-08-17T09:18:30.706Z" }, + { url = "https://files.pythonhosted.org/packages/fb/7d/f29006ab398a173f4501c0e4977ba288f1c621d878ec217b4ff516810c04/xxhash-3.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:70dabf941dede727cca579e8c205e61121afc9b28516752fd65724be1355cc90", size = 207934, upload_time = "2024-08-17T09:18:32.133Z" }, + { url = "https://files.pythonhosted.org/packages/8a/6e/6e88b8f24612510e73d4d70d9b0c7dff62a2e78451b9f0d042a5462c8d03/xxhash-3.5.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e5d0ddaca65ecca9c10dcf01730165fd858533d0be84c75c327487c37a906a27", size = 216301, upload_time = "2024-08-17T09:18:33.474Z" }, + { url = "https://files.pythonhosted.org/packages/af/51/7862f4fa4b75a25c3b4163c8a873f070532fe5f2d3f9b3fc869c8337a398/xxhash-3.5.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3e5b5e16c5a480fe5f59f56c30abdeba09ffd75da8d13f6b9b6fd224d0b4d0a2", size = 203351, upload_time = "2024-08-17T09:18:34.889Z" }, + { url = "https://files.pythonhosted.org/packages/22/61/8d6a40f288f791cf79ed5bb113159abf0c81d6efb86e734334f698eb4c59/xxhash-3.5.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:149b7914451eb154b3dfaa721315117ea1dac2cc55a01bfbd4df7c68c5dd683d", size = 210294, upload_time = "2024-08-17T09:18:36.355Z" }, + { url = 
"https://files.pythonhosted.org/packages/17/02/215c4698955762d45a8158117190261b2dbefe9ae7e5b906768c09d8bc74/xxhash-3.5.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:eade977f5c96c677035ff39c56ac74d851b1cca7d607ab3d8f23c6b859379cab", size = 414674, upload_time = "2024-08-17T09:18:38.536Z" }, + { url = "https://files.pythonhosted.org/packages/31/5c/b7a8db8a3237cff3d535261325d95de509f6a8ae439a5a7a4ffcff478189/xxhash-3.5.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fa9f547bd98f5553d03160967866a71056a60960be00356a15ecc44efb40ba8e", size = 192022, upload_time = "2024-08-17T09:18:40.138Z" }, + { url = "https://files.pythonhosted.org/packages/78/e3/dd76659b2811b3fd06892a8beb850e1996b63e9235af5a86ea348f053e9e/xxhash-3.5.0-cp312-cp312-win32.whl", hash = "sha256:f7b58d1fd3551b8c80a971199543379be1cee3d0d409e1f6d8b01c1a2eebf1f8", size = 30170, upload_time = "2024-08-17T09:18:42.163Z" }, + { url = "https://files.pythonhosted.org/packages/d9/6b/1c443fe6cfeb4ad1dcf231cdec96eb94fb43d6498b4469ed8b51f8b59a37/xxhash-3.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:fa0cafd3a2af231b4e113fba24a65d7922af91aeb23774a8b78228e6cd785e3e", size = 30040, upload_time = "2024-08-17T09:18:43.699Z" }, + { url = "https://files.pythonhosted.org/packages/0f/eb/04405305f290173acc0350eba6d2f1a794b57925df0398861a20fbafa415/xxhash-3.5.0-cp312-cp312-win_arm64.whl", hash = "sha256:586886c7e89cb9828bcd8a5686b12e161368e0064d040e225e72607b43858ba2", size = 26796, upload_time = "2024-08-17T09:18:45.29Z" }, + { url = "https://files.pythonhosted.org/packages/c9/b8/e4b3ad92d249be5c83fa72916c9091b0965cb0faeff05d9a0a3870ae6bff/xxhash-3.5.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:37889a0d13b0b7d739cfc128b1c902f04e32de17b33d74b637ad42f1c55101f6", size = 31795, upload_time = "2024-08-17T09:18:46.813Z" }, + { url = "https://files.pythonhosted.org/packages/fc/d8/b3627a0aebfbfa4c12a41e22af3742cf08c8ea84f5cc3367b5de2d039cce/xxhash-3.5.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:97a662338797c660178e682f3bc180277b9569a59abfb5925e8620fba00b9fc5", size = 30792, upload_time = "2024-08-17T09:18:47.862Z" }, + { url = "https://files.pythonhosted.org/packages/c3/cc/762312960691da989c7cd0545cb120ba2a4148741c6ba458aa723c00a3f8/xxhash-3.5.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f85e0108d51092bdda90672476c7d909c04ada6923c14ff9d913c4f7dc8a3bc", size = 220950, upload_time = "2024-08-17T09:18:49.06Z" }, + { url = "https://files.pythonhosted.org/packages/fe/e9/cc266f1042c3c13750e86a535496b58beb12bf8c50a915c336136f6168dc/xxhash-3.5.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cd2fd827b0ba763ac919440042302315c564fdb797294d86e8cdd4578e3bc7f3", size = 199980, upload_time = "2024-08-17T09:18:50.445Z" }, + { url = "https://files.pythonhosted.org/packages/bf/85/a836cd0dc5cc20376de26b346858d0ac9656f8f730998ca4324921a010b9/xxhash-3.5.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:82085c2abec437abebf457c1d12fccb30cc8b3774a0814872511f0f0562c768c", size = 428324, upload_time = "2024-08-17T09:18:51.988Z" }, + { url = "https://files.pythonhosted.org/packages/b4/0e/15c243775342ce840b9ba34aceace06a1148fa1630cd8ca269e3223987f5/xxhash-3.5.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07fda5de378626e502b42b311b049848c2ef38784d0d67b6f30bb5008642f8eb", size = 194370, upload_time = "2024-08-17T09:18:54.164Z" }, + { url = 
"https://files.pythonhosted.org/packages/87/a1/b028bb02636dfdc190da01951d0703b3d904301ed0ef6094d948983bef0e/xxhash-3.5.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c279f0d2b34ef15f922b77966640ade58b4ccdfef1c4d94b20f2a364617a493f", size = 207911, upload_time = "2024-08-17T09:18:55.509Z" }, + { url = "https://files.pythonhosted.org/packages/80/d5/73c73b03fc0ac73dacf069fdf6036c9abad82de0a47549e9912c955ab449/xxhash-3.5.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:89e66ceed67b213dec5a773e2f7a9e8c58f64daeb38c7859d8815d2c89f39ad7", size = 216352, upload_time = "2024-08-17T09:18:57.073Z" }, + { url = "https://files.pythonhosted.org/packages/b6/2a/5043dba5ddbe35b4fe6ea0a111280ad9c3d4ba477dd0f2d1fe1129bda9d0/xxhash-3.5.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bcd51708a633410737111e998ceb3b45d3dbc98c0931f743d9bb0a209033a326", size = 203410, upload_time = "2024-08-17T09:18:58.54Z" }, + { url = "https://files.pythonhosted.org/packages/a2/b2/9a8ded888b7b190aed75b484eb5c853ddd48aa2896e7b59bbfbce442f0a1/xxhash-3.5.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3ff2c0a34eae7df88c868be53a8dd56fbdf592109e21d4bfa092a27b0bf4a7bf", size = 210322, upload_time = "2024-08-17T09:18:59.943Z" }, + { url = "https://files.pythonhosted.org/packages/98/62/440083fafbc917bf3e4b67c2ade621920dd905517e85631c10aac955c1d2/xxhash-3.5.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:4e28503dccc7d32e0b9817aa0cbfc1f45f563b2c995b7a66c4c8a0d232e840c7", size = 414725, upload_time = "2024-08-17T09:19:01.332Z" }, + { url = "https://files.pythonhosted.org/packages/75/db/009206f7076ad60a517e016bb0058381d96a007ce3f79fa91d3010f49cc2/xxhash-3.5.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a6c50017518329ed65a9e4829154626f008916d36295b6a3ba336e2458824c8c", size = 192070, upload_time = "2024-08-17T09:19:03.007Z" }, + { url = "https://files.pythonhosted.org/packages/1f/6d/c61e0668943a034abc3a569cdc5aeae37d686d9da7e39cf2ed621d533e36/xxhash-3.5.0-cp313-cp313-win32.whl", hash = "sha256:53a068fe70301ec30d868ece566ac90d873e3bb059cf83c32e76012c889b8637", size = 30172, upload_time = "2024-08-17T09:19:04.355Z" }, + { url = "https://files.pythonhosted.org/packages/96/14/8416dce965f35e3d24722cdf79361ae154fa23e2ab730e5323aa98d7919e/xxhash-3.5.0-cp313-cp313-win_amd64.whl", hash = "sha256:80babcc30e7a1a484eab952d76a4f4673ff601f54d5142c26826502740e70b43", size = 30041, upload_time = "2024-08-17T09:19:05.435Z" }, + { url = "https://files.pythonhosted.org/packages/27/ee/518b72faa2073f5aa8e3262408d284892cb79cf2754ba0c3a5870645ef73/xxhash-3.5.0-cp313-cp313-win_arm64.whl", hash = "sha256:4811336f1ce11cac89dcbd18f3a25c527c16311709a89313c3acaf771def2d4b", size = 26801, upload_time = "2024-08-17T09:19:06.547Z" }, +] + +[[package]] +name = "yarl" +version = "1.20.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "idna" }, + { name = "multidict" }, + { name = "propcache" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/62/51/c0edba5219027f6eab262e139f73e2417b0f4efffa23bf562f6e18f76ca5/yarl-1.20.0.tar.gz", hash = "sha256:686d51e51ee5dfe62dec86e4866ee0e9ed66df700d55c828a615640adc885307", size = 185258, upload_time = "2025-04-17T00:45:14.661Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/60/82/a59d8e21b20ffc836775fa7daedac51d16bb8f3010c4fcb495c4496aa922/yarl-1.20.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:fdb5204d17cb32b2de2d1e21c7461cabfacf17f3645e4b9039f210c5d3378bf3", size 
= 145178, upload_time = "2025-04-17T00:42:04.511Z" }, + { url = "https://files.pythonhosted.org/packages/ba/81/315a3f6f95947cfbf37c92d6fbce42a1a6207b6c38e8c2b452499ec7d449/yarl-1.20.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:eaddd7804d8e77d67c28d154ae5fab203163bd0998769569861258e525039d2a", size = 96859, upload_time = "2025-04-17T00:42:06.43Z" }, + { url = "https://files.pythonhosted.org/packages/ad/17/9b64e575583158551b72272a1023cdbd65af54fe13421d856b2850a6ddb7/yarl-1.20.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:634b7ba6b4a85cf67e9df7c13a7fb2e44fa37b5d34501038d174a63eaac25ee2", size = 94647, upload_time = "2025-04-17T00:42:07.976Z" }, + { url = "https://files.pythonhosted.org/packages/2c/29/8f291e7922a58a21349683f6120a85701aeefaa02e9f7c8a2dc24fe3f431/yarl-1.20.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6d409e321e4addf7d97ee84162538c7258e53792eb7c6defd0c33647d754172e", size = 355788, upload_time = "2025-04-17T00:42:09.902Z" }, + { url = "https://files.pythonhosted.org/packages/26/6d/b4892c80b805c42c228c6d11e03cafabf81662d371b0853e7f0f513837d5/yarl-1.20.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:ea52f7328a36960ba3231c6677380fa67811b414798a6e071c7085c57b6d20a9", size = 344613, upload_time = "2025-04-17T00:42:11.768Z" }, + { url = "https://files.pythonhosted.org/packages/d7/0e/517aa28d3f848589bae9593717b063a544b86ba0a807d943c70f48fcf3bb/yarl-1.20.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c8703517b924463994c344dcdf99a2d5ce9eca2b6882bb640aa555fb5efc706a", size = 370953, upload_time = "2025-04-17T00:42:13.983Z" }, + { url = "https://files.pythonhosted.org/packages/5f/9b/5bd09d2f1ad6e6f7c2beae9e50db78edd2cca4d194d227b958955573e240/yarl-1.20.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:077989b09ffd2f48fb2d8f6a86c5fef02f63ffe6b1dd4824c76de7bb01e4f2e2", size = 369204, upload_time = "2025-04-17T00:42:16.386Z" }, + { url = "https://files.pythonhosted.org/packages/9c/85/d793a703cf4bd0d4cd04e4b13cc3d44149470f790230430331a0c1f52df5/yarl-1.20.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0acfaf1da020253f3533526e8b7dd212838fdc4109959a2c53cafc6db611bff2", size = 358108, upload_time = "2025-04-17T00:42:18.622Z" }, + { url = "https://files.pythonhosted.org/packages/6f/54/b6c71e13549c1f6048fbc14ce8d930ac5fb8bafe4f1a252e621a24f3f1f9/yarl-1.20.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b4230ac0b97ec5eeb91d96b324d66060a43fd0d2a9b603e3327ed65f084e41f8", size = 346610, upload_time = "2025-04-17T00:42:20.9Z" }, + { url = "https://files.pythonhosted.org/packages/a0/1a/d6087d58bdd0d8a2a37bbcdffac9d9721af6ebe50d85304d9f9b57dfd862/yarl-1.20.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0a6a1e6ae21cdd84011c24c78d7a126425148b24d437b5702328e4ba640a8902", size = 365378, upload_time = "2025-04-17T00:42:22.926Z" }, + { url = "https://files.pythonhosted.org/packages/02/84/e25ddff4cbc001dbc4af76f8d41a3e23818212dd1f0a52044cbc60568872/yarl-1.20.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:86de313371ec04dd2531f30bc41a5a1a96f25a02823558ee0f2af0beaa7ca791", size = 356919, upload_time = "2025-04-17T00:42:25.145Z" }, + { url = "https://files.pythonhosted.org/packages/04/76/898ae362353bf8f64636495d222c8014c8e5267df39b1a9fe1e1572fb7d0/yarl-1.20.0-cp311-cp311-musllinux_1_2_i686.whl", hash = 
"sha256:dd59c9dd58ae16eaa0f48c3d0cbe6be8ab4dc7247c3ff7db678edecbaf59327f", size = 364248, upload_time = "2025-04-17T00:42:27.475Z" }, + { url = "https://files.pythonhosted.org/packages/1b/b0/9d9198d83a622f1c40fdbf7bd13b224a6979f2e1fc2cf50bfb1d8773c495/yarl-1.20.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:a0bc5e05f457b7c1994cc29e83b58f540b76234ba6b9648a4971ddc7f6aa52da", size = 378418, upload_time = "2025-04-17T00:42:29.333Z" }, + { url = "https://files.pythonhosted.org/packages/c7/ce/1f50c1cc594cf5d3f5bf4a9b616fca68680deaec8ad349d928445ac52eb8/yarl-1.20.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:c9471ca18e6aeb0e03276b5e9b27b14a54c052d370a9c0c04a68cefbd1455eb4", size = 383850, upload_time = "2025-04-17T00:42:31.668Z" }, + { url = "https://files.pythonhosted.org/packages/89/1e/a59253a87b35bfec1a25bb5801fb69943330b67cfd266278eb07e0609012/yarl-1.20.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:40ed574b4df723583a26c04b298b283ff171bcc387bc34c2683235e2487a65a5", size = 381218, upload_time = "2025-04-17T00:42:33.523Z" }, + { url = "https://files.pythonhosted.org/packages/85/b0/26f87df2b3044b0ef1a7cf66d321102bdca091db64c5ae853fcb2171c031/yarl-1.20.0-cp311-cp311-win32.whl", hash = "sha256:db243357c6c2bf3cd7e17080034ade668d54ce304d820c2a58514a4e51d0cfd6", size = 86606, upload_time = "2025-04-17T00:42:35.873Z" }, + { url = "https://files.pythonhosted.org/packages/33/46/ca335c2e1f90446a77640a45eeb1cd8f6934f2c6e4df7db0f0f36ef9f025/yarl-1.20.0-cp311-cp311-win_amd64.whl", hash = "sha256:8c12cd754d9dbd14204c328915e23b0c361b88f3cffd124129955e60a4fbfcfb", size = 93374, upload_time = "2025-04-17T00:42:37.586Z" }, + { url = "https://files.pythonhosted.org/packages/c3/e8/3efdcb83073df978bb5b1a9cc0360ce596680e6c3fac01f2a994ccbb8939/yarl-1.20.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e06b9f6cdd772f9b665e5ba8161968e11e403774114420737f7884b5bd7bdf6f", size = 147089, upload_time = "2025-04-17T00:42:39.602Z" }, + { url = "https://files.pythonhosted.org/packages/60/c3/9e776e98ea350f76f94dd80b408eaa54e5092643dbf65fd9babcffb60509/yarl-1.20.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b9ae2fbe54d859b3ade40290f60fe40e7f969d83d482e84d2c31b9bff03e359e", size = 97706, upload_time = "2025-04-17T00:42:41.469Z" }, + { url = "https://files.pythonhosted.org/packages/0c/5b/45cdfb64a3b855ce074ae607b9fc40bc82e7613b94e7612b030255c93a09/yarl-1.20.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6d12b8945250d80c67688602c891237994d203d42427cb14e36d1a732eda480e", size = 95719, upload_time = "2025-04-17T00:42:43.666Z" }, + { url = "https://files.pythonhosted.org/packages/2d/4e/929633b249611eeed04e2f861a14ed001acca3ef9ec2a984a757b1515889/yarl-1.20.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:087e9731884621b162a3e06dc0d2d626e1542a617f65ba7cc7aeab279d55ad33", size = 343972, upload_time = "2025-04-17T00:42:45.391Z" }, + { url = "https://files.pythonhosted.org/packages/49/fd/047535d326c913f1a90407a3baf7ff535b10098611eaef2c527e32e81ca1/yarl-1.20.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:69df35468b66c1a6e6556248e6443ef0ec5f11a7a4428cf1f6281f1879220f58", size = 339639, upload_time = "2025-04-17T00:42:47.552Z" }, + { url = "https://files.pythonhosted.org/packages/48/2f/11566f1176a78f4bafb0937c0072410b1b0d3640b297944a6a7a556e1d0b/yarl-1.20.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b2992fe29002fd0d4cbaea9428b09af9b8686a9024c840b8a2b8f4ea4abc16f", size = 
353745, upload_time = "2025-04-17T00:42:49.406Z" }, + { url = "https://files.pythonhosted.org/packages/26/17/07dfcf034d6ae8837b33988be66045dd52f878dfb1c4e8f80a7343f677be/yarl-1.20.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4c903e0b42aab48abfbac668b5a9d7b6938e721a6341751331bcd7553de2dcae", size = 354178, upload_time = "2025-04-17T00:42:51.588Z" }, + { url = "https://files.pythonhosted.org/packages/15/45/212604d3142d84b4065d5f8cab6582ed3d78e4cc250568ef2a36fe1cf0a5/yarl-1.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf099e2432131093cc611623e0b0bcc399b8cddd9a91eded8bfb50402ec35018", size = 349219, upload_time = "2025-04-17T00:42:53.674Z" }, + { url = "https://files.pythonhosted.org/packages/e6/e0/a10b30f294111c5f1c682461e9459935c17d467a760c21e1f7db400ff499/yarl-1.20.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8a7f62f5dc70a6c763bec9ebf922be52aa22863d9496a9a30124d65b489ea672", size = 337266, upload_time = "2025-04-17T00:42:55.49Z" }, + { url = "https://files.pythonhosted.org/packages/33/a6/6efa1d85a675d25a46a167f9f3e80104cde317dfdf7f53f112ae6b16a60a/yarl-1.20.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:54ac15a8b60382b2bcefd9a289ee26dc0920cf59b05368c9b2b72450751c6eb8", size = 360873, upload_time = "2025-04-17T00:42:57.895Z" }, + { url = "https://files.pythonhosted.org/packages/77/67/c8ab718cb98dfa2ae9ba0f97bf3cbb7d45d37f13fe1fbad25ac92940954e/yarl-1.20.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:25b3bc0763a7aca16a0f1b5e8ef0f23829df11fb539a1b70476dcab28bd83da7", size = 360524, upload_time = "2025-04-17T00:43:00.094Z" }, + { url = "https://files.pythonhosted.org/packages/bd/e8/c3f18660cea1bc73d9f8a2b3ef423def8dadbbae6c4afabdb920b73e0ead/yarl-1.20.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b2586e36dc070fc8fad6270f93242124df68b379c3a251af534030a4a33ef594", size = 365370, upload_time = "2025-04-17T00:43:02.242Z" }, + { url = "https://files.pythonhosted.org/packages/c9/99/33f3b97b065e62ff2d52817155a89cfa030a1a9b43fee7843ef560ad9603/yarl-1.20.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:866349da9d8c5290cfefb7fcc47721e94de3f315433613e01b435473be63daa6", size = 373297, upload_time = "2025-04-17T00:43:04.189Z" }, + { url = "https://files.pythonhosted.org/packages/3d/89/7519e79e264a5f08653d2446b26d4724b01198a93a74d2e259291d538ab1/yarl-1.20.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:33bb660b390a0554d41f8ebec5cd4475502d84104b27e9b42f5321c5192bfcd1", size = 378771, upload_time = "2025-04-17T00:43:06.609Z" }, + { url = "https://files.pythonhosted.org/packages/3a/58/6c460bbb884abd2917c3eef6f663a4a873f8dc6f498561fc0ad92231c113/yarl-1.20.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:737e9f171e5a07031cbee5e9180f6ce21a6c599b9d4b2c24d35df20a52fabf4b", size = 375000, upload_time = "2025-04-17T00:43:09.01Z" }, + { url = "https://files.pythonhosted.org/packages/3b/2a/dd7ed1aa23fea996834278d7ff178f215b24324ee527df53d45e34d21d28/yarl-1.20.0-cp312-cp312-win32.whl", hash = "sha256:839de4c574169b6598d47ad61534e6981979ca2c820ccb77bf70f4311dd2cc64", size = 86355, upload_time = "2025-04-17T00:43:11.311Z" }, + { url = "https://files.pythonhosted.org/packages/ca/c6/333fe0338305c0ac1c16d5aa7cc4841208d3252bbe62172e0051006b5445/yarl-1.20.0-cp312-cp312-win_amd64.whl", hash = "sha256:3d7dbbe44b443b0c4aa0971cb07dcb2c2060e4a9bf8d1301140a33a93c98e18c", size = 92904, upload_time = "2025-04-17T00:43:13.087Z" }, + { url = 
"https://files.pythonhosted.org/packages/0f/6f/514c9bff2900c22a4f10e06297714dbaf98707143b37ff0bcba65a956221/yarl-1.20.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:2137810a20b933b1b1b7e5cf06a64c3ed3b4747b0e5d79c9447c00db0e2f752f", size = 145030, upload_time = "2025-04-17T00:43:15.083Z" }, + { url = "https://files.pythonhosted.org/packages/4e/9d/f88da3fa319b8c9c813389bfb3463e8d777c62654c7168e580a13fadff05/yarl-1.20.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:447c5eadd750db8389804030d15f43d30435ed47af1313303ed82a62388176d3", size = 96894, upload_time = "2025-04-17T00:43:17.372Z" }, + { url = "https://files.pythonhosted.org/packages/cd/57/92e83538580a6968b2451d6c89c5579938a7309d4785748e8ad42ddafdce/yarl-1.20.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:42fbe577272c203528d402eec8bf4b2d14fd49ecfec92272334270b850e9cd7d", size = 94457, upload_time = "2025-04-17T00:43:19.431Z" }, + { url = "https://files.pythonhosted.org/packages/e9/ee/7ee43bd4cf82dddd5da97fcaddb6fa541ab81f3ed564c42f146c83ae17ce/yarl-1.20.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18e321617de4ab170226cd15006a565d0fa0d908f11f724a2c9142d6b2812ab0", size = 343070, upload_time = "2025-04-17T00:43:21.426Z" }, + { url = "https://files.pythonhosted.org/packages/4a/12/b5eccd1109e2097bcc494ba7dc5de156e41cf8309fab437ebb7c2b296ce3/yarl-1.20.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:4345f58719825bba29895011e8e3b545e6e00257abb984f9f27fe923afca2501", size = 337739, upload_time = "2025-04-17T00:43:23.634Z" }, + { url = "https://files.pythonhosted.org/packages/7d/6b/0eade8e49af9fc2585552f63c76fa59ef469c724cc05b29519b19aa3a6d5/yarl-1.20.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5d9b980d7234614bc4674468ab173ed77d678349c860c3af83b1fffb6a837ddc", size = 351338, upload_time = "2025-04-17T00:43:25.695Z" }, + { url = "https://files.pythonhosted.org/packages/45/cb/aaaa75d30087b5183c7b8a07b4fb16ae0682dd149a1719b3a28f54061754/yarl-1.20.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:af4baa8a445977831cbaa91a9a84cc09debb10bc8391f128da2f7bd070fc351d", size = 353636, upload_time = "2025-04-17T00:43:27.876Z" }, + { url = "https://files.pythonhosted.org/packages/98/9d/d9cb39ec68a91ba6e66fa86d97003f58570327d6713833edf7ad6ce9dde5/yarl-1.20.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:123393db7420e71d6ce40d24885a9e65eb1edefc7a5228db2d62bcab3386a5c0", size = 348061, upload_time = "2025-04-17T00:43:29.788Z" }, + { url = "https://files.pythonhosted.org/packages/72/6b/103940aae893d0cc770b4c36ce80e2ed86fcb863d48ea80a752b8bda9303/yarl-1.20.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ab47acc9332f3de1b39e9b702d9c916af7f02656b2a86a474d9db4e53ef8fd7a", size = 334150, upload_time = "2025-04-17T00:43:31.742Z" }, + { url = "https://files.pythonhosted.org/packages/ef/b2/986bd82aa222c3e6b211a69c9081ba46484cffa9fab2a5235e8d18ca7a27/yarl-1.20.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4a34c52ed158f89876cba9c600b2c964dfc1ca52ba7b3ab6deb722d1d8be6df2", size = 362207, upload_time = "2025-04-17T00:43:34.099Z" }, + { url = "https://files.pythonhosted.org/packages/14/7c/63f5922437b873795d9422cbe7eb2509d4b540c37ae5548a4bb68fd2c546/yarl-1.20.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:04d8cfb12714158abf2618f792c77bc5c3d8c5f37353e79509608be4f18705c9", size = 361277, upload_time = 
"2025-04-17T00:43:36.202Z" }, + { url = "https://files.pythonhosted.org/packages/81/83/450938cccf732466953406570bdb42c62b5ffb0ac7ac75a1f267773ab5c8/yarl-1.20.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:7dc63ad0d541c38b6ae2255aaa794434293964677d5c1ec5d0116b0e308031f5", size = 364990, upload_time = "2025-04-17T00:43:38.551Z" }, + { url = "https://files.pythonhosted.org/packages/b4/de/af47d3a47e4a833693b9ec8e87debb20f09d9fdc9139b207b09a3e6cbd5a/yarl-1.20.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f9d02b591a64e4e6ca18c5e3d925f11b559c763b950184a64cf47d74d7e41877", size = 374684, upload_time = "2025-04-17T00:43:40.481Z" }, + { url = "https://files.pythonhosted.org/packages/62/0b/078bcc2d539f1faffdc7d32cb29a2d7caa65f1a6f7e40795d8485db21851/yarl-1.20.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:95fc9876f917cac7f757df80a5dda9de59d423568460fe75d128c813b9af558e", size = 382599, upload_time = "2025-04-17T00:43:42.463Z" }, + { url = "https://files.pythonhosted.org/packages/74/a9/4fdb1a7899f1fb47fd1371e7ba9e94bff73439ce87099d5dd26d285fffe0/yarl-1.20.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:bb769ae5760cd1c6a712135ee7915f9d43f11d9ef769cb3f75a23e398a92d384", size = 378573, upload_time = "2025-04-17T00:43:44.797Z" }, + { url = "https://files.pythonhosted.org/packages/fd/be/29f5156b7a319e4d2e5b51ce622b4dfb3aa8d8204cd2a8a339340fbfad40/yarl-1.20.0-cp313-cp313-win32.whl", hash = "sha256:70e0c580a0292c7414a1cead1e076c9786f685c1fc4757573d2967689b370e62", size = 86051, upload_time = "2025-04-17T00:43:47.076Z" }, + { url = "https://files.pythonhosted.org/packages/52/56/05fa52c32c301da77ec0b5f63d2d9605946fe29defacb2a7ebd473c23b81/yarl-1.20.0-cp313-cp313-win_amd64.whl", hash = "sha256:4c43030e4b0af775a85be1fa0433119b1565673266a70bf87ef68a9d5ba3174c", size = 92742, upload_time = "2025-04-17T00:43:49.193Z" }, + { url = "https://files.pythonhosted.org/packages/d4/2f/422546794196519152fc2e2f475f0e1d4d094a11995c81a465faf5673ffd/yarl-1.20.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:b6c4c3d0d6a0ae9b281e492b1465c72de433b782e6b5001c8e7249e085b69051", size = 163575, upload_time = "2025-04-17T00:43:51.533Z" }, + { url = "https://files.pythonhosted.org/packages/90/fc/67c64ddab6c0b4a169d03c637fb2d2a212b536e1989dec8e7e2c92211b7f/yarl-1.20.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:8681700f4e4df891eafa4f69a439a6e7d480d64e52bf460918f58e443bd3da7d", size = 106121, upload_time = "2025-04-17T00:43:53.506Z" }, + { url = "https://files.pythonhosted.org/packages/6d/00/29366b9eba7b6f6baed7d749f12add209b987c4cfbfa418404dbadc0f97c/yarl-1.20.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:84aeb556cb06c00652dbf87c17838eb6d92cfd317799a8092cee0e570ee11229", size = 103815, upload_time = "2025-04-17T00:43:55.41Z" }, + { url = "https://files.pythonhosted.org/packages/28/f4/a2a4c967c8323c03689383dff73396281ced3b35d0ed140580825c826af7/yarl-1.20.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f166eafa78810ddb383e930d62e623d288fb04ec566d1b4790099ae0f31485f1", size = 408231, upload_time = "2025-04-17T00:43:57.825Z" }, + { url = "https://files.pythonhosted.org/packages/0f/a1/66f7ffc0915877d726b70cc7a896ac30b6ac5d1d2760613603b022173635/yarl-1.20.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:5d3d6d14754aefc7a458261027a562f024d4f6b8a798adb472277f675857b1eb", size = 390221, upload_time = "2025-04-17T00:44:00.526Z" }, + { url = 
"https://files.pythonhosted.org/packages/41/15/cc248f0504610283271615e85bf38bc014224122498c2016d13a3a1b8426/yarl-1.20.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2a8f64df8ed5d04c51260dbae3cc82e5649834eebea9eadfd829837b8093eb00", size = 411400, upload_time = "2025-04-17T00:44:02.853Z" }, + { url = "https://files.pythonhosted.org/packages/5c/af/f0823d7e092bfb97d24fce6c7269d67fcd1aefade97d0a8189c4452e4d5e/yarl-1.20.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4d9949eaf05b4d30e93e4034a7790634bbb41b8be2d07edd26754f2e38e491de", size = 411714, upload_time = "2025-04-17T00:44:04.904Z" }, + { url = "https://files.pythonhosted.org/packages/83/70/be418329eae64b9f1b20ecdaac75d53aef098797d4c2299d82ae6f8e4663/yarl-1.20.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c366b254082d21cc4f08f522ac201d0d83a8b8447ab562732931d31d80eb2a5", size = 404279, upload_time = "2025-04-17T00:44:07.721Z" }, + { url = "https://files.pythonhosted.org/packages/19/f5/52e02f0075f65b4914eb890eea1ba97e6fd91dd821cc33a623aa707b2f67/yarl-1.20.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:91bc450c80a2e9685b10e34e41aef3d44ddf99b3a498717938926d05ca493f6a", size = 384044, upload_time = "2025-04-17T00:44:09.708Z" }, + { url = "https://files.pythonhosted.org/packages/6a/36/b0fa25226b03d3f769c68d46170b3e92b00ab3853d73127273ba22474697/yarl-1.20.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9c2aa4387de4bc3a5fe158080757748d16567119bef215bec643716b4fbf53f9", size = 416236, upload_time = "2025-04-17T00:44:11.734Z" }, + { url = "https://files.pythonhosted.org/packages/cb/3a/54c828dd35f6831dfdd5a79e6c6b4302ae2c5feca24232a83cb75132b205/yarl-1.20.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:d2cbca6760a541189cf87ee54ff891e1d9ea6406079c66341008f7ef6ab61145", size = 402034, upload_time = "2025-04-17T00:44:13.975Z" }, + { url = "https://files.pythonhosted.org/packages/10/97/c7bf5fba488f7e049f9ad69c1b8fdfe3daa2e8916b3d321aa049e361a55a/yarl-1.20.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:798a5074e656f06b9fad1a162be5a32da45237ce19d07884d0b67a0aa9d5fdda", size = 407943, upload_time = "2025-04-17T00:44:16.052Z" }, + { url = "https://files.pythonhosted.org/packages/fd/a4/022d2555c1e8fcff08ad7f0f43e4df3aba34f135bff04dd35d5526ce54ab/yarl-1.20.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:f106e75c454288472dbe615accef8248c686958c2e7dd3b8d8ee2669770d020f", size = 423058, upload_time = "2025-04-17T00:44:18.547Z" }, + { url = "https://files.pythonhosted.org/packages/4c/f6/0873a05563e5df29ccf35345a6ae0ac9e66588b41fdb7043a65848f03139/yarl-1.20.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:3b60a86551669c23dc5445010534d2c5d8a4e012163218fc9114e857c0586fdd", size = 423792, upload_time = "2025-04-17T00:44:20.639Z" }, + { url = "https://files.pythonhosted.org/packages/9e/35/43fbbd082708fa42e923f314c24f8277a28483d219e049552e5007a9aaca/yarl-1.20.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:3e429857e341d5e8e15806118e0294f8073ba9c4580637e59ab7b238afca836f", size = 422242, upload_time = "2025-04-17T00:44:22.851Z" }, + { url = "https://files.pythonhosted.org/packages/ed/f7/f0f2500cf0c469beb2050b522c7815c575811627e6d3eb9ec7550ddd0bfe/yarl-1.20.0-cp313-cp313t-win32.whl", hash = "sha256:65a4053580fe88a63e8e4056b427224cd01edfb5f951498bfefca4052f0ce0ac", size = 93816, upload_time = "2025-04-17T00:44:25.491Z" }, + { url = 
"https://files.pythonhosted.org/packages/3f/93/f73b61353b2a699d489e782c3f5998b59f974ec3156a2050a52dfd7e8946/yarl-1.20.0-cp313-cp313t-win_amd64.whl", hash = "sha256:53b2da3a6ca0a541c1ae799c349788d480e5144cac47dba0266c7cb6c76151fe", size = 101093, upload_time = "2025-04-17T00:44:27.418Z" }, + { url = "https://files.pythonhosted.org/packages/ea/1f/70c57b3d7278e94ed22d85e09685d3f0a38ebdd8c5c73b65ba4c0d0fe002/yarl-1.20.0-py3-none-any.whl", hash = "sha256:5d0fe6af927a47a230f31e6004621fd0959eaa915fc62acfafa67ff7229a3124", size = 46124, upload_time = "2025-04-17T00:45:12.199Z" }, +]