diff --git a/ENV.md b/ENV.md index c0df0c2d..73f6623a 100644 --- a/ENV.md +++ b/ENV.md @@ -28,14 +28,45 @@ Please ensure these are properly defined in a `.env` file in the root directory. [^1:] The user account in question will require elevated permissions to access certain endpoints. At a minimum, the user will require the `source_collector` and `db_write` permissions. +# Flags + +Flags are used to enable/disable certain features. They are set to `1` to enable the feature and `0` to disable the feature. By default, all flags are enabled. + +## Configuration Flags + +Configuration flags are used to enable/disable certain configurations. + +| Flag | Description | +|--------------|------------------------------------| +| `POST_TO_DISCORD_FLAG` | Enables posting errors to Discord. | + + ## Task Flags -Task flags are used to enable/disable certain tasks. They are set to `1` to enable the task and `0` to disable the task. By default, all tasks are enabled. +Task flags are used to enable/disable certain tasks. + +Note that some tasks/subtasks are themselves enabled by other tasks. + +### Scheduled Task Flags + +| Flag | Description | +|-------------------------------------|--------------------------------------------------------------------| +| `SCHEDULED_TASKS_FLAG` | All scheduled tasks. Disabling disables all other scheduled tasks. | +| `SYNC_AGENCIES_TASK_FLAG` | Synchronize agencies from Data Sources App. | +| `SYNC_DATA_SOURCES_TASK_FLAG` | Synchronize data sources from Data Sources App. | +| `PUSH_TO_HUGGING_FACE_TASK_FLAG` | Pushes data to HuggingFace. | +| `POPULATE_BACKLOG_SNAPSHOT_TASK_FLAG` | Populates the backlog snapshot. | +| `DELETE_OLD_LOGS_TASK_FLAG` | Deletes old logs. | +| `RUN_URL_TASKS_TASK_FLAG` | Runs URL tasks. | +| `IA_PROBE_TASK_FLAG` | Extracts and links Internet Archives metadata to URLs. | +| `IA_SAVE_TASK_FLAG` | Saves URLs to Internet Archives. 
| + ### URL Task Flags + +URL Task Flags are collectively controlled by the `RUN_URL_TASKS_TASK_FLAG` flag. -The following flags are available: -| Flag | Description | -|-------------------------------------|--------------------------------------------------------| -| `SCHEDULED_TASKS_FLAG` | All scheduled tasks. | +| Flag | Description | +|-------------------------------------|--------------------------------------------------------------------| | `URL_HTML_TASK_FLAG` | URL HTML scraping task. | | `URL_RECORD_TYPE_TASK_FLAG` | Automatically assigns Record Types to URLs. | | `URL_AGENCY_IDENTIFICATION_TASK_FLAG` | Automatically assigns and suggests Agencies for URLs. | @@ -45,14 +76,13 @@ The following flags are available: | `URL_AUTO_RELEVANCE_TASK_FLAG` | Automatically assigns Relevances to URLs. | | `URL_PROBE_TASK_FLAG` | Probes URLs for web metadata. | | `URL_ROOT_URL_TASK_FLAG` | Extracts and links Root URLs to URLs. | -| `SYNC_AGENCIES_TASK_FLAG` | Synchonize agencies from Data Sources App. | -| `SYNC_DATA_SOURCES_TASK_FLAG` | Synchonize data sources from Data Sources App. | -| `PUSH_TO_HUGGING_FACE_TASK_FLAG` | Pushes data to HuggingFace. | -| `POPULATE_BACKLOG_SNAPSHOT_TASK_FLAG` | Populates the backlog snapshot. | -| `DELETE_OLD_LOGS_TASK_FLAG` | Deletes old logs. | -| `RUN_URL_TASKS_TASK_FLAG` | Runs URL tasks. | -| `IA_PROBE_TASK_FLAG` | Extracts and links Internet Archives metadata to URLs. | -| `IA_SAVE_TASK_FLAG` | Saves URLs to Internet Archives. | + +### Agency ID Subtasks + +Agency ID Subtasks are collectively controlled by the `URL_AGENCY_IDENTIFICATION_TASK_FLAG` flag. + +| Flag | Description | +|-------------------------------------|--------------------------------------------------------------------| | `AGENCY_ID_HOMEPAGE_MATCH_FLAG` | Enables the homepage match subtask for agency identification. | | `AGENCY_ID_NLP_LOCATION_MATCH_FLAG` | Enables the NLP location match subtask for agency identification. 
| | `AGENCY_ID_CKAN_FLAG` | Enables the CKAN subtask for agency identification. | diff --git a/src/api/main.py b/src/api/main.py index f17c147f..f4f7db5c 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ -39,11 +39,13 @@ from src.external.internet_archives.client import InternetArchivesClient from src.external.pdap.client import PDAPClient from src.external.url_request.core import URLRequestInterface - +from environs import Env @asynccontextmanager async def lifespan(app: FastAPI): env_var_manager = EnvVarManager.get() + env = Env() + env.read_env() # Initialize shared dependencies db_client = DatabaseClient( @@ -57,11 +59,16 @@ async def lifespan(app: FastAPI): session = aiohttp.ClientSession() - task_handler = TaskHandler( - adb_client=adb_client, - discord_poster=DiscordPoster( + if env.bool("POST_TO_DISCORD_FLAG", True): + discord_poster = DiscordPoster( webhook_url=env_var_manager.discord_webhook_url ) + else: + discord_poster = None + + task_handler = TaskHandler( + adb_client=adb_client, + discord_poster=discord_poster ) pdap_client = PDAPClient( access_manager=AccessManager( diff --git a/src/core/tasks/handler.py b/src/core/tasks/handler.py index 6ddca6eb..7f79e3bb 100644 --- a/src/core/tasks/handler.py +++ b/src/core/tasks/handler.py @@ -14,7 +14,7 @@ class TaskHandler: def __init__( self, adb_client: AsyncDatabaseClient, - discord_poster: DiscordPoster + discord_poster: DiscordPoster | None ): self.adb_client = adb_client self.discord_poster = discord_poster @@ -24,7 +24,10 @@ def __init__( self.logger.setLevel(logging.INFO) - async def post_to_discord(self, message: str): + async def post_to_discord(self, message: str) -> None: + if self.discord_poster is None: + print("Post to Discord disabled by POST_TO_DISCORD_FLAG") + return self.discord_poster.post_to_discord(message=message) async def initiate_task_in_db(self, task_type: TaskType) -> int: # @@ -50,9 +53,9 @@ async def handle_task_error(self, run_info: TaskOperatorRunInfo): # 
task_id=run_info.task_id, error=run_info.message ) - msg: str = f"Task {run_info.task_id} ({run_info.task_type.value}) failed with error: {run_info.message}" + msg: str = f"Task {run_info.task_id} ({run_info.task_type.value}) failed with error: {run_info.message[:100]}..." print(msg) - self.discord_poster.post_to_discord( + await self.post_to_discord( message=msg ) diff --git a/tests/manual/external/discord/__init__.py b/tests/manual/external/discord/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/manual/external/discord/test_post.py b/tests/manual/external/discord/test_post.py new file mode 100644 index 00000000..87b56d23 --- /dev/null +++ b/tests/manual/external/discord/test_post.py @@ -0,0 +1,10 @@ +from discord_poster import DiscordPoster +from environs import Env + +def test_post_to_discord(): + env = Env() + env.read_env() + dp = DiscordPoster( + webhook_url=env.str("PROD_DISCORD_WEBHOOK_URL") + ) + dp.post_to_discord("Testing") \ No newline at end of file