diff --git a/backend/api_v2/api_deployment_views.py b/backend/api_v2/api_deployment_views.py index 47d0aac34d..e636ca01ec 100644 --- a/backend/api_v2/api_deployment_views.py +++ b/backend/api_v2/api_deployment_views.py @@ -3,7 +3,6 @@ import uuid from typing import Any -from configuration.models import Configuration from django.db.models import F, OuterRef, QuerySet, Subquery from django.http import HttpResponse from permissions.permission import IsOwner, IsOwnerOrSharedUserOrSharedToOrg @@ -211,31 +210,15 @@ def get( status=status.HTTP_422_UNPROCESSABLE_ENTITY, ) - # Process completed execution response_status = status.HTTP_422_UNPROCESSABLE_ENTITY if execution_status_value == CeleryTaskState.COMPLETED.value: response_status = status.HTTP_200_OK - # Check if highlight data should be removed using configuration registry - api_deployment = deployment_execution_dto.api - organization = api_deployment.organization if api_deployment else None - enable_highlight = False # Safe default if the key is unavailable (e.g., OSS) - # Check if the configuration key exists (Cloud deployment) or use settings (OSS) - from configuration.config_registry import ConfigurationRegistry - - if ConfigurationRegistry.is_config_key_available( - "ENABLE_HIGHLIGHT_API_DEPLOYMENT" - ): - enable_highlight = Configuration.get_value_by_organization( - config_key="ENABLE_HIGHLIGHT_API_DEPLOYMENT", - organization=organization, - ) - if not enable_highlight: - response.remove_result_metadata_keys(["highlight_data"]) - response.remove_result_metadata_keys(["extracted_text"]) - if not include_metadata: - response.remove_result_metadata_keys() - if not include_metrics: - response.remove_result_metrics() + DeploymentHelper.process_completed_execution( + response=response, + deployment_execution_dto=deployment_execution_dto, + include_metadata=include_metadata, + include_metrics=include_metrics, + ) return Response( data={ "status": response.execution_status, diff --git 
a/backend/api_v2/deployment_helper.py b/backend/api_v2/deployment_helper.py index bfbff58b7b..86a0152953 100644 --- a/backend/api_v2/deployment_helper.py +++ b/backend/api_v2/deployment_helper.py @@ -5,6 +5,7 @@ from urllib.parse import urlencode, urlparse import requests +from configuration.config_registry import ConfigurationRegistry from configuration.models import Configuration from django.conf import settings from django.core.files.uploadedfile import InMemoryUploadedFile, UploadedFile @@ -13,6 +14,7 @@ from rest_framework.serializers import Serializer from rest_framework.utils.serializer_helpers import ReturnDict from tags.models import Tag +from usage_v2.helper import UsageHelper from utils.constants import Account, CeleryQueue from utils.local_context import StateStore from workflow_manager.endpoint_v2.destination import DestinationConnector @@ -258,11 +260,12 @@ def execute_workflow( result.status_api = DeploymentHelper.construct_status_endpoint( api_endpoint=api.api_endpoint, execution_id=execution_id ) - # Check if highlight data should be removed using configuration registry + # Ensure workflow identification keys are always in item metadata organization = api.organization if api else None + org_id = str(organization.organization_id) if organization else "" + cls._enrich_result_with_workflow_metadata(result, organization_id=org_id) + # Check if highlight data should be removed using configuration registry enable_highlight = False # Safe default if the key is unavailable (e.g., OSS) - from configuration.config_registry import ConfigurationRegistry - if ConfigurationRegistry.is_config_key_available( "ENABLE_HIGHLIGHT_API_DEPLOYMENT" ): @@ -273,8 +276,10 @@ def execute_workflow( if not enable_highlight: result.remove_result_metadata_keys(["highlight_data"]) result.remove_result_metadata_keys(["extracted_text"]) + if include_metadata or include_metrics: + cls._enrich_result_with_usage_metadata(result) if not include_metadata: - 
result.remove_result_metadata_keys() + result.remove_inner_result_metadata() if not include_metrics: result.remove_result_metrics() except Exception as error: @@ -293,6 +298,145 @@ def execute_workflow( ) return APIExecutionResponseSerializer(result).data + @staticmethod + def _enrich_item_inner_metadata( + item: dict, file_exec_id: str, usage_helper: Any + ) -> None: + """Inject per-model usage breakdown into item['result']['metadata'].""" + inner_result = item.get("result") + if not isinstance(inner_result, dict): + return + metadata = inner_result.get("metadata") + if not isinstance(metadata, dict): + return + usage_by_model = usage_helper.get_usage_by_model(file_exec_id) + if usage_by_model: + metadata.update(usage_by_model) + + @staticmethod + def _enrich_item_top_metadata( + item: dict, file_exec_id: str, usage_helper: Any + ) -> None: + """Inject aggregated usage totals into item['metadata']['usage'].""" + item_metadata = item.get("metadata") + if not isinstance(item_metadata, dict): + return + aggregated = usage_helper.get_aggregated_token_count(file_exec_id) + if aggregated: + aggregated["file_execution_id"] = file_exec_id + item_metadata["usage"] = aggregated + + @staticmethod + def _enrich_result_with_usage_metadata(result: ExecutionResponse) -> None: + """Enrich each file result's metadata with usage data. + + For each file_execution_id: + 1. Injects per-model cost arrays (extraction_llm, challenge_llm, + embedding) into item["result"]["metadata"]. + 2. Injects aggregated usage totals into item["metadata"]["usage"], + matching the legacy response format. 
+ """ + if not isinstance(result.result, list): + return + + for item in result.result: + if not isinstance(item, dict): + continue + file_exec_id = item.get("file_execution_id") + if not file_exec_id: + continue + DeploymentHelper._enrich_item_inner_metadata(item, file_exec_id, UsageHelper) + DeploymentHelper._enrich_item_top_metadata(item, file_exec_id, UsageHelper) + + @staticmethod + def _enrich_item_workflow_metadata( + item: dict, + file_exec_id: str, + fe_lookup: dict, + workflow_execution: Any, + organization_id: str, + tag_names: list[str], + ) -> None: + """Populate workflow identification keys into item['metadata'].""" + if not isinstance(item.get("metadata"), dict): + item["metadata"] = {} + metadata = item["metadata"] + fe = fe_lookup.get(str(file_exec_id)) + we = fe.workflow_execution if fe else workflow_execution + if fe: + metadata.setdefault("source_name", fe.file_name) + metadata.setdefault("source_hash", fe.file_hash or "") + metadata.setdefault("file_execution_id", str(fe.id)) + metadata.setdefault("total_elapsed_time", fe.execution_time) + if we: + metadata.setdefault("workflow_id", str(we.workflow_id)) + metadata.setdefault("execution_id", str(we.id)) + metadata.setdefault( + "workflow_start_time", + we.created_at.timestamp() if we.created_at else None, + ) + metadata.setdefault("organization_id", organization_id) + metadata.setdefault("tags", tag_names) + + @staticmethod + def _enrich_result_with_workflow_metadata( + result: ExecutionResponse, + organization_id: str, + ) -> None: + """Ensure workflow identification keys are always present in item metadata. + + Uses setdefault() — fills in MISSING keys only, never overwrites + values already present from the workers cache. + """ + if not isinstance(result.result, list): + return + + # 1. 
Collect file_execution_ids + file_exec_ids = [ + item.get("file_execution_id") + for item in result.result + if isinstance(item, dict) and item.get("file_execution_id") + ] + if not file_exec_ids: + return + + # 2. Batch query (single JOIN query for all file executions) + # Local import to avoid circular dependency: + # deployment_helper → file_execution.models → workflow_v2.models + # → workflow_v2.models.execution → api_v2.models + from workflow_manager.file_execution.models import WorkflowFileExecution + + fe_lookup = { + str(fe.id): fe + for fe in WorkflowFileExecution.objects.filter( + id__in=file_exec_ids + ).select_related("workflow_execution") + } + + # 3. Get execution-level data (tags) — one M2M query + workflow_execution = None + tag_names: list[str] = [] + if fe_lookup: + first_fe = next(iter(fe_lookup.values())) + workflow_execution = first_fe.workflow_execution + tag_names = list(workflow_execution.tags.values_list("name", flat=True)) + + # 4. Enrich each item + for item in result.result: + if not isinstance(item, dict): + continue + file_exec_id = item.get("file_execution_id") + if not file_exec_id: + continue + DeploymentHelper._enrich_item_workflow_metadata( + item=item, + file_exec_id=file_exec_id, + fe_lookup=fe_lookup, + workflow_execution=workflow_execution, + organization_id=organization_id, + tag_names=tag_names, + ) + @staticmethod def get_execution_status(execution_id: str) -> ExecutionResponse: """Current status of api execution. 
@@ -308,6 +452,38 @@ def get_execution_status(execution_id: str) -> ExecutionResponse: ) return execution_response + @staticmethod + def process_completed_execution( + response: ExecutionResponse, + deployment_execution_dto: Any, + include_metadata: bool, + include_metrics: bool, + ) -> None: + """Enrich and clean up the response for a completed execution.""" + api_deployment = deployment_execution_dto.api + organization = api_deployment.organization if api_deployment else None + org_id = str(organization.organization_id) if organization else "" + DeploymentHelper._enrich_result_with_workflow_metadata( + response, organization_id=org_id + ) + enable_highlight = False + if ConfigurationRegistry.is_config_key_available( + "ENABLE_HIGHLIGHT_API_DEPLOYMENT" + ): + enable_highlight = Configuration.get_value_by_organization( + config_key="ENABLE_HIGHLIGHT_API_DEPLOYMENT", + organization=organization, + ) + if not enable_highlight: + response.remove_result_metadata_keys(["highlight_data"]) + response.remove_result_metadata_keys(["extracted_text"]) + if include_metadata or include_metrics: + DeploymentHelper._enrich_result_with_usage_metadata(response) + if not include_metadata: + response.remove_inner_result_metadata() + if not include_metrics: + response.remove_result_metrics() + @staticmethod def fetch_presigned_file(url: str) -> InMemoryUploadedFile: """Fetch a file from a presigned URL and convert it to an uploaded file. 
diff --git a/backend/backend/internal_base_urls.py b/backend/backend/internal_base_urls.py index 065a635916..0354a691ae 100644 --- a/backend/backend/internal_base_urls.py +++ b/backend/backend/internal_base_urls.py @@ -263,4 +263,10 @@ def test_middleware_debug(request): include("usage_v2.internal_urls"), name="usage_internal", ), + # Prompt Studio IDE callback APIs + path( + "v1/prompt-studio/", + include("prompt_studio.prompt_studio_core_v2.internal_urls"), + name="prompt_studio_internal", + ), ] diff --git a/backend/backend/worker_celery.py b/backend/backend/worker_celery.py new file mode 100644 index 0000000000..63cade66f8 --- /dev/null +++ b/backend/backend/worker_celery.py @@ -0,0 +1,114 @@ +"""Lightweight Celery app for dispatching tasks to worker-v2 workers. + +The Django backend already has a Celery app for internal tasks (beat, +periodic tasks, etc.) whose broker URL is set via CELERY_BROKER_URL. +Workers use the same broker. This module provides a second Celery app +instance that reuses the same broker URL (from Django settings) but +bypasses Celery's env-var-takes-priority behaviour so it can coexist +with the main Django Celery app in the same process. + +Problem: Celery reads the ``CELERY_BROKER_URL`` environment variable +with highest priority — overriding constructor args, ``conf.update()``, +and ``config_from_object()``. + +Solution: Subclass Celery and override ``connection_for_write`` / +``connection_for_read`` so they always use our explicit broker URL, +bypassing the config resolution chain entirely. +""" + +import logging +import threading +from urllib.parse import quote_plus + +from celery import Celery +from django.conf import settings + +logger = logging.getLogger(__name__) + +_worker_app: Celery | None = None +_worker_app_lock = threading.Lock() + + +class _WorkerDispatchCelery(Celery): + """Celery subclass that forces an explicit broker URL. 
+ + Works around Celery's env-var-takes-priority behaviour where + ``CELERY_BROKER_URL`` always overrides per-app configuration. + The connection methods are the actual points where Celery opens + AMQP/Redis connections, so overriding them is both sufficient + and safe. + + NOTE: ``connection_for_write`` / ``connection_for_read`` are Celery + internals, not public API. Verified against celery>=5.3.4 (see + backend/pyproject.toml). Re-verify on major Celery upgrades. + """ + + _explicit_broker: str | None = None + + def connection_for_write(self, url=None, *args, **kwargs): + return super().connection_for_write(url or self._explicit_broker, *args, **kwargs) + + def connection_for_read(self, url=None, *args, **kwargs): + return super().connection_for_read(url or self._explicit_broker, *args, **kwargs) + + +def get_worker_celery_app() -> Celery: + """Get or create a Celery app for dispatching to worker-v2 workers. + + The app uses: + - Same broker as the workers (built from CELERY_BROKER_BASE_URL, + CELERY_BROKER_USER, CELERY_BROKER_PASS via Django settings) + - Same PostgreSQL result backend as the Django Celery app + + Returns: + Celery app configured for worker-v2 dispatch. 
+ """ + global _worker_app + if _worker_app is not None: + return _worker_app + + with _worker_app_lock: + # Double-check after acquiring lock + if _worker_app is not None: + return _worker_app + + # Reuse the broker URL already built by Django settings (base.py) + # from CELERY_BROKER_BASE_URL + CELERY_BROKER_USER + CELERY_BROKER_PASS + broker_url = settings.CELERY_BROKER_URL + + # Reuse the same PostgreSQL result backend as Django's Celery app + result_backend = ( + f"db+postgresql://{quote_plus(settings.DB_USER)}:" + f"{quote_plus(settings.DB_PASSWORD)}" + f"@{settings.DB_HOST}:{settings.DB_PORT}/" + f"{settings.CELERY_BACKEND_DB_NAME}" + ) + + app = _WorkerDispatchCelery( + "worker-dispatch", + set_as_current=False, + fixups=[], + ) + # Store the explicit broker URL for use in connection overrides + app._explicit_broker = broker_url + + app.conf.update( + result_backend=result_backend, + task_serializer="json", + accept_content=["json"], + result_serializer="json", + result_extended=True, + ) + + _worker_app = app + # Log broker host only (mask credentials) + safe_broker = broker_url.split("@")[-1] if "@" in broker_url else broker_url + safe_backend = ( + result_backend.split("@")[-1] if "@" in result_backend else result_backend + ) + logger.info( + "Created worker dispatch Celery app (broker=%s, result_backend=%s)", + safe_broker, + safe_backend, + ) + return _worker_app diff --git a/backend/prompt_studio/prompt_studio_core_v2/internal_urls.py b/backend/prompt_studio/prompt_studio_core_v2/internal_urls.py new file mode 100644 index 0000000000..19b5c1b376 --- /dev/null +++ b/backend/prompt_studio/prompt_studio_core_v2/internal_urls.py @@ -0,0 +1,24 @@ +"""Internal API URLs for Prompt Studio IDE callbacks.""" + +from django.urls import path + +from . 
import internal_views + +app_name = "prompt_studio_internal" + +urlpatterns = [ + path("output/", internal_views.prompt_output, name="prompt-output"), + path("index/", internal_views.index_update, name="index-update"), + path("indexing-status/", internal_views.indexing_status, name="indexing-status"), + path( + "profile/<str:profile_id>/", + internal_views.profile_detail, + name="profile-detail", + ), + path("hubspot-notify/", internal_views.hubspot_notify, name="hubspot-notify"), + path( + "summary-index-key/", + internal_views.summary_index_key, + name="summary-index-key", + ), +] diff --git a/backend/prompt_studio/prompt_studio_core_v2/internal_views.py b/backend/prompt_studio/prompt_studio_core_v2/internal_views.py new file mode 100644 index 0000000000..ba0f0ff8c4 --- /dev/null +++ b/backend/prompt_studio/prompt_studio_core_v2/internal_views.py @@ -0,0 +1,373 @@ +"""Internal API views for Prompt Studio IDE callbacks. + +These endpoints are called by the ide_callback worker (via InternalAPIClient) +to perform Django ORM operations that were previously done directly in the +backend callback tasks. Moving these behind HTTP keeps the worker image +free of Django dependencies. + +Security note: @csrf_exempt is safe here because these endpoints are +internal-only (called by backend workers via service-to-service HTTP, +not by browsers). They are bound to the internal URL namespace and are +not exposed to end users. 
+""" + +import json +import logging + +from django.http import JsonResponse +from django.views.decorators.csrf import csrf_exempt +from django.views.decorators.http import require_http_methods +from rest_framework import status + +logger = logging.getLogger(__name__) + +_ERR_INVALID_JSON = "Invalid JSON" + + +def _parse_json_body(request): + """Parse JSON from request body, returning (data, None) or (None, JsonResponse).""" + try: + return json.loads(request.body), None + except json.JSONDecodeError: + return None, JsonResponse( + {"success": False, "error": _ERR_INVALID_JSON}, + status=status.HTTP_400_BAD_REQUEST, + ) + + +@csrf_exempt +@require_http_methods(["POST"]) +def prompt_output(request): + """Persist prompt execution output via OutputManagerHelper. + + Expected JSON payload: + { + "run_id": str, + "prompt_ids": [str, ...], + "outputs": dict, + "document_id": str, + "is_single_pass_extract": bool, + "profile_manager_id": str | null, + "metadata": dict + } + """ + data, err = _parse_json_body(request) + if err: + return err + + run_id = data.get("run_id", "") + prompt_ids = data.get("prompt_ids", []) + outputs = data.get("outputs", {}) + document_id = data.get("document_id", "") + is_single_pass = data.get("is_single_pass_extract", False) + profile_manager_id = data.get("profile_manager_id") + metadata = data.get("metadata", {}) + + if not prompt_ids or not document_id: + return JsonResponse( + {"success": False, "error": "prompt_ids and document_id are required"}, + status=status.HTTP_400_BAD_REQUEST, + ) + + try: + from prompt_studio.prompt_studio_output_manager_v2.output_manager_helper import ( + OutputManagerHelper, + ) + from prompt_studio.prompt_studio_v2.models import ToolStudioPrompt + + prompts = list( + ToolStudioPrompt.objects.filter(prompt_id__in=prompt_ids).order_by( + "sequence_number" + ) + ) + + response = OutputManagerHelper.handle_prompt_output_update( + run_id=run_id, + prompts=prompts, + outputs=outputs, + document_id=document_id, + 
is_single_pass_extract=is_single_pass, + profile_manager_id=profile_manager_id, + metadata=metadata, + ) + return JsonResponse({"success": True, "data": response}) + + except Exception as e: + logger.exception("prompt_output internal API failed") + return JsonResponse( + {"success": False, "error": str(e)}, + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + + +@csrf_exempt +@require_http_methods(["POST"]) +def index_update(request): + """Update IndexManager after successful indexing. + + Expected JSON payload: + { + "document_id": str, + "profile_manager_id": str, + "doc_id": str, + "is_summary": bool (optional, default false) + } + """ + data, err = _parse_json_body(request) + if err: + return err + + document_id = data.get("document_id", "") + profile_manager_id = data.get("profile_manager_id", "") + doc_id = data.get("doc_id", "") + is_summary = data.get("is_summary", False) + + if not document_id or not profile_manager_id or not doc_id: + return JsonResponse( + { + "success": False, + "error": "document_id, profile_manager_id, and doc_id are required", + }, + status=status.HTTP_400_BAD_REQUEST, + ) + + try: + from prompt_studio.prompt_profile_manager_v2.models import ProfileManager + from prompt_studio.prompt_studio_index_manager_v2.prompt_studio_index_helper import ( + PromptStudioIndexHelper, + ) + + profile_manager = ProfileManager.objects.get(pk=profile_manager_id) + PromptStudioIndexHelper.handle_index_manager( + document_id=document_id, + profile_manager=profile_manager, + doc_id=doc_id, + is_summary=is_summary, + ) + return JsonResponse({"success": True}) + + except Exception as e: + logger.exception("index_update internal API failed") + return JsonResponse( + {"success": False, "error": str(e)}, + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + + +@csrf_exempt +@require_http_methods(["POST"]) +def indexing_status(request): + """Update document indexing cache status (mark indexed or remove). 
+ + Expected JSON payload: + { + "action": "mark_indexed" | "remove", + "org_id": str, + "user_id": str, + "doc_id_key": str, + "doc_id": str (required when action == "mark_indexed") + } + """ + data, err = _parse_json_body(request) + if err: + return err + + action = data.get("action", "") + org_id = data.get("org_id", "") + user_id = data.get("user_id", "") + doc_id_key = data.get("doc_id_key", "") + + if not action or not org_id or not user_id or not doc_id_key: + return JsonResponse( + { + "success": False, + "error": "action, org_id, user_id, doc_id_key are required", + }, + status=status.HTTP_400_BAD_REQUEST, + ) + + try: + from prompt_studio.prompt_studio_core_v2.document_indexing_service import ( + DocumentIndexingService, + ) + + if action == "mark_indexed": + doc_id = data.get("doc_id", "") + if not doc_id: + return JsonResponse( + {"success": False, "error": "doc_id required for mark_indexed"}, + status=status.HTTP_400_BAD_REQUEST, + ) + DocumentIndexingService.mark_document_indexed( + org_id=org_id, user_id=user_id, doc_id_key=doc_id_key, doc_id=doc_id + ) + elif action == "remove": + DocumentIndexingService.remove_document_indexing( + org_id=org_id, user_id=user_id, doc_id_key=doc_id_key + ) + else: + return JsonResponse( + {"success": False, "error": f"Unknown action: {action}"}, + status=status.HTTP_400_BAD_REQUEST, + ) + + return JsonResponse({"success": True}) + + except Exception as e: + logger.exception("indexing_status internal API failed") + return JsonResponse( + {"success": False, "error": str(e)}, + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + + +@require_http_methods(["GET"]) +def profile_detail(request, profile_id): + """Return profile manager details needed by the worker for summary indexing. + + Returns vector_store, embedding_model, x2text adapter IDs and chunk_overlap. 
+ """ + try: + from prompt_studio.prompt_profile_manager_v2.models import ProfileManager + + profile = ProfileManager.objects.get(pk=profile_id) + return JsonResponse( + { + "success": True, + "data": { + "profile_id": str(profile.profile_id), + "vector_store_id": str(profile.vector_store_id), + "embedding_model_id": str(profile.embedding_model_id), + "x2text_id": str(profile.x2text_id), + "chunk_overlap": profile.chunk_overlap, + "chunk_size": profile.chunk_size, + }, + } + ) + + except Exception as e: + logger.exception("profile_detail internal API failed") + return JsonResponse( + {"success": False, "error": str(e)}, + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + + +@csrf_exempt +@require_http_methods(["POST"]) +def hubspot_notify(request): + """Fire a HubSpot event for a given user. + + Expected JSON payload: + { + "user_id": str, + "event_name": str, + "is_first_for_org": bool, + "action_label": str + } + """ + data, err = _parse_json_body(request) + if err: + return err + + user_id = data.get("user_id", "") + event_name = data.get("event_name", "") + is_first_for_org = data.get("is_first_for_org", False) + action_label = data.get("action_label", "") + + if not user_id or not event_name: + return JsonResponse( + {"success": False, "error": "user_id and event_name are required"}, + status=status.HTTP_400_BAD_REQUEST, + ) + + try: + from django.contrib.auth import get_user_model + from utils.hubspot_notify import notify_hubspot_event + + user_model = get_user_model() + user = user_model.objects.get(pk=user_id) + notify_hubspot_event( + user=user, + event_name=event_name, + is_first_for_org=is_first_for_org, + action_label=action_label, + ) + return JsonResponse({"success": True}) + + except Exception as e: + logger.warning("hubspot_notify internal API failed: %s", e) + return JsonResponse( + {"success": False, "error": str(e)}, + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + + +@csrf_exempt +@require_http_methods(["POST"]) +def 
summary_index_key(request): + """Compute summary doc_id hash server-side. + + This requires PromptIdeBaseTool (Django ORM + SDK1) which is only + available on the backend image, not the workers image. + + Expected JSON payload: + { + "summary_profile_id": str, + "summarize_file_path": str, + "org_id": str + } + """ + data, err = _parse_json_body(request) + if err: + return err + + summary_profile_id = data.get("summary_profile_id", "") + summarize_file_path = data.get("summarize_file_path", "") + org_id = data.get("org_id", "") + + if not summary_profile_id or not summarize_file_path or not org_id: + return JsonResponse( + { + "success": False, + "error": "summary_profile_id, summarize_file_path, and org_id are required", + }, + status=status.HTTP_400_BAD_REQUEST, + ) + + try: + from utils.file_storage.constants import FileStorageKeys + + from prompt_studio.prompt_profile_manager_v2.models import ProfileManager + from prompt_studio.prompt_studio_core_v2.prompt_ide_base_tool import ( + PromptIdeBaseTool, + ) + from unstract.sdk1.constants import LogLevel + from unstract.sdk1.file_storage.constants import StorageType + from unstract.sdk1.file_storage.env_helper import EnvHelper + from unstract.sdk1.utils.indexing import IndexingUtils + + profile = ProfileManager.objects.get(pk=summary_profile_id) + fs_instance = EnvHelper.get_storage( + storage_type=StorageType.PERMANENT, + env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, + ) + util = PromptIdeBaseTool(log_level=LogLevel.INFO, org_id=org_id) + doc_id = IndexingUtils.generate_index_key( + vector_db=str(profile.vector_store_id), + embedding=str(profile.embedding_model_id), + x2text=str(profile.x2text_id), + chunk_size="0", + chunk_overlap=str(profile.chunk_overlap), + file_path=summarize_file_path, + fs=fs_instance, + tool=util, + ) + return JsonResponse({"success": True, "data": {"doc_id": doc_id}}) + + except Exception as e: + logger.exception("summary_index_key internal API failed") + return JsonResponse( + 
{"success": False, "error": str(e)}, + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) diff --git a/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py b/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py index b3dcbc95d1..d53aaf8b37 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py +++ b/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py @@ -29,6 +29,7 @@ ExecutionSource, IndexingStatus, LogLevels, + ToolStudioKeys, ToolStudioPromptKeys, ) from prompt_studio.prompt_studio_core_v2.constants import IndexingConstants as IKeys @@ -47,7 +48,6 @@ NoPromptsFound, OperationNotSupported, PermissionError, - ToolNotValid, ) from prompt_studio.prompt_studio_core_v2.migration_utils import ( SummarizeMigrationUtils, @@ -68,9 +68,10 @@ from unstract.core.pubsub_helper import LogPublisher from unstract.sdk1.constants import LogLevel from unstract.sdk1.exceptions import IndexingError, SdkError +from unstract.sdk1.execution.context import ExecutionContext +from unstract.sdk1.execution.dispatcher import ExecutionDispatcher from unstract.sdk1.file_storage.constants import StorageType from unstract.sdk1.file_storage.env_helper import EnvHelper -from unstract.sdk1.prompt import PromptTool from unstract.sdk1.utils.indexing import IndexingUtils from unstract.sdk1.utils.tool import ToolUtils @@ -182,6 +183,9 @@ def validate_profile_manager_owner_access( the action. 
""" profile_manager_owner = profile_manager.created_by + if profile_manager_owner is None: + # No owner on this profile manager — skip ownership validation + return is_llm_owned = ( profile_manager.llm.shared_to_org @@ -263,10 +267,935 @@ def _publish_log( component: dict[str, str], level: str, state: str, message: str ) -> None: LogPublisher.publish( - StateStore.get(Common.LOG_EVENTS_ID), - LogPublisher.log_prompt(component, level, state, message), + channel_id=StateStore.get(Common.LOG_EVENTS_ID), + payload=LogPublisher.log_progress(component, level, state, message), ) + @staticmethod + def _get_dispatcher() -> ExecutionDispatcher: + """Get an ExecutionDispatcher backed by the worker Celery app. + + Uses the RabbitMQ-backed Celery app (not the Django Redis one) + so tasks reach the worker-v2 executor worker. + """ + from backend.worker_celery import ( + get_worker_celery_app, # Lazy import: avoids Django/Celery circular init + ) + + return ExecutionDispatcher(celery_app=get_worker_celery_app()) + + @staticmethod + def _get_platform_api_key(org_id: str) -> str: + """Get the platform API key for the given organization.""" + # Lazy import: avoids Django app registry init order + from platform_settings_v2.platform_auth_service import ( + PlatformAuthenticationService, + ) + + platform_key = PlatformAuthenticationService.get_active_platform_key(org_id) + if not platform_key: + raise ValueError( + f"No active platform API key found for organization {org_id}. " + "Cannot dispatch executor task." + ) + return str(platform_key.key) + + @staticmethod + def _build_summarize_params( + tool: "CustomTool", + default_profile: "ProfileManager", + directory: str, + stem: str, + extract_file_path: str, + platform_api_key: str, + ) -> tuple[dict[str, Any] | None, str, "ProfileManager"]: + """Build summarize_params dict if summarization is enabled. + + Returns: + (summarize_params or None, summarize_file_path, summary_profile). 
+ """ + if not tool.summarize_context: + return None, "", default_profile + + SummarizeMigrationUtils.migrate_tool_to_adapter_based(tool) + summary_profile = default_profile + if not tool.summarize_llm_adapter: + try: + sp = ProfileManager.objects.get( + prompt_studio_tool=tool, is_summarize_llm=True + ) + sp.chunk_size = 0 + summary_profile = sp + except ProfileManager.DoesNotExist: + pass + + if summary_profile != default_profile: + PromptStudioHelper.validate_adapter_status(summary_profile) + PromptStudioHelper.validate_profile_manager_owner_access(summary_profile) + + llm_adapter_id = ( + str(tool.summarize_llm_adapter.id) + if tool.summarize_llm_adapter + else str(summary_profile.llm.id) + ) + + prompts = PromptStudioHelper.fetch_prompt_from_tool(tool.tool_id) + prompt_keys = [p.prompt_key for p in prompts] + + summarize_file_path = os.path.join(directory, "summarize", stem + ".txt") + + summarize_params = { + "llm_adapter_instance_id": llm_adapter_id, + "summarize_prompt": tool.summarize_prompt or "", + "extract_file_path": extract_file_path, + "summarize_file_path": summarize_file_path, + "platform_api_key": platform_api_key, + "prompt_keys": prompt_keys, + } + return summarize_params, summarize_file_path, summary_profile + + @staticmethod + def _build_prompt_output( + prompt: "ToolStudioPrompt", + profile_manager: "ProfileManager", + vector_db: str, + embedding_model: str, + llm: str, + x2text: str, + monitor_llm: str, + tool: "CustomTool", + doc_name: str, + org_id: str, + user_id: str, + tool_id: str, + document_id: str, + ) -> dict[str, Any]: + """Build the output dict for a single prompt in bulk fetch.""" + output: dict[str, Any] = {} + output[TSPKeys.PROMPT] = prompt.prompt + output[TSPKeys.ACTIVE] = prompt.active + output[TSPKeys.REQUIRED] = prompt.required + output[TSPKeys.CHUNK_SIZE] = profile_manager.chunk_size + output[TSPKeys.VECTOR_DB] = vector_db + output[TSPKeys.EMBEDDING] = embedding_model + output[TSPKeys.CHUNK_OVERLAP] = 
profile_manager.chunk_overlap + output[TSPKeys.LLM] = llm + output[TSPKeys.TYPE] = prompt.enforce_type + output[TSPKeys.NAME] = prompt.prompt_key + output[TSPKeys.RETRIEVAL_STRATEGY] = profile_manager.retrieval_strategy + output[TSPKeys.SIMILARITY_TOP_K] = profile_manager.similarity_top_k + output[TSPKeys.SECTION] = profile_manager.section + output[TSPKeys.X2TEXT_ADAPTER] = x2text + + webhook_enabled = bool(prompt.enable_postprocessing_webhook) + webhook_url = (prompt.postprocessing_webhook_url or "").strip() + if webhook_enabled and not webhook_url: + webhook_enabled = False + output[TSPKeys.ENABLE_POSTPROCESSING_WEBHOOK] = webhook_enabled + if webhook_enabled: + output[TSPKeys.POSTPROCESSING_WEBHOOK_URL] = webhook_url + + output[TSPKeys.EVAL_SETTINGS] = {} + output[TSPKeys.EVAL_SETTINGS][TSPKeys.EVAL_SETTINGS_EVALUATE] = prompt.evaluate + output[TSPKeys.EVAL_SETTINGS][TSPKeys.EVAL_SETTINGS_MONITOR_LLM] = [monitor_llm] + output[TSPKeys.EVAL_SETTINGS][TSPKeys.EVAL_SETTINGS_EXCLUDE_FAILED] = ( + tool.exclude_failed + ) + for attr in dir(prompt): + if attr.startswith(TSPKeys.EVAL_METRIC_PREFIX): + output[TSPKeys.EVAL_SETTINGS][attr] = getattr(prompt, attr) + + output = PromptStudioHelper.fetch_table_settings_if_enabled( + doc_name, prompt, org_id, user_id, tool_id, output + ) + variable_map = PromptStudioVariableService.frame_variable_replacement_map( + doc_id=document_id, prompt_object=prompt + ) + if variable_map: + output[TSPKeys.VARIABLE_MAP] = variable_map + return output + + @staticmethod + def _wait_for_indexing( + org_id: str, user_id: str, doc_id_key: str + ) -> dict[str, str] | None: + """Poll until an in-progress indexing completes or times out. + + Returns: + Completed/pending result dict, or ``None`` if indexing failed + and the caller should re-index. 
+ """ + if not DocumentIndexingService.is_document_indexing( + org_id=org_id, user_id=user_id, doc_id_key=doc_id_key + ): + return None + + logger.info( + "Document %s is already being indexed; " + "waiting for completion before proceeding.", + doc_id_key, + ) + poll_interval = 2 # seconds + max_wait = 300 # 5 minutes + elapsed = 0 + while elapsed < max_wait: + time.sleep(poll_interval) + elapsed += poll_interval + indexed_doc_id = DocumentIndexingService.get_indexed_document_id( + org_id=org_id, user_id=user_id, doc_id_key=doc_id_key + ) + if indexed_doc_id: + return { + "status": IndexingStatus.COMPLETED_STATUS.value, + "output": indexed_doc_id, + } + if not DocumentIndexingService.is_document_indexing( + org_id=org_id, user_id=user_id, doc_id_key=doc_id_key + ): + return None + # Timed out — return PENDING as safety net + return { + "status": IndexingStatus.PENDING_STATUS.value, + "output": IndexingStatus.DOCUMENT_BEING_INDEXED.value, + } + + # ------------------------------------------------------------------ + # Phase 5B — Payload builders for fire-and-forget dispatch + # ------------------------------------------------------------------ + + @staticmethod + def build_index_payload( + tool_id: str, + file_name: str, + org_id: str, + user_id: str, + document_id: str, + run_id: str, + ) -> tuple[ExecutionContext, dict[str, Any]]: + """Build ide_index ExecutionContext for fire-and-forget dispatch. + + Does ORM validation synchronously, then returns the execution + context so the caller can dispatch with callbacks. Summarization + is deferred to the executor worker via ``summarize_params``. 
+ """ + tool: CustomTool = CustomTool.objects.get(pk=tool_id) + file_path = PromptStudioFileHelper.get_or_create_prompt_studio_subdirectory( + org_id, + is_create=False, + user_id=user_id, + tool_id=tool_id, + ) + file_path = str(Path(file_path) / file_name) + + default_profile = ProfileManager.get_default_llm_profile(tool) + if not default_profile: + raise DefaultProfileError() + + PromptStudioHelper.validate_adapter_status(default_profile) + PromptStudioHelper.validate_profile_manager_owner_access(default_profile) + + # Common path decomposition used by extract, summarize, and index + directory, filename = os.path.split(file_path) + stem = os.path.splitext(filename)[0] + extract_file_path = os.path.join(directory, "extract", stem + ".txt") + platform_api_key = PromptStudioHelper._get_platform_api_key(org_id) + + # Build summarize_params for executor (summarization runs in worker) + summarize_params, summarize_file_path, summary_profile = ( + PromptStudioHelper._build_summarize_params( + tool, + default_profile, + directory, + stem, + extract_file_path, + platform_api_key, + ) + ) + + # Generate doc_id for indexing tracking + fs_instance = EnvHelper.get_storage( + storage_type=StorageType.PERMANENT, + env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, + ) + util = PromptIdeBaseTool(log_level=LogLevel.INFO, org_id=org_id) + doc_id_key = IndexingUtils.generate_index_key( + vector_db=str(default_profile.vector_store.id), + embedding=str(default_profile.embedding_model.id), + x2text=str(default_profile.x2text.id), + chunk_size=str(default_profile.chunk_size), + chunk_overlap=str(default_profile.chunk_overlap), + file_path=file_path, + file_hash=None, + fs=fs_instance, + tool=util, + ) + + usage_kwargs = {"run_id": run_id, "file_name": filename} + + extract_params = { + IKeys.X2TEXT_INSTANCE_ID: str(default_profile.x2text.id), + IKeys.FILE_PATH: file_path, + IKeys.ENABLE_HIGHLIGHT: tool.enable_highlight, + IKeys.OUTPUT_FILE_PATH: extract_file_path, + 
"platform_api_key": platform_api_key, + IKeys.USAGE_KWARGS: usage_kwargs, + } + + index_params = { + IKeys.TOOL_ID: tool_id, + IKeys.EMBEDDING_INSTANCE_ID: str(default_profile.embedding_model.id), + IKeys.VECTOR_DB_INSTANCE_ID: str(default_profile.vector_store.id), + IKeys.X2TEXT_INSTANCE_ID: str(default_profile.x2text.id), + IKeys.FILE_PATH: extract_file_path, + IKeys.FILE_HASH: None, + IKeys.CHUNK_OVERLAP: default_profile.chunk_overlap, + IKeys.CHUNK_SIZE: default_profile.chunk_size, + IKeys.REINDEX: True, + IKeys.ENABLE_HIGHLIGHT: tool.enable_highlight, + IKeys.USAGE_KWARGS: usage_kwargs, + IKeys.RUN_ID: run_id, + TSPKeys.EXECUTION_SOURCE: ExecutionSource.IDE.value, + "platform_api_key": platform_api_key, + } + + log_events_id = StateStore.get(Common.LOG_EVENTS_ID) or "" + request_id = StateStore.get(Common.REQUEST_ID) or "" + + context = ExecutionContext( + executor_name="legacy", + operation="ide_index", + run_id=run_id or str(uuid.uuid4()), + execution_source="ide", + organization_id=org_id, + executor_params={ + "extract_params": extract_params, + "index_params": index_params, + "summarize_params": summarize_params, + }, + request_id=request_id, + log_events_id=log_events_id, + ) + + # x2text config hash for extraction status tracking in callback + x2text_metadata = default_profile.x2text.metadata or {} + x2text_config_hash = ToolUtils.hash_str( + json.dumps(x2text_metadata, sort_keys=True) + ) + + cb_kwargs = { + "log_events_id": log_events_id, + "request_id": request_id, + "org_id": org_id, + "user_id": user_id, + "document_id": document_id, + "doc_id_key": doc_id_key, + "profile_manager_id": str(default_profile.profile_id), + "tool_id": tool_id, + "run_id": run_id, + "file_name": file_name, + "x2text_config_hash": x2text_config_hash, + "enable_highlight": tool.enable_highlight, + "summary_profile_id": ( + str(summary_profile.profile_id) if tool.summarize_context else "" + ), + "summarize_file_path": summarize_file_path, + } + + return context, cb_kwargs + 
+ @staticmethod + def _resolve_llm_ids(tool: Any) -> tuple[str, str]: + """Resolve monitor_llm and challenge_llm IDs for the tool.""" + monitor_llm_instance = tool.monitor_llm + challenge_llm_instance = tool.challenge_llm + if monitor_llm_instance: + monitor_llm = str(monitor_llm_instance.id) + else: + dp = ProfileManager.get_default_llm_profile(tool) + if not dp: + raise DefaultProfileError() + monitor_llm = str(dp.llm.id) + if challenge_llm_instance: + challenge_llm = str(challenge_llm_instance.id) + else: + dp = ProfileManager.get_default_llm_profile(tool) + if not dp: + raise DefaultProfileError() + challenge_llm = str(dp.llm.id) + return monitor_llm, challenge_llm + + @staticmethod + def _build_grammar_list(prompt_grammer: Any) -> list[dict[str, Any]]: + """Build the grammar synonym list from the tool's prompt_grammer dict.""" + if not prompt_grammer: + return [] + return [ + {TSPKeys.WORD: word, TSPKeys.SYNONYMS: synonyms} + for word, synonyms in prompt_grammer.items() + ] + + @staticmethod + def build_fetch_response_payload( + tool: CustomTool, + doc_path: str, + doc_name: str, + prompt: ToolStudioPrompt, + org_id: str, + user_id: str, + document_id: str, + run_id: str, + profile_manager_id: str | None = None, + ) -> tuple[ExecutionContext | None, dict[str, Any]]: + """Build answer_prompt ExecutionContext for fire-and-forget dispatch. + + Does ORM work, extraction, and indexing synchronously. Only the + LLM answer_prompt call is dispatched asynchronously. 
+ + Returns: + (context, cb_kwargs) or (None, pending_response_dict) + """ + profile_manager = prompt.profile_manager + if profile_manager_id: + profile_manager = ProfileManagerHelper.get_profile_manager( + profile_manager_id=profile_manager_id + ) + + if not profile_manager: + raise DefaultProfileError() + + monitor_llm, challenge_llm = PromptStudioHelper._resolve_llm_ids(tool) + + PromptStudioHelper.validate_adapter_status(profile_manager) + PromptStudioHelper.validate_profile_manager_owner_access(profile_manager) + + vector_db = str(profile_manager.vector_store.id) + embedding_model = str(profile_manager.embedding_model.id) + llm = str(profile_manager.llm.id) + x2text = str(profile_manager.x2text.id) + + fs_instance = EnvHelper.get_storage( + storage_type=StorageType.PERMANENT, + env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, + ) + util = PromptIdeBaseTool(log_level=LogLevel.INFO, org_id=org_id) + file_path = doc_path + directory, filename = os.path.split(doc_path) + extract_path = os.path.join( + directory, "extract", os.path.splitext(filename)[0] + ".txt" + ) + + doc_id = IndexingUtils.generate_index_key( + vector_db=vector_db, + embedding=embedding_model, + x2text=x2text, + chunk_size=str(profile_manager.chunk_size), + chunk_overlap=str(profile_manager.chunk_overlap), + file_path=file_path, + file_hash=None, + fs=fs_instance, + tool=util, + ) + + # Extract (blocking, usually cached) + extracted_text = PromptStudioHelper.dynamic_extractor( + profile_manager=profile_manager, + file_path=file_path, + org_id=org_id, + document_id=document_id, + run_id=run_id, + enable_highlight=tool.enable_highlight, + ) + + is_summary = tool.summarize_as_source + if is_summary: + profile_manager.chunk_size = 0 + p = Path(extract_path) + extract_path = str(p.parent.parent / "summarize" / (p.stem + ".txt")) + + # Index (blocking, usually cached) + index_result = PromptStudioHelper.dynamic_indexer( + profile_manager=profile_manager, + tool_id=str(tool.tool_id), + 
file_path=file_path, + org_id=org_id, + document_id=document_id, + run_id=run_id, + user_id=user_id, + enable_highlight=tool.enable_highlight, + extracted_text=extracted_text, + doc_id_key=doc_id, + ) + + if index_result.get("status") == IndexingStatus.PENDING_STATUS.value: + return None, { + "status": IndexingStatus.PENDING_STATUS.value, + "message": IndexingStatus.DOCUMENT_BEING_INDEXED.value, + } + + # Build outputs + tool_id = str(tool.tool_id) + output: dict[str, Any] = {} + outputs: list[dict[str, Any]] = [] + grammar_list = PromptStudioHelper._build_grammar_list(tool.prompt_grammer) + + output[TSPKeys.PROMPT] = prompt.prompt + output[TSPKeys.ACTIVE] = prompt.active + output[TSPKeys.REQUIRED] = prompt.required + output[TSPKeys.CHUNK_SIZE] = profile_manager.chunk_size + output[TSPKeys.VECTOR_DB] = vector_db + output[TSPKeys.EMBEDDING] = embedding_model + output[TSPKeys.CHUNK_OVERLAP] = profile_manager.chunk_overlap + output[TSPKeys.LLM] = llm + output[TSPKeys.TYPE] = prompt.enforce_type + output[TSPKeys.NAME] = prompt.prompt_key + output[TSPKeys.RETRIEVAL_STRATEGY] = profile_manager.retrieval_strategy + output[TSPKeys.SIMILARITY_TOP_K] = profile_manager.similarity_top_k + output[TSPKeys.SECTION] = profile_manager.section + output[TSPKeys.X2TEXT_ADAPTER] = x2text + + webhook_enabled = bool(prompt.enable_postprocessing_webhook) + webhook_url = (prompt.postprocessing_webhook_url or "").strip() + if webhook_enabled and not webhook_url: + webhook_enabled = False + output[TSPKeys.ENABLE_POSTPROCESSING_WEBHOOK] = webhook_enabled + if webhook_enabled: + output[TSPKeys.POSTPROCESSING_WEBHOOK_URL] = webhook_url + + output[TSPKeys.EVAL_SETTINGS] = {} + output[TSPKeys.EVAL_SETTINGS][TSPKeys.EVAL_SETTINGS_EVALUATE] = prompt.evaluate + output[TSPKeys.EVAL_SETTINGS][TSPKeys.EVAL_SETTINGS_MONITOR_LLM] = [monitor_llm] + output[TSPKeys.EVAL_SETTINGS][TSPKeys.EVAL_SETTINGS_EXCLUDE_FAILED] = ( + tool.exclude_failed + ) + for attr in dir(prompt): + if 
attr.startswith(TSPKeys.EVAL_METRIC_PREFIX): + output[TSPKeys.EVAL_SETTINGS][attr] = getattr(prompt, attr) + + output = PromptStudioHelper.fetch_table_settings_if_enabled( + doc_name, prompt, org_id, user_id, tool_id, output + ) + variable_map = PromptStudioVariableService.frame_variable_replacement_map( + doc_id=document_id, prompt_object=prompt + ) + if variable_map: + output[TSPKeys.VARIABLE_MAP] = variable_map + outputs.append(output) + + tool_settings: dict[str, Any] = {} + tool_settings[TSPKeys.ENABLE_CHALLENGE] = tool.enable_challenge + tool_settings[TSPKeys.CHALLENGE_LLM] = challenge_llm + tool_settings[TSPKeys.SINGLE_PASS_EXTRACTION_MODE] = ( + tool.single_pass_extraction_mode + ) + tool_settings[TSPKeys.SUMMARIZE_AS_SOURCE] = tool.summarize_as_source + tool_settings[TSPKeys.PREAMBLE] = tool.preamble + tool_settings[TSPKeys.POSTAMBLE] = tool.postamble + tool_settings[TSPKeys.GRAMMAR] = grammar_list + tool_settings[TSPKeys.ENABLE_HIGHLIGHT] = tool.enable_highlight + tool_settings[TSPKeys.ENABLE_WORD_CONFIDENCE] = tool.enable_word_confidence + tool_settings[TSPKeys.PLATFORM_POSTAMBLE] = getattr( + settings, TSPKeys.PLATFORM_POSTAMBLE.upper(), "" + ) + tool_settings[TSPKeys.WORD_CONFIDENCE_POSTAMBLE] = getattr( + settings, TSPKeys.WORD_CONFIDENCE_POSTAMBLE.upper(), "" + ) + + file_hash = fs_instance.get_hash_from_file(path=extract_path) + + payload: dict[str, Any] = { + TSPKeys.TOOL_SETTINGS: tool_settings, + TSPKeys.OUTPUTS: outputs, + TSPKeys.TOOL_ID: tool_id, + TSPKeys.RUN_ID: run_id, + TSPKeys.FILE_NAME: doc_name, + TSPKeys.FILE_HASH: file_hash, + TSPKeys.FILE_PATH: extract_path, + Common.LOG_EVENTS_ID: StateStore.get(Common.LOG_EVENTS_ID), + TSPKeys.EXECUTION_SOURCE: ExecutionSource.IDE.value, + TSPKeys.CUSTOM_DATA: tool.custom_data, + } + + platform_api_key = PromptStudioHelper._get_platform_api_key(org_id) + payload[ToolStudioKeys.PLATFORM_SERVICE_API_KEY] = platform_api_key + payload[TSPKeys.INCLUDE_METADATA] = True + + log_events_id = 
StateStore.get(Common.LOG_EVENTS_ID) or "" + request_id = StateStore.get(Common.REQUEST_ID) or "" + + context = ExecutionContext( + executor_name="legacy", + operation="answer_prompt", + run_id=run_id, + execution_source="ide", + organization_id=org_id, + executor_params=payload, + request_id=request_id, + log_events_id=log_events_id, + ) + + cb_kwargs = { + "log_events_id": log_events_id, + "request_id": request_id, + "org_id": org_id, + "user_id": user_id, + "operation": "fetch_response", + "run_id": run_id, + "document_id": document_id, + "tool_id": tool_id, + "prompt_ids": [str(prompt.prompt_id)], + "profile_manager_id": profile_manager_id, + "is_single_pass": False, + } + + return context, cb_kwargs + + @staticmethod + def build_bulk_fetch_response_payload( + tool: CustomTool, + doc_path: str, + doc_name: str, + prompts: list[ToolStudioPrompt], + org_id: str, + user_id: str, + document_id: str, + run_id: str, + profile_manager_id: str | None = None, + ) -> tuple[ExecutionContext | None, dict[str, Any]]: + """Build answer_prompt payload for multiple prompts in one task. + + Does ORM work, extraction, and indexing synchronously once for + all prompts. Only the LLM answer_prompt call is dispatched + asynchronously with all prompts in the outputs list. 
+ + Returns: + (context, cb_kwargs) or (None, pending_response_dict) + """ + profile_manager = ( + ProfileManagerHelper.get_profile_manager(profile_manager_id) + if profile_manager_id + else None + ) + if not profile_manager: + profile_manager = ProfileManager.get_default_llm_profile(tool) + if not profile_manager: + raise DefaultProfileError() + + PromptStudioHelper.validate_adapter_status(profile_manager) + PromptStudioHelper.validate_profile_manager_owner_access(profile_manager) + + monitor_llm, challenge_llm = PromptStudioHelper._resolve_llm_ids(tool) + + vector_db = str(profile_manager.vector_store.id) + embedding_model = str(profile_manager.embedding_model.id) + llm = str(profile_manager.llm.id) + x2text = str(profile_manager.x2text.id) + + fs_instance = EnvHelper.get_storage( + storage_type=StorageType.PERMANENT, + env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, + ) + util = PromptIdeBaseTool(log_level=LogLevel.INFO, org_id=org_id) + file_path = doc_path + directory, filename = os.path.split(doc_path) + extract_path = os.path.join( + directory, "extract", os.path.splitext(filename)[0] + ".txt" + ) + + doc_id = IndexingUtils.generate_index_key( + vector_db=vector_db, + embedding=embedding_model, + x2text=x2text, + chunk_size=str(profile_manager.chunk_size), + chunk_overlap=str(profile_manager.chunk_overlap), + file_path=file_path, + file_hash=None, + fs=fs_instance, + tool=util, + ) + + # Extract ONCE (blocking, usually cached) + extracted_text = PromptStudioHelper.dynamic_extractor( + profile_manager=profile_manager, + file_path=file_path, + org_id=org_id, + document_id=document_id, + run_id=run_id, + enable_highlight=tool.enable_highlight, + ) + + is_summary = tool.summarize_as_source + if is_summary: + profile_manager.chunk_size = 0 + p = Path(extract_path) + extract_path = str(p.parent.parent / "summarize" / (p.stem + ".txt")) + + # Index ONCE (blocking, usually cached) + index_result = PromptStudioHelper.dynamic_indexer( + 
profile_manager=profile_manager, + tool_id=str(tool.tool_id), + file_path=file_path, + org_id=org_id, + document_id=document_id, + run_id=run_id, + user_id=user_id, + enable_highlight=tool.enable_highlight, + extracted_text=extracted_text, + doc_id_key=doc_id, + ) + + if index_result.get("status") == IndexingStatus.PENDING_STATUS.value: + return None, { + "status": IndexingStatus.PENDING_STATUS.value, + "message": IndexingStatus.DOCUMENT_BEING_INDEXED.value, + } + + # Per-prompt output building + tool_id = str(tool.tool_id) + grammar_list = PromptStudioHelper._build_grammar_list(tool.prompt_grammer) + outputs: list[dict[str, Any]] = [ + PromptStudioHelper._build_prompt_output( + prompt=prompt, + profile_manager=profile_manager, + vector_db=vector_db, + embedding_model=embedding_model, + llm=llm, + x2text=x2text, + monitor_llm=monitor_llm, + tool=tool, + doc_name=doc_name, + org_id=org_id, + user_id=user_id, + tool_id=tool_id, + document_id=document_id, + ) + for prompt in prompts + ] + + tool_settings: dict[str, Any] = {} + tool_settings[TSPKeys.ENABLE_CHALLENGE] = tool.enable_challenge + tool_settings[TSPKeys.CHALLENGE_LLM] = challenge_llm + tool_settings[TSPKeys.SINGLE_PASS_EXTRACTION_MODE] = ( + tool.single_pass_extraction_mode + ) + tool_settings[TSPKeys.SUMMARIZE_AS_SOURCE] = tool.summarize_as_source + tool_settings[TSPKeys.PREAMBLE] = tool.preamble + tool_settings[TSPKeys.POSTAMBLE] = tool.postamble + tool_settings[TSPKeys.GRAMMAR] = grammar_list + tool_settings[TSPKeys.ENABLE_HIGHLIGHT] = tool.enable_highlight + tool_settings[TSPKeys.ENABLE_WORD_CONFIDENCE] = tool.enable_word_confidence + tool_settings[TSPKeys.PLATFORM_POSTAMBLE] = getattr( + settings, TSPKeys.PLATFORM_POSTAMBLE.upper(), "" + ) + tool_settings[TSPKeys.WORD_CONFIDENCE_POSTAMBLE] = getattr( + settings, TSPKeys.WORD_CONFIDENCE_POSTAMBLE.upper(), "" + ) + + file_hash = fs_instance.get_hash_from_file(path=extract_path) + + payload: dict[str, Any] = { + TSPKeys.TOOL_SETTINGS: tool_settings, + 
TSPKeys.OUTPUTS: outputs, + TSPKeys.TOOL_ID: tool_id, + TSPKeys.RUN_ID: run_id, + TSPKeys.FILE_NAME: doc_name, + TSPKeys.FILE_HASH: file_hash, + TSPKeys.FILE_PATH: extract_path, + Common.LOG_EVENTS_ID: StateStore.get(Common.LOG_EVENTS_ID), + TSPKeys.EXECUTION_SOURCE: ExecutionSource.IDE.value, + TSPKeys.CUSTOM_DATA: tool.custom_data, + } + + platform_api_key = PromptStudioHelper._get_platform_api_key(org_id) + payload[ToolStudioKeys.PLATFORM_SERVICE_API_KEY] = platform_api_key + payload[TSPKeys.INCLUDE_METADATA] = True + + log_events_id = StateStore.get(Common.LOG_EVENTS_ID) or "" + request_id = StateStore.get(Common.REQUEST_ID) or "" + + context = ExecutionContext( + executor_name="legacy", + operation="answer_prompt", + run_id=run_id, + execution_source="ide", + organization_id=org_id, + executor_params=payload, + request_id=request_id, + log_events_id=log_events_id, + ) + + cb_kwargs = { + "log_events_id": log_events_id, + "request_id": request_id, + "org_id": org_id, + "user_id": user_id, + "operation": "fetch_response", + "run_id": run_id, + "document_id": document_id, + "tool_id": tool_id, + "prompt_ids": [str(p.prompt_id) for p in prompts], + "profile_manager_id": profile_manager_id, + "is_single_pass": False, + } + + return context, cb_kwargs + + @staticmethod + def build_single_pass_payload( + tool: CustomTool, + doc_path: str, + doc_name: str, + prompts: list[ToolStudioPrompt], + org_id: str, + user_id: str, + document_id: str, + run_id: str, + ) -> tuple[ExecutionContext, dict[str, Any]]: + """Build single_pass_extraction ExecutionContext. + + Does ORM work and extraction synchronously. Only the LLM + single-pass call is dispatched asynchronously. 
+ """ + tool_id = str(tool.tool_id) + outputs: list[dict[str, Any]] = [] + grammar: list[dict[str, Any]] = [] + prompt_grammar = tool.prompt_grammer + default_profile = ProfileManager.get_default_llm_profile(tool) + + if not default_profile: + raise DefaultProfileError() + + challenge_llm_instance: AdapterInstance | None = tool.challenge_llm + challenge_llm: str | None = None + if challenge_llm_instance: + challenge_llm = str(challenge_llm_instance.id) + else: + challenge_llm = str(default_profile.llm.id) + + PromptStudioHelper.validate_adapter_status(default_profile) + PromptStudioHelper.validate_profile_manager_owner_access(default_profile) + default_profile.chunk_size = 0 + + if prompt_grammar: + for word, synonyms in prompt_grammar.items(): + grammar.append({TSPKeys.WORD: word, TSPKeys.SYNONYMS: synonyms}) + + fs_instance = EnvHelper.get_storage( + storage_type=StorageType.PERMANENT, + env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, + ) + directory, filename = os.path.split(doc_path) + file_path = os.path.join( + directory, "extract", os.path.splitext(filename)[0] + ".txt" + ) + + # Extract (blocking, usually cached) + PromptStudioHelper.dynamic_extractor( + profile_manager=default_profile, + file_path=doc_path, + org_id=org_id, + document_id=document_id, + run_id=run_id, + enable_highlight=tool.enable_highlight, + ) + + vector_db = str(default_profile.vector_store.id) + embedding_model = str(default_profile.embedding_model.id) + llm = str(default_profile.llm.id) + x2text = str(default_profile.x2text.id) + + tool_settings: dict[str, Any] = { + TSPKeys.PREAMBLE: tool.preamble, + TSPKeys.POSTAMBLE: tool.postamble, + TSPKeys.GRAMMAR: grammar, + TSPKeys.LLM: llm, + TSPKeys.X2TEXT_ADAPTER: x2text, + TSPKeys.VECTOR_DB: vector_db, + TSPKeys.EMBEDDING: embedding_model, + TSPKeys.CHUNK_SIZE: default_profile.chunk_size, + TSPKeys.CHUNK_OVERLAP: default_profile.chunk_overlap, + TSPKeys.ENABLE_CHALLENGE: tool.enable_challenge, + TSPKeys.ENABLE_HIGHLIGHT: 
tool.enable_highlight, + TSPKeys.ENABLE_WORD_CONFIDENCE: tool.enable_word_confidence, + TSPKeys.CHALLENGE_LLM: challenge_llm, + TSPKeys.PLATFORM_POSTAMBLE: getattr( + settings, TSPKeys.PLATFORM_POSTAMBLE.upper(), "" + ), + TSPKeys.WORD_CONFIDENCE_POSTAMBLE: getattr( + settings, TSPKeys.WORD_CONFIDENCE_POSTAMBLE.upper(), "" + ), + TSPKeys.SUMMARIZE_AS_SOURCE: tool.summarize_as_source, + TSPKeys.RETRIEVAL_STRATEGY: default_profile.retrieval_strategy + or TSPKeys.SIMPLE, + TSPKeys.SIMILARITY_TOP_K: default_profile.similarity_top_k, + } + + for p in prompts: + if not p.prompt: + raise EmptyPromptError() + outputs.append( + { + TSPKeys.PROMPT: p.prompt, + TSPKeys.ACTIVE: p.active, + TSPKeys.TYPE: p.enforce_type, + TSPKeys.NAME: p.prompt_key, + } + ) + + if tool.summarize_as_source: + path_obj = Path(file_path) + file_path = str( + path_obj.parent.parent / TSPKeys.SUMMARIZE / (path_obj.stem + ".txt") + ) + + file_hash = fs_instance.get_hash_from_file(path=file_path) + + payload: dict[str, Any] = { + TSPKeys.TOOL_SETTINGS: tool_settings, + TSPKeys.OUTPUTS: outputs, + TSPKeys.TOOL_ID: tool_id, + TSPKeys.RUN_ID: run_id, + TSPKeys.FILE_HASH: file_hash, + TSPKeys.FILE_NAME: doc_name, + TSPKeys.FILE_PATH: file_path, + Common.LOG_EVENTS_ID: StateStore.get(Common.LOG_EVENTS_ID), + TSPKeys.EXECUTION_SOURCE: ExecutionSource.IDE.value, + TSPKeys.CUSTOM_DATA: tool.custom_data, + } + + platform_api_key = PromptStudioHelper._get_platform_api_key(org_id) + payload[ToolStudioKeys.PLATFORM_SERVICE_API_KEY] = platform_api_key + payload[TSPKeys.INCLUDE_METADATA] = True + + log_events_id = StateStore.get(Common.LOG_EVENTS_ID) or "" + request_id = StateStore.get(Common.REQUEST_ID) or "" + + context = ExecutionContext( + executor_name="legacy", + operation="single_pass_extraction", + run_id=run_id or str(uuid.uuid4()), + execution_source="ide", + organization_id=org_id, + executor_params=payload, + request_id=request_id, + log_events_id=log_events_id, + ) + + cb_kwargs = { + "log_events_id": 
log_events_id, + "request_id": request_id, + "org_id": org_id, + "user_id": user_id, + "operation": "single_pass_extraction", + "run_id": run_id, + "document_id": document_id, + "tool_id": tool_id, + "prompt_ids": [str(p.prompt_id) for p in prompts], + "profile_manager_id": str(default_profile.profile_id), + "is_single_pass": True, + } + + return context, cb_kwargs + @staticmethod def get_select_fields() -> dict[str, Any]: """Method to fetch dropdown field values for frontend. @@ -274,9 +1203,8 @@ def get_select_fields() -> dict[str, Any]: Returns: dict[str, Any]: Dict for dropdown data """ - f = open(f"{os.path.dirname(__file__)}{CHOICES_JSON}") - choices = f.read() - f.close() + with open(f"{os.path.dirname(__file__)}{CHOICES_JSON}") as f: + choices = f.read() response: dict[str, Any] = json.loads(choices) # Update select choices with payload modifier plugin if available payload_modifier_plugin = get_plugin("payload_modifier") @@ -374,10 +1302,6 @@ def index_document( ) summary_profile = default_profile - if not tool: - logger.error(f"No tool instance found for the ID {tool_id}") - raise ToolNotValid() - # Validate the status of adapter in profile manager PromptStudioHelper.validate_adapter_status(default_profile) # Need to check the user who created profile manager @@ -798,6 +1722,9 @@ def _fetch_response( profile_manager_id=profile_manager_id ) + if not profile_manager: + raise DefaultProfileError() + monitor_llm_instance: AdapterInstance | None = tool.monitor_llm monitor_llm: str | None = None challenge_llm_instance: AdapterInstance | None = tool.challenge_llm @@ -827,8 +1754,6 @@ def _fetch_response( embedding_model = str(profile_manager.embedding_model.id) llm = str(profile_manager.llm.id) x2text = str(profile_manager.x2text.id) - if not profile_manager: - raise DefaultProfileError() fs_instance = EnvHelper.get_storage( storage_type=StorageType.PERMANENT, env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, @@ -856,13 +1781,6 @@ def _fetch_response( 
fs=fs_instance, tool=util, ) - if DocumentIndexingService.is_document_indexing( - org_id=org_id, user_id=user_id, doc_id_key=doc_id - ): - return { - "status": IndexingStatus.PENDING_STATUS.value, - "output": IndexingStatus.DOCUMENT_BEING_INDEXED.value, - } logger.info(f"Extracting text from {file_path} for {doc_id}") extracted_text = PromptStudioHelper.dynamic_extractor( profile_manager=profile_manager, @@ -995,24 +1913,29 @@ def _fetch_response( TSPKeys.CUSTOM_DATA: tool.custom_data, } - try: - responder = PromptTool( - tool=util, - prompt_host=settings.PROMPT_HOST, - prompt_port=settings.PROMPT_PORT, - request_id=StateStore.get(Common.REQUEST_ID), - ) - params = {TSPKeys.INCLUDE_METADATA: True} - return responder.answer_prompt(payload=payload, params=params) - except SdkError as e: - msg = str(e) - if e.actual_err and hasattr(e.actual_err, "response"): - msg = e.actual_err.response.json().get("error", str(e)) + # Add platform API key and metadata flag for executor + platform_api_key = PromptStudioHelper._get_platform_api_key(org_id) + payload[ToolStudioKeys.PLATFORM_SERVICE_API_KEY] = platform_api_key + payload[TSPKeys.INCLUDE_METADATA] = True + + dispatcher = PromptStudioHelper._get_dispatcher() + context = ExecutionContext( + executor_name="legacy", + operation="answer_prompt", + run_id=run_id, + execution_source="ide", + organization_id=org_id, + executor_params=payload, + request_id=StateStore.get(Common.REQUEST_ID), + log_events_id=StateStore.get(Common.LOG_EVENTS_ID), + ) + result = dispatcher.dispatch(context) + if not result.success: raise AnswerFetchError( "Error while fetching response for " - f"'{prompt.prompt_key}' with '{doc_name}'. {msg}", - status_code=int(e.status_code or 500), + f"'{prompt.prompt_key}' with '{doc_name}'. 
{result.error}", ) + return result.data @staticmethod def fetch_table_settings_if_enabled( @@ -1109,14 +2032,14 @@ def dynamic_indexer( "status": IndexingStatus.COMPLETED_STATUS.value, "output": indexed_doc_id, } - # Polling if document is already being indexed - if DocumentIndexingService.is_document_indexing( + # Wait for in-progress indexing instead of returning PENDING + wait_result = PromptStudioHelper._wait_for_indexing( org_id=org_id, user_id=user_id, doc_id_key=doc_id_key - ): - return { - "status": IndexingStatus.PENDING_STATUS.value, - "output": IndexingStatus.DOCUMENT_BEING_INDEXED.value, - } + ) + if wait_result is not None: + return wait_result + # wait_result is None → indexing failed; fall through to + # re-index below # Set the document as being indexed DocumentIndexingService.set_document_indexing( @@ -1141,24 +2064,27 @@ def dynamic_indexer( TSPKeys.EXECUTION_SOURCE: ExecutionSource.IDE.value, } - util = PromptIdeBaseTool(log_level=LogLevel.INFO, org_id=org_id) - - try: - responder = PromptTool( - tool=util, - prompt_host=settings.PROMPT_HOST, - prompt_port=settings.PROMPT_PORT, - request_id=StateStore.get(Common.REQUEST_ID), - ) - doc_id = responder.index(payload=payload) - except SdkError as e: - msg = str(e) - if e.actual_err and hasattr(e.actual_err, "response"): - msg = e.actual_err.response.json().get("error", str(e)) + # Add platform API key for executor + platform_api_key = PromptStudioHelper._get_platform_api_key(org_id) + payload["platform_api_key"] = platform_api_key + + dispatcher = PromptStudioHelper._get_dispatcher() + index_context = ExecutionContext( + executor_name="legacy", + operation="index", + run_id=run_id or str(uuid.uuid4()), + execution_source="ide", + organization_id=org_id, + executor_params=payload, + request_id=StateStore.get(Common.REQUEST_ID), + log_events_id=StateStore.get(Common.LOG_EVENTS_ID), + ) + result = dispatcher.dispatch(index_context) + if not result.success: raise IndexingAPIError( - f"Failed to index 
'{filename}'. {msg}", - status_code=int(e.status_code or 500), + f"Failed to index '{filename}'. {result.error}", ) + doc_id = result.data.get("doc_id") PromptStudioIndexHelper.handle_index_manager( document_id=document_id, @@ -1170,6 +2096,13 @@ def dynamic_indexer( ) return {"status": IndexingStatus.COMPLETED_STATUS.value, "output": doc_id} except (IndexingError, IndexingAPIError, SdkError) as e: + # Clear the indexing flag so subsequent requests are not blocked + try: + DocumentIndexingService.remove_document_indexing( + org_id=org_id, user_id=user_id, doc_id_key=doc_id_key + ) + except Exception: + logger.exception("Failed to clear indexing flag for %s", doc_id_key) msg = str(e) if isinstance(e, SdkError) and hasattr(e.actual_err, "response"): msg = e.actual_err.response.json().get("error", str(e)) @@ -1199,6 +2132,9 @@ def _fetch_single_pass_response( grammar: list[dict[str, Any]] = [] prompt_grammar = tool.prompt_grammer default_profile = ProfileManager.get_default_llm_profile(tool) + if not default_profile: + raise DefaultProfileError() + challenge_llm_instance: AdapterInstance | None = tool.challenge_llm challenge_llm: str | None = None # Using default profile manager llm if challenge_llm is None @@ -1215,14 +2151,10 @@ def _fetch_single_pass_response( for word, synonyms in prompt_grammar.items(): grammar.append({TSPKeys.WORD: word, TSPKeys.SYNONYMS: synonyms}) - if not default_profile: - raise DefaultProfileError() - fs_instance = EnvHelper.get_storage( storage_type=StorageType.PERMANENT, env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, ) - util = PromptIdeBaseTool(log_level=LogLevel.INFO, org_id=org_id) directory, filename = os.path.split(input_file_path) file_path = os.path.join( directory, "extract", os.path.splitext(filename)[0] + ".txt" @@ -1261,6 +2193,10 @@ def _fetch_single_pass_response( settings, TSPKeys.WORD_CONFIDENCE_POSTAMBLE.upper(), "" ) tool_settings[TSPKeys.SUMMARIZE_AS_SOURCE] = tool.summarize_as_source + 
tool_settings[TSPKeys.RETRIEVAL_STRATEGY] = ( + default_profile.retrieval_strategy or TSPKeys.SIMPLE + ) + tool_settings[TSPKeys.SIMILARITY_TOP_K] = default_profile.similarity_top_k for prompt in prompts: if not prompt.prompt: raise EmptyPromptError() @@ -1289,14 +2225,28 @@ def _fetch_single_pass_response( TSPKeys.CUSTOM_DATA: tool.custom_data, } - responder = PromptTool( - tool=util, - prompt_host=settings.PROMPT_HOST, - prompt_port=settings.PROMPT_PORT, + # Add platform API key and metadata flag for executor + platform_api_key = PromptStudioHelper._get_platform_api_key(org_id) + payload[ToolStudioKeys.PLATFORM_SERVICE_API_KEY] = platform_api_key + payload[TSPKeys.INCLUDE_METADATA] = True + + dispatcher = PromptStudioHelper._get_dispatcher() + context = ExecutionContext( + executor_name="legacy", + operation="single_pass_extraction", + run_id=run_id or str(uuid.uuid4()), + execution_source="ide", + organization_id=org_id, + executor_params=payload, request_id=StateStore.get(Common.REQUEST_ID), + log_events_id=StateStore.get(Common.LOG_EVENTS_ID), ) - params = {TSPKeys.INCLUDE_METADATA: True} - return responder.single_pass_extraction(payload=payload, params=params) + result = dispatcher.dispatch(context) + if not result.success: + raise AnswerFetchError( + f"Error fetching single pass response. 
{result.error}", + ) + return result.data @staticmethod def get_tool_from_tool_id(tool_id: str) -> CustomTool | None: @@ -1362,32 +2312,24 @@ def dynamic_extractor( IKeys.OUTPUT_FILE_PATH: extract_file_path, } - util = PromptIdeBaseTool(log_level=LogLevel.INFO, org_id=org_id) - - try: - responder = PromptTool( - tool=util, - prompt_host=settings.PROMPT_HOST, - prompt_port=settings.PROMPT_PORT, - request_id=StateStore.get(Common.REQUEST_ID), - ) - extracted_text = responder.extract(payload=payload) - success = PromptStudioIndexHelper.mark_extraction_status( - document_id=document_id, - profile_manager=profile_manager, - x2text_config_hash=x2text_config_hash, - enable_highlight=enable_highlight, - ) - if not success: - logger.warning( - f"Failed to mark extraction success for document {document_id}. " - f"Extraction completed but status not saved." - ) - except SdkError as e: - msg = str(e) - if e.actual_err and hasattr(e.actual_err, "response"): - msg = e.actual_err.response.json().get("error", str(e)) - + # Add platform API key for executor + platform_api_key = PromptStudioHelper._get_platform_api_key(org_id) + payload["platform_api_key"] = platform_api_key + + dispatcher = PromptStudioHelper._get_dispatcher() + extract_context = ExecutionContext( + executor_name="legacy", + operation="extract", + run_id=run_id or str(uuid.uuid4()), + execution_source="ide", + organization_id=org_id, + executor_params=payload, + request_id=StateStore.get(Common.REQUEST_ID), + log_events_id=StateStore.get(Common.LOG_EVENTS_ID), + ) + result = dispatcher.dispatch(extract_context) + if not result.success: + msg = result.error or "Unknown extraction error" success = PromptStudioIndexHelper.mark_extraction_status( document_id=document_id, profile_manager=profile_manager, @@ -1401,10 +2343,21 @@ def dynamic_extractor( f"Failed to mark extraction failure for document {document_id}. " f"Extraction failed but status not saved." ) - raise ExtractionAPIError( f"Failed to extract '{filename}'. 
{msg}", - status_code=int(e.status_code or 500), + ) + + extracted_text = result.data.get("extracted_text", "") + success = PromptStudioIndexHelper.mark_extraction_status( + document_id=document_id, + profile_manager=profile_manager, + x2text_config_hash=x2text_config_hash, + enable_highlight=enable_highlight, + ) + if not success: + logger.warning( + f"Failed to mark extraction success for document {document_id}. " + f"Extraction completed but status not saved." ) return extracted_text diff --git a/backend/prompt_studio/prompt_studio_core_v2/tasks.py b/backend/prompt_studio/prompt_studio_core_v2/tasks.py new file mode 100644 index 0000000000..633a4b9236 --- /dev/null +++ b/backend/prompt_studio/prompt_studio_core_v2/tasks.py @@ -0,0 +1,274 @@ +import json +import logging +import uuid +from datetime import date, datetime +from typing import Any + +from account_v2.constants import Common +from celery import shared_task +from utils.constants import Account +from utils.local_context import StateStore + +logger = logging.getLogger(__name__) + +PROMPT_STUDIO_RESULT_EVENT = "prompt_studio_result" + + +class _SafeEncoder(json.JSONEncoder): + """JSON encoder that converts uuid.UUID and datetime objects to strings.""" + + def default(self, obj: Any) -> Any: + if isinstance(obj, uuid.UUID): + return str(obj) + if isinstance(obj, (datetime, date)): + return obj.isoformat() + return super().default(obj) + + +def _json_safe(data: Any) -> Any: + """Round-trip through JSON to convert non-serializable types. + + Handles uuid.UUID (from DRF serializers) and datetime/date objects + (from plugins or ORM fields) that stdlib json.dumps cannot handle. 
+ """ + return json.loads(json.dumps(data, cls=_SafeEncoder)) + + +def _setup_state_store(log_events_id: str, request_id: str, org_id: str = "") -> None: + """Restore thread-local context that was captured in the Django view.""" + StateStore.set(Common.LOG_EVENTS_ID, log_events_id) + StateStore.set(Common.REQUEST_ID, request_id) + if org_id: + StateStore.set(Account.ORGANIZATION_ID, org_id) + + +def _clear_state_store() -> None: + """Clean up thread-local context to prevent leaking between tasks.""" + StateStore.clear(Common.LOG_EVENTS_ID) + StateStore.clear(Common.REQUEST_ID) + StateStore.clear(Account.ORGANIZATION_ID) + + +def _emit_result( + log_events_id: str, + task_id: str, + operation: str, + result: dict[str, Any], + tool_id: str = "", + extra: dict[str, Any] | None = None, +) -> None: + """Push a success event to the frontend via Socket.IO.""" + from utils.log_events import ( + _emit_websocket_event, # Lazy import: task module loaded before Django apps ready + ) + + payload: dict[str, Any] = { + "task_id": task_id, + "status": "completed", + "operation": operation, + "result": result, + "tool_id": tool_id, + } + if extra: + payload.update(extra) + _emit_websocket_event( + room=log_events_id, + event=PROMPT_STUDIO_RESULT_EVENT, + data=_json_safe(payload), + ) + + +def _emit_error( + log_events_id: str, + task_id: str, + operation: str, + error: str, + extra: dict[str, Any] | None = None, + tool_id: str = "", +) -> None: + """Push a failure event to the frontend via Socket.IO.""" + from utils.log_events import ( + _emit_websocket_event, # Lazy import: task module loaded before Django apps ready + ) + + data: dict[str, Any] = { + "task_id": task_id, + "status": "failed", + "operation": operation, + "error": error, + "tool_id": tool_id, + } + if extra: + data.update(extra) + _emit_websocket_event( + room=log_events_id, + event=PROMPT_STUDIO_RESULT_EVENT, + data=data, + ) + + +# ------------------------------------------------------------------ +# IDE callback 
tasks (ide_index_complete, ide_index_error, +# ide_prompt_complete, ide_prompt_error) have been moved to the +# standalone ide_callback worker (workers/ide_callback/tasks.py). +# They now run on the workers image using InternalAPIClient. +# ------------------------------------------------------------------ + + +# ------------------------------------------------------------------ +# Legacy tasks (kept for backward compatibility during rollout) +# ------------------------------------------------------------------ + + +@shared_task(name="prompt_studio_index_document", bind=True) +def run_index_document( + self, + tool_id: str, + file_name: str, + org_id: str, + user_id: str, + document_id: str, + run_id: str, + log_events_id: str, + request_id: str, +) -> dict[str, Any]: + # Lazy import: circular dep (helper <-> tasks) + from prompt_studio.prompt_studio_core_v2.prompt_studio_helper import ( + PromptStudioHelper, + ) + + try: + _setup_state_store(log_events_id, request_id, org_id) + PromptStudioHelper.index_document( + tool_id=tool_id, + file_name=file_name, + org_id=org_id, + user_id=user_id, + document_id=document_id, + run_id=run_id, + ) + result: dict[str, Any] = { + "message": "Document indexed successfully.", + "document_id": document_id, + } + _emit_result( + log_events_id, + self.request.id, + "index_document", + result, + tool_id=tool_id, + ) + return result + except Exception as e: + logger.exception("run_index_document failed") + _emit_error( + log_events_id, + self.request.id, + "index_document", + str(e), + extra={"document_id": document_id}, + tool_id=tool_id, + ) + raise + finally: + _clear_state_store() + + +@shared_task(name="prompt_studio_fetch_response", bind=True) +def run_fetch_response( + self, + tool_id: str, + org_id: str, + user_id: str, + document_id: str, + run_id: str, + log_events_id: str, + request_id: str, + id: str | None = None, + profile_manager_id: str | None = None, +) -> dict[str, Any]: + # Lazy import: circular dep (helper <-> 
tasks) + from prompt_studio.prompt_studio_core_v2.prompt_studio_helper import ( + PromptStudioHelper, + ) + + try: + _setup_state_store(log_events_id, request_id, org_id) + response: dict[str, Any] = PromptStudioHelper.prompt_responder( + id=id, + tool_id=tool_id, + org_id=org_id, + user_id=user_id, + document_id=document_id, + run_id=run_id, + profile_manager_id=profile_manager_id, + ) + _emit_result( + log_events_id, + self.request.id, + "fetch_response", + response, + tool_id=tool_id, + ) + # Return minimal status to avoid logging sensitive extracted data + return {"status": "completed", "operation": "fetch_response"} + except Exception as e: + logger.exception("run_fetch_response failed") + _emit_error( + log_events_id, + self.request.id, + "fetch_response", + str(e), + tool_id=tool_id, + ) + raise + finally: + _clear_state_store() + + +@shared_task(name="prompt_studio_single_pass", bind=True) +def run_single_pass_extraction( + self, + tool_id: str, + org_id: str, + user_id: str, + document_id: str, + run_id: str, + log_events_id: str, + request_id: str, +) -> dict[str, Any]: + # Lazy import: circular dep (helper <-> tasks) + from prompt_studio.prompt_studio_core_v2.prompt_studio_helper import ( + PromptStudioHelper, + ) + + try: + _setup_state_store(log_events_id, request_id, org_id) + response: dict[str, Any] = PromptStudioHelper.prompt_responder( + tool_id=tool_id, + org_id=org_id, + user_id=user_id, + document_id=document_id, + run_id=run_id, + ) + _emit_result( + log_events_id, + self.request.id, + "single_pass_extraction", + response, + tool_id=tool_id, + ) + # Return minimal status to avoid logging sensitive extracted data + return {"status": "completed", "operation": "single_pass_extraction"} + except Exception as e: + logger.exception("run_single_pass_extraction failed") + _emit_error( + log_events_id, + self.request.id, + "single_pass_extraction", + str(e), + tool_id=tool_id, + ) + raise + finally: + _clear_state_store() diff --git 
a/backend/prompt_studio/prompt_studio_core_v2/test_tasks.py b/backend/prompt_studio/prompt_studio_core_v2/test_tasks.py new file mode 100644 index 0000000000..3fc741f952 --- /dev/null +++ b/backend/prompt_studio/prompt_studio_core_v2/test_tasks.py @@ -0,0 +1,493 @@ +"""Phase 7-9 sanity tests for Prompt Studio IDE async backend. + +Tests the Celery task definitions (Phase 7), view dispatch (Phase 8), +and polling endpoint (Phase 9). + +Requires Django to be configured (source .env before running): + set -a && source .env && set +a + uv run pytest prompt_studio/prompt_studio_core_v2/test_tasks.py -v +""" + +import os +from unittest.mock import patch + +import django + +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "backend.settings.dev") +django.setup() + +import pytest # noqa: E402 +from account_v2.constants import Common # noqa: E402 +from celery import Celery # noqa: E402 +from utils.local_context import StateStore # noqa: E402 + +from prompt_studio.prompt_studio_core_v2.tasks import ( # noqa: E402 + PROMPT_STUDIO_RESULT_EVENT, + ide_prompt_complete, + run_fetch_response, + run_index_document, + run_single_pass_extraction, +) + +# --------------------------------------------------------------------------- +# Celery eager-mode app for testing +# --------------------------------------------------------------------------- +test_app = Celery("test") +test_app.conf.update( + task_always_eager=True, + task_eager_propagates=True, + result_backend="cache+memory://", +) +run_index_document.bind(test_app) +run_fetch_response.bind(test_app) +run_single_pass_extraction.bind(test_app) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- +COMMON_KWARGS = { + "tool_id": "tool-123", + "org_id": "org-456", + "user_id": "user-789", + "document_id": "doc-abc", + "run_id": "run-def", + "log_events_id": "session-room-xyz", + "request_id": "req-001", +} + + +# 
=================================================================== +# Phase 7: Task definition tests +# =================================================================== +class TestTaskNames: + def test_index_document_task_name(self): + assert run_index_document.name == "prompt_studio_index_document" + + def test_fetch_response_task_name(self): + assert run_fetch_response.name == "prompt_studio_fetch_response" + + def test_single_pass_task_name(self): + assert run_single_pass_extraction.name == "prompt_studio_single_pass" + + +class TestRunIndexDocument: + @patch("utils.log_events._emit_websocket_event") + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") + def test_success_returns_result(self, mock_helper, mock_emit): + mock_helper.index_document.return_value = "unique-id-123" + result = run_index_document.apply( + kwargs={**COMMON_KWARGS, "file_name": "test.pdf"} + ).get() + + assert result == { + "message": "Document indexed successfully.", + "document_id": "doc-abc", + } + mock_helper.index_document.assert_called_once_with( + tool_id="tool-123", + file_name="test.pdf", + org_id="org-456", + user_id="user-789", + document_id="doc-abc", + run_id="run-def", + ) + + @patch("utils.log_events._emit_websocket_event") + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") + def test_success_emits_completed_event(self, mock_helper, mock_emit): + mock_helper.index_document.return_value = "unique-id-123" + run_index_document.apply(kwargs={**COMMON_KWARGS, "file_name": "test.pdf"}).get() + + mock_emit.assert_called_once() + kwargs = mock_emit.call_args.kwargs + assert kwargs["room"] == "session-room-xyz" + assert kwargs["event"] == PROMPT_STUDIO_RESULT_EVENT + assert kwargs["data"]["status"] == "completed" + assert kwargs["data"]["operation"] == "index_document" + assert kwargs["data"]["result"] == { + "message": "Document indexed successfully.", + "document_id": "doc-abc", + } + assert "task_id" in 
kwargs["data"] + + @patch("utils.log_events._emit_websocket_event") + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") + def test_failure_emits_error_and_reraises(self, mock_helper, mock_emit): + mock_helper.index_document.side_effect = RuntimeError("index boom") + + with pytest.raises(RuntimeError, match="index boom"): + run_index_document.apply( + kwargs={**COMMON_KWARGS, "file_name": "test.pdf"} + ).get() + + mock_emit.assert_called_once() + assert mock_emit.call_args.kwargs["data"]["status"] == "failed" + assert "index boom" in mock_emit.call_args.kwargs["data"]["error"] + + @patch("utils.log_events._emit_websocket_event") + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") + def test_state_store_cleared_on_success(self, mock_helper, mock_emit): + mock_helper.index_document.return_value = "ok" + run_index_document.apply(kwargs={**COMMON_KWARGS, "file_name": "test.pdf"}).get() + + assert StateStore.get(Common.LOG_EVENTS_ID) is None + assert StateStore.get(Common.REQUEST_ID) is None + + @patch("utils.log_events._emit_websocket_event") + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") + def test_state_store_cleared_on_failure(self, mock_helper, mock_emit): + mock_helper.index_document.side_effect = RuntimeError("fail") + with pytest.raises(RuntimeError): + run_index_document.apply( + kwargs={**COMMON_KWARGS, "file_name": "test.pdf"} + ).get() + + assert StateStore.get(Common.LOG_EVENTS_ID) is None + assert StateStore.get(Common.REQUEST_ID) is None + + @patch("utils.log_events._emit_websocket_event") + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") + def test_state_store_set_during_execution(self, mock_helper, mock_emit): + """Verify StateStore has the right values while the helper runs.""" + captured = {} + + def capture_state(**kwargs): + captured["log_events_id"] = StateStore.get(Common.LOG_EVENTS_ID) + 
captured["request_id"] = StateStore.get(Common.REQUEST_ID) + return "ok" + + mock_helper.index_document.side_effect = capture_state + run_index_document.apply(kwargs={**COMMON_KWARGS, "file_name": "test.pdf"}).get() + + assert captured["log_events_id"] == "session-room-xyz" + assert captured["request_id"] == "req-001" + # And cleared after + assert StateStore.get(Common.LOG_EVENTS_ID) is None + + +class TestRunFetchResponse: + @patch("utils.log_events._emit_websocket_event") + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") + def test_success_returns_response(self, mock_helper, mock_emit): + mock_helper.prompt_responder.return_value = { + "output": {"field": "value"}, + "metadata": {"tokens": 42}, + } + + result = run_fetch_response.apply( + kwargs={ + **COMMON_KWARGS, + "id": "prompt-1", + "profile_manager_id": "pm-1", + } + ).get() + + assert result == {"status": "completed", "operation": "fetch_response"} + mock_helper.prompt_responder.assert_called_once_with( + id="prompt-1", + tool_id="tool-123", + org_id="org-456", + user_id="user-789", + document_id="doc-abc", + run_id="run-def", + profile_manager_id="pm-1", + ) + + @patch("utils.log_events._emit_websocket_event") + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") + def test_success_emits_fetch_response_event(self, mock_helper, mock_emit): + mock_helper.prompt_responder.return_value = {"output": "data"} + run_fetch_response.apply( + kwargs={**COMMON_KWARGS, "id": "p1", "profile_manager_id": None} + ).get() + + data = mock_emit.call_args.kwargs["data"] + assert data["status"] == "completed" + assert data["operation"] == "fetch_response" + + @patch("utils.log_events._emit_websocket_event") + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") + def test_failure_emits_error(self, mock_helper, mock_emit): + mock_helper.prompt_responder.side_effect = ValueError("prompt fail") + + with 
pytest.raises(ValueError, match="prompt fail"): + run_fetch_response.apply(kwargs=COMMON_KWARGS).get() + + data = mock_emit.call_args.kwargs["data"] + assert data["status"] == "failed" + assert "prompt fail" in data["error"] + + @patch("utils.log_events._emit_websocket_event") + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") + def test_optional_params_default_none(self, mock_helper, mock_emit): + mock_helper.prompt_responder.return_value = {} + run_fetch_response.apply(kwargs=COMMON_KWARGS).get() + + mock_helper.prompt_responder.assert_called_once_with( + id=None, + tool_id="tool-123", + org_id="org-456", + user_id="user-789", + document_id="doc-abc", + run_id="run-def", + profile_manager_id=None, + ) + + @patch("utils.log_events._emit_websocket_event") + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") + def test_state_store_cleared(self, mock_helper, mock_emit): + mock_helper.prompt_responder.return_value = {} + run_fetch_response.apply(kwargs=COMMON_KWARGS).get() + assert StateStore.get(Common.LOG_EVENTS_ID) is None + + +class TestRunSinglePassExtraction: + @patch("utils.log_events._emit_websocket_event") + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") + def test_success_returns_response(self, mock_helper, mock_emit): + mock_helper.prompt_responder.return_value = {"output": {"key": "val"}} + + result = run_single_pass_extraction.apply(kwargs=COMMON_KWARGS).get() + + assert result == {"status": "completed", "operation": "single_pass_extraction"} + mock_helper.prompt_responder.assert_called_once_with( + tool_id="tool-123", + org_id="org-456", + user_id="user-789", + document_id="doc-abc", + run_id="run-def", + ) + + @patch("utils.log_events._emit_websocket_event") + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") + def test_success_emits_single_pass_event(self, mock_helper, mock_emit): + 
mock_helper.prompt_responder.return_value = {"data": "ok"} + run_single_pass_extraction.apply(kwargs=COMMON_KWARGS).get() + + data = mock_emit.call_args.kwargs["data"] + assert data["status"] == "completed" + assert data["operation"] == "single_pass_extraction" + + @patch("utils.log_events._emit_websocket_event") + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") + def test_failure_emits_error(self, mock_helper, mock_emit): + mock_helper.prompt_responder.side_effect = TypeError("single pass fail") + + with pytest.raises(TypeError, match="single pass fail"): + run_single_pass_extraction.apply(kwargs=COMMON_KWARGS).get() + + data = mock_emit.call_args.kwargs["data"] + assert data["status"] == "failed" + + @patch("utils.log_events._emit_websocket_event") + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") + def test_state_store_cleared(self, mock_helper, mock_emit): + mock_helper.prompt_responder.return_value = {} + run_single_pass_extraction.apply(kwargs=COMMON_KWARGS).get() + assert StateStore.get(Common.LOG_EVENTS_ID) is None + + +# =================================================================== +# Phase 8: View dispatch tests +# =================================================================== +class TestViewsDispatchTasks: + """Verify the three views use dispatch_with_callback, not direct helpers.""" + + def test_index_document_view_dispatches_with_callback(self): + import inspect + + from prompt_studio.prompt_studio_core_v2.views import PromptStudioCoreView + + source = inspect.getsource(PromptStudioCoreView.index_document) + assert "dispatch_with_callback" in source + assert "ide_index_complete" in source + assert "PromptStudioHelper.index_document(" not in source + assert "HTTP_202_ACCEPTED" in source + + def test_fetch_response_view_dispatches_with_callback(self): + import inspect + + from prompt_studio.prompt_studio_core_v2.views import PromptStudioCoreView + + source = 
inspect.getsource(PromptStudioCoreView.fetch_response) + assert "dispatch_with_callback" in source + assert "ide_prompt_complete" in source + assert "PromptStudioHelper.prompt_responder(" not in source + assert "HTTP_202_ACCEPTED" in source + + def test_single_pass_view_dispatches_with_callback(self): + import inspect + + from prompt_studio.prompt_studio_core_v2.views import PromptStudioCoreView + + source = inspect.getsource(PromptStudioCoreView.single_pass_extraction) + assert "dispatch_with_callback" in source + assert "ide_prompt_complete" in source + assert "PromptStudioHelper.prompt_responder(" not in source + assert "HTTP_202_ACCEPTED" in source + + def test_views_pass_callback_kwargs(self): + import inspect + + from prompt_studio.prompt_studio_core_v2.views import PromptStudioCoreView + + for method_name in [ + "index_document", + "fetch_response", + "single_pass_extraction", + ]: + source = inspect.getsource(getattr(PromptStudioCoreView, method_name)) + assert "callback_kwargs" in source, f"{method_name} missing callback_kwargs" + assert "executor_task_id" in source, f"{method_name} missing executor_task_id" + + +# =================================================================== +# Phase 9: Polling endpoint tests +# =================================================================== +class TestTaskStatusAction: + def test_task_status_method_exists(self): + from prompt_studio.prompt_studio_core_v2.views import PromptStudioCoreView + + assert hasattr(PromptStudioCoreView, "task_status") + assert callable(PromptStudioCoreView.task_status) + + def test_task_status_url_registered(self): + from prompt_studio.prompt_studio_core_v2.urls import urlpatterns + + task_status_urls = [ + p + for p in urlpatterns + if hasattr(p, "name") and p.name == "prompt-studio-task-status" + ] + assert len(task_status_urls) >= 1 + url = task_status_urls[0] + assert "" in str(url.pattern) + assert "" in str(url.pattern) + + 
@patch("prompt_studio.prompt_studio_core_v2.views.AsyncResult", create=True) + def test_task_status_processing(self, mock_async_result): + """Verify processing response for unfinished task.""" + import inspect + + from prompt_studio.prompt_studio_core_v2.views import PromptStudioCoreView + + source = inspect.getsource(PromptStudioCoreView.task_status) + assert "not result.ready()" in source + assert '"processing"' in source + + @patch("prompt_studio.prompt_studio_core_v2.views.AsyncResult", create=True) + def test_task_status_completed(self, mock_async_result): + """Verify completed response structure.""" + import inspect + + from prompt_studio.prompt_studio_core_v2.views import PromptStudioCoreView + + source = inspect.getsource(PromptStudioCoreView.task_status) + assert "result.successful()" in source + assert '"completed"' in source + assert "result.result" in source + + @patch("prompt_studio.prompt_studio_core_v2.views.AsyncResult", create=True) + def test_task_status_failed(self, mock_async_result): + """Verify failed response structure.""" + import inspect + + from prompt_studio.prompt_studio_core_v2.views import PromptStudioCoreView + + source = inspect.getsource(PromptStudioCoreView.task_status) + assert '"failed"' in source + assert "HTTP_500_INTERNAL_SERVER_ERROR" in source + + +# =================================================================== +# Phase 10: ide_prompt_complete callback tests +# =================================================================== +class TestIdePromptComplete: + """Tests for the new ide_prompt_complete Celery callback task.""" + + CALLBACK_KWARGS = { + "log_events_id": "session-room-xyz", + "request_id": "req-001", + "org_id": "org-456", + "operation": "fetch_response", + "run_id": "run-def", + "document_id": "doc-abc", + "prompt_ids": ["p1", "p2"], + "profile_manager_id": "pm-1", + "is_single_pass": False, + "executor_task_id": "exec-task-1", + "tool_id": "tool-123", + "dispatch_time": 0, + } + + 
@patch("prompt_studio.prompt_studio_core_v2.tasks._emit_result") + @patch( + "prompt_studio.prompt_studio_output_manager_v2.output_manager_helper" + ".OutputManagerHelper.handle_prompt_output_update" + ) + @patch("prompt_studio.prompt_studio_v2.models.ToolStudioPrompt") + def test_success_logs_output_keys( + self, mock_model, mock_output_helper, mock_emit, caplog + ): + """Verifies Fix 4: ide_prompt_complete logs output_keys on success.""" + mock_model.objects.filter.return_value.order_by.return_value = [] + mock_output_helper.return_value = {"some": "response"} + + result_dict = { + "success": True, + "data": { + "output": {"field_a": "val1", "field_b": "val2"}, + "metadata": {}, + }, + } + + import logging + + with caplog.at_level(logging.INFO): + result = ide_prompt_complete( + result_dict, callback_kwargs=self.CALLBACK_KWARGS + ) + + assert result["status"] == "completed" + assert any("ide_prompt_complete" in msg for msg in caplog.messages) + assert any("output_keys" in msg for msg in caplog.messages) + mock_emit.assert_called_once() + + @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_error") + def test_executor_failure_emits_error(self, mock_emit_error): + """When executor reports failure, emit error and return failed status.""" + result_dict = { + "success": False, + "error": "LLM timeout", + } + + result = ide_prompt_complete(result_dict, callback_kwargs=self.CALLBACK_KWARGS) + + assert result == {"status": "failed", "error": "LLM timeout"} + mock_emit_error.assert_called_once() + call_args = mock_emit_error.call_args + assert call_args[0][3] == "LLM timeout" # error message arg + + @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_result") + @patch( + "prompt_studio.prompt_studio_output_manager_v2.output_manager_helper" + ".OutputManagerHelper.handle_prompt_output_update" + ) + @patch("prompt_studio.prompt_studio_v2.models.ToolStudioPrompt") + def test_state_store_cleared_after_success( + self, mock_model, mock_output_helper, mock_emit + ): + 
"""StateStore should be cleaned up after callback completes.""" + mock_model.objects.filter.return_value.order_by.return_value = [] + mock_output_helper.return_value = {} + + result_dict = { + "success": True, + "data": {"output": {}, "metadata": {}}, + } + + ide_prompt_complete(result_dict, callback_kwargs=self.CALLBACK_KWARGS) + + assert StateStore.get(Common.LOG_EVENTS_ID) is None + assert StateStore.get(Common.REQUEST_ID) is None diff --git a/backend/prompt_studio/prompt_studio_core_v2/urls.py b/backend/prompt_studio/prompt_studio_core_v2/urls.py index f5db3bb337..86cbb97dd3 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/urls.py +++ b/backend/prompt_studio/prompt_studio_core_v2/urls.py @@ -33,6 +33,9 @@ prompt_studio_adapter_choices = PromptStudioCoreView.as_view( {"get": "get_adapter_choices"} ) +prompt_studio_bulk_fetch_response = PromptStudioCoreView.as_view( + {"post": "bulk_fetch_response"} +) prompt_studio_single_pass_extraction = PromptStudioCoreView.as_view( {"post": "single_pass_extraction"} ) @@ -61,6 +64,8 @@ {"get": "check_deployment_usage"} ) +prompt_studio_task_status = PromptStudioCoreView.as_view({"get": "task_status"}) + urlpatterns = format_suffix_patterns( [ @@ -105,6 +110,11 @@ prompt_studio_prompt_response, name="prompt-studio-prompt-response", ), + path( + "prompt-studio/bulk_fetch_response/", + prompt_studio_bulk_fetch_response, + name="prompt-studio-bulk-fetch-response", + ), path( "prompt-studio/adapter-choices/", prompt_studio_adapter_choices, @@ -150,5 +160,10 @@ prompt_studio_deployment_usage, name="prompt_studio_deployment_usage", ), + path( + "prompt-studio//task-status/", + prompt_studio_task_status, + name="prompt-studio-task-status", + ), ] ) diff --git a/backend/prompt_studio/prompt_studio_core_v2/views.py b/backend/prompt_studio/prompt_studio_core_v2/views.py index 014f825d17..5df55a2d86 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/views.py +++ b/backend/prompt_studio/prompt_studio_core_v2/views.py @@ 
-1,12 +1,15 @@ import json import logging +import time import uuid from datetime import datetime +from pathlib import Path from typing import Any import magic from account_v2.custom_exceptions import DuplicateData from api_v2.models import APIDeployment +from celery import signature from django.db import IntegrityError from django.db.models import QuerySet from django.http import HttpRequest, HttpResponse @@ -44,7 +47,6 @@ ) from prompt_studio.prompt_studio_core_v2.exceptions import ( DeploymentUsageCheckError, - IndexingAPIError, MaxProfilesReachedError, ToolDeleteError, ) @@ -58,7 +60,6 @@ PromptStudioDocumentHelper, ) from prompt_studio.prompt_studio_index_manager_v2.models import IndexManager -from prompt_studio.prompt_studio_output_manager_v2.models import PromptStudioOutputManager from prompt_studio.prompt_studio_registry_v2.models import PromptStudioRegistry from prompt_studio.prompt_studio_registry_v2.prompt_studio_registry_helper import ( PromptStudioRegistryHelper, @@ -299,7 +300,7 @@ def get_select_choices(self, request: HttpRequest) -> Response: select_choices: dict[str, Any] = PromptStudioHelper.get_select_fields() return Response(select_choices, status=status.HTTP_200_OK) except Exception as e: - logger.error(f"Error occured while fetching select fields {e}") + logger.error("Error occurred while fetching select fields: %s", e) return Response(select_choices, status=status.HTTP_204_NO_CONTENT) @action(detail=True, methods=["get"]) @@ -320,7 +321,7 @@ def get_retrieval_strategies(self, request: HttpRequest, pk: Any = None) -> Resp strategies = get_retrieval_strategy_metadata() return Response(strategies, status=status.HTTP_200_OK) except Exception as e: - logger.error(f"Error occurred while fetching retrieval strategies: {e}") + logger.error("Error occurred while fetching retrieval strategies: %s", e) return Response( {"error": "Failed to fetch retrieval strategies"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR, @@ -365,6 +366,10 @@ def 
make_profile_default(self, request: HttpRequest, pk: Any = None) -> Response def index_document(self, request: HttpRequest, pk: Any = None) -> Response: """API Entry point method to index input file. + Builds the full execution payload (ORM work), then fires a + single executor task with Celery link/link_error callbacks. + The backend worker slot is freed immediately. + Args: request (HttpRequest) @@ -381,10 +386,9 @@ def index_document(self, request: HttpRequest, pk: Any = None) -> Response: document_id: str = serializer.validated_data.get(ToolStudioPromptKeys.DOCUMENT_ID) document: DocumentManager = DocumentManager.objects.get(pk=document_id) file_name: str = document.document_name - # Generate a run_id run_id = CommonUtils.generate_uuid() - unique_id = PromptStudioHelper.index_document( + context, cb_kwargs = PromptStudioHelper.build_index_payload( tool_id=str(tool.tool_id), file_name=file_name, org_id=UserSessionUtils.get_organization_id(request), @@ -392,93 +396,388 @@ def index_document(self, request: HttpRequest, pk: Any = None) -> Response: document_id=document_id, run_id=run_id, ) - if unique_id: - return Response( - {"message": "Document indexed successfully."}, - status=status.HTTP_200_OK, + + dispatcher = PromptStudioHelper._get_dispatcher() + + # Pre-generate task ID so callbacks can reference it + executor_task_id = str(uuid.uuid4()) + cb_kwargs["executor_task_id"] = executor_task_id + + # Mark as indexing in progress — placed here so the except block + # below can clean up the lock if dispatch_with_callback fails. 
+ DocumentIndexingService.set_document_indexing( + org_id=cb_kwargs["org_id"], + user_id=cb_kwargs["user_id"], + doc_id_key=cb_kwargs["doc_id_key"], + ) + + try: + task = dispatcher.dispatch_with_callback( + context, + on_success=signature( + "ide_index_complete", + kwargs={"callback_kwargs": cb_kwargs}, + queue="ide_callback", + ), + on_error=signature( + "ide_index_error", + kwargs={"callback_kwargs": cb_kwargs}, + queue="ide_callback", + ), + task_id=executor_task_id, ) - else: - logger.error("Error occured while indexing. Unique ID is not valid.") - raise IndexingAPIError() + except Exception: + DocumentIndexingService.remove_document_indexing( + org_id=cb_kwargs["org_id"], + user_id=cb_kwargs["user_id"], + doc_id_key=cb_kwargs["doc_id_key"], + ) + raise + return Response( + {"task_id": task.id, "run_id": run_id, "status": "accepted"}, + status=status.HTTP_202_ACCEPTED, + ) @action(detail=True, methods=["post"]) def fetch_response(self, request: HttpRequest, pk: Any = None) -> Response: """API Entry point method to fetch response to prompt. - Args: - request (HttpRequest): _description_ + Builds the full execution payload (ORM work), then fires a + single executor task with Celery link/link_error callbacks. 
- Raises: - FilenameMissingError: _description_ + Args: + request (HttpRequest) Returns: Response """ custom_tool = self.get_object() - tool_id: str = str(custom_tool.tool_id) document_id: str = request.data.get(ToolStudioPromptKeys.DOCUMENT_ID) - id: str = request.data.get(ToolStudioPromptKeys.ID) + prompt_id: str = request.data.get(ToolStudioPromptKeys.ID) run_id: str = request.data.get(ToolStudioPromptKeys.RUN_ID) - profile_manager: str = request.data.get(ToolStudioPromptKeys.PROFILE_MANAGER_ID) + profile_manager_id: str = request.data.get( + ToolStudioPromptKeys.PROFILE_MANAGER_ID + ) if not run_id: - # Generate a run_id run_id = CommonUtils.generate_uuid() - # Check output count before prompt run for HubSpot notification - # Filter through tool FK to scope by organization (PromptStudioOutputManager - # lacks DefaultOrganizationManagerMixin) - output_count_before = PromptStudioOutputManager.objects.filter( + org_id = UserSessionUtils.get_organization_id(request) + user_id = custom_tool.created_by.user_id + + # Resolve prompt — guard against missing / stale prompt_id + if not prompt_id: + return Response( + {"error": "prompt id is required."}, + status=status.HTTP_400_BAD_REQUEST, + ) + try: + prompt = ToolStudioPrompt.objects.get(pk=prompt_id) + except ToolStudioPrompt.DoesNotExist: + return Response( + {"error": f"Prompt {prompt_id} not found."}, + status=status.HTTP_404_NOT_FOUND, + ) + + # Build file path + doc_path = PromptStudioFileHelper.get_or_create_prompt_studio_subdirectory( + org_id, + is_create=False, + user_id=user_id, + tool_id=str(custom_tool.tool_id), + ) + document: DocumentManager = DocumentManager.objects.get(pk=document_id) + doc_path = str(Path(doc_path) / document.document_name) + + context, cb_kwargs = PromptStudioHelper.build_fetch_response_payload( + tool=custom_tool, + doc_path=doc_path, + doc_name=document.document_name, + prompt=prompt, + org_id=org_id, + user_id=user_id, + document_id=document_id, + run_id=run_id, + 
profile_manager_id=profile_manager_id, + ) + + # If document is being indexed, return pending status + if context is None: + return Response(cb_kwargs, status=status.HTTP_202_ACCEPTED) + + # Capture HubSpot first-run state before dispatch so the callback + # can fire the PROMPT_RUN analytics event on success. + from prompt_studio.prompt_studio_output_manager_v2.models import ( + PromptStudioOutputManager, + ) + + cb_kwargs["hubspot_user_id"] = request.user.pk + cb_kwargs["is_first_prompt_run"] = not PromptStudioOutputManager.objects.filter( tool_id__in=CustomTool.objects.values_list("tool_id", flat=True) - ).count() + ).exists() + + dispatcher = PromptStudioHelper._get_dispatcher() + + executor_task_id = str(uuid.uuid4()) + cb_kwargs["executor_task_id"] = executor_task_id + cb_kwargs["dispatch_time"] = time.time() + + task = dispatcher.dispatch_with_callback( + context, + on_success=signature( + "ide_prompt_complete", + kwargs={"callback_kwargs": cb_kwargs}, + queue="ide_callback", + ), + on_error=signature( + "ide_prompt_error", + kwargs={"callback_kwargs": cb_kwargs}, + queue="ide_callback", + ), + task_id=executor_task_id, + ) + return Response( + {"task_id": task.id, "run_id": run_id, "status": "accepted"}, + status=status.HTTP_202_ACCEPTED, + ) - response: dict[str, Any] = PromptStudioHelper.prompt_responder( - id=id, - tool_id=tool_id, - org_id=UserSessionUtils.get_organization_id(request), - user_id=custom_tool.created_by.user_id, + @action(detail=True, methods=["post"]) + def bulk_fetch_response(self, request: HttpRequest, pk: Any = None) -> Response: + """Bulk fetch_response: accept multiple prompt IDs, extract and index + once, then dispatch a single executor task for all prompts. + + Prevents the "Document being indexed" race when the frontend fires + N individual fetch_response requests concurrently on an unindexed + document. 
+ """ + custom_tool = self.get_object() + prompt_ids = request.data.get("prompt_ids", []) + if not prompt_ids: + return Response( + {"error": "prompt_ids is required and must be non-empty."}, + status=status.HTTP_400_BAD_REQUEST, + ) + document_id: str = request.data.get(ToolStudioPromptKeys.DOCUMENT_ID) + run_id: str = request.data.get(ToolStudioPromptKeys.RUN_ID) + profile_manager_id: str = request.data.get( + ToolStudioPromptKeys.PROFILE_MANAGER_ID + ) + if not run_id: + run_id = CommonUtils.generate_uuid() + + org_id = UserSessionUtils.get_organization_id(request) + user_id = custom_tool.created_by.user_id + + prompts = list( + ToolStudioPrompt.objects.filter(prompt_id__in=prompt_ids).order_by( + "sequence_number" + ) + ) + if not prompts: + return Response( + {"error": "No matching prompts found for the provided prompt_ids."}, + status=status.HTTP_404_NOT_FOUND, + ) + + doc_path = PromptStudioFileHelper.get_or_create_prompt_studio_subdirectory( + org_id, + is_create=False, + user_id=user_id, + tool_id=str(custom_tool.tool_id), + ) + if not document_id: + return Response( + {"error": "document_id is required."}, + status=status.HTTP_400_BAD_REQUEST, + ) + try: + document: DocumentManager = DocumentManager.objects.get(pk=document_id) + except DocumentManager.DoesNotExist: + return Response( + {"error": f"Document {document_id} not found."}, + status=status.HTTP_404_NOT_FOUND, + ) + doc_path = str(Path(doc_path) / document.document_name) + + context, cb_kwargs = PromptStudioHelper.build_bulk_fetch_response_payload( + tool=custom_tool, + doc_path=doc_path, + doc_name=document.document_name, + prompts=prompts, + org_id=org_id, + user_id=user_id, document_id=document_id, run_id=run_id, - profile_manager_id=profile_manager, + profile_manager_id=profile_manager_id, ) - # Notify HubSpot about first prompt run - notify_hubspot_event( - user=request.user, - event_name="PROMPT_RUN", - is_first_for_org=output_count_before == 0, - action_label="prompt run", + if context is 
None: + return Response(cb_kwargs, status=status.HTTP_202_ACCEPTED) + + # Capture HubSpot first-run state before dispatch so the callback + # can fire the PROMPT_RUN analytics event on success. + from prompt_studio.prompt_studio_output_manager_v2.models import ( + PromptStudioOutputManager, ) - return Response(response, status=status.HTTP_200_OK) + cb_kwargs["hubspot_user_id"] = request.user.pk + cb_kwargs["is_first_prompt_run"] = not PromptStudioOutputManager.objects.filter( + tool_id__in=CustomTool.objects.values_list("tool_id", flat=True) + ).exists() + + dispatcher = PromptStudioHelper._get_dispatcher() + + executor_task_id = str(uuid.uuid4()) + cb_kwargs["executor_task_id"] = executor_task_id + cb_kwargs["dispatch_time"] = time.time() + + task = dispatcher.dispatch_with_callback( + context, + on_success=signature( + "ide_prompt_complete", + kwargs={"callback_kwargs": cb_kwargs}, + queue="ide_callback", + ), + on_error=signature( + "ide_prompt_error", + kwargs={"callback_kwargs": cb_kwargs}, + queue="ide_callback", + ), + task_id=executor_task_id, + ) + return Response( + {"task_id": task.id, "run_id": run_id, "status": "accepted"}, + status=status.HTTP_202_ACCEPTED, + ) @action(detail=True, methods=["post"]) def single_pass_extraction(self, request: HttpRequest, pk: uuid) -> Response: - """API Entry point method to fetch response to prompt. + """API Entry point method for single pass extraction. + + Builds the full execution payload (ORM work), then fires a + single executor task with Celery link/link_error callbacks. 
Args: - request (HttpRequest): _description_ - pk (Any): Primary key of the CustomTool + request (HttpRequest) + pk: Primary key of the CustomTool Returns: Response """ - # TODO: Handle fetch_response and single_pass_ - # extraction using common function custom_tool = self.get_object() - tool_id: str = str(custom_tool.tool_id) document_id: str = request.data.get(ToolStudioPromptKeys.DOCUMENT_ID) run_id: str = request.data.get(ToolStudioPromptKeys.RUN_ID) if not run_id: - # Generate a run_id run_id = CommonUtils.generate_uuid() - response: dict[str, Any] = PromptStudioHelper.prompt_responder( - tool_id=tool_id, - org_id=UserSessionUtils.get_organization_id(request), - user_id=custom_tool.created_by.user_id, + + org_id = UserSessionUtils.get_organization_id(request) + user_id = custom_tool.created_by.user_id + + # Build file path + doc_path = PromptStudioFileHelper.get_or_create_prompt_studio_subdirectory( + org_id, + is_create=False, + user_id=user_id, + tool_id=str(custom_tool.tool_id), + ) + document: DocumentManager = DocumentManager.objects.get(pk=document_id) + doc_path = str(Path(doc_path) / document.document_name) + + # Fetch prompts eligible for single-pass extraction. + # Mirrors the filtering in _execute_prompts_in_single_pass: + # only active, non-NOTES, non-TABLE/RECORD prompts. 
+ prompts = list( + ToolStudioPrompt.objects.filter(tool_id=custom_tool.tool_id).order_by( + "sequence_number" + ) + ) + prompts = [ + p + for p in prompts + if p.prompt_type != ToolStudioPromptKeys.NOTES + and p.active + and p.enforce_type != ToolStudioPromptKeys.TABLE + and p.enforce_type != ToolStudioPromptKeys.RECORD + ] + if not prompts: + return Response( + {"error": "No active prompts found for single pass extraction."}, + status=status.HTTP_400_BAD_REQUEST, + ) + + context, cb_kwargs = PromptStudioHelper.build_single_pass_payload( + tool=custom_tool, + doc_path=doc_path, + doc_name=document.document_name, + prompts=prompts, + org_id=org_id, + user_id=user_id, document_id=document_id, run_id=run_id, ) - return Response(response, status=status.HTTP_200_OK) + + dispatcher = PromptStudioHelper._get_dispatcher() + + executor_task_id = str(uuid.uuid4()) + cb_kwargs["executor_task_id"] = executor_task_id + cb_kwargs["dispatch_time"] = time.time() + + task = dispatcher.dispatch_with_callback( + context, + on_success=signature( + "ide_prompt_complete", + kwargs={"callback_kwargs": cb_kwargs}, + queue="ide_callback", + ), + on_error=signature( + "ide_prompt_error", + kwargs={"callback_kwargs": cb_kwargs}, + queue="ide_callback", + ), + task_id=executor_task_id, + ) + return Response( + {"task_id": task.id, "run_id": run_id, "status": "accepted"}, + status=status.HTTP_202_ACCEPTED, + ) + + @action(detail=True, methods=["get"]) + def task_status( + self, request: HttpRequest, pk: Any = None, task_id: str = None + ) -> Response: + """Poll the status of an async Prompt Studio task. + + Task IDs now point to executor worker tasks dispatched via the + worker-v2 Celery app. Both apps share the same PostgreSQL + result backend, so we use the worker app to look up results. 
+ + Args: + request (HttpRequest) + pk: Primary key of the CustomTool (for permission check) + task_id: Celery task ID returned by the 202 response + + Returns: + Response with {task_id, status} and optionally result or error + """ + from celery.result import ( + AsyncResult, # Lazy import: Celery not needed for non-async views + ) + + from backend.worker_celery import ( + get_worker_celery_app, # Lazy import: avoids Celery init on module load + ) + + # Verify the user has access to this tool (triggers permission check) + self.get_object() + + result = AsyncResult(task_id, app=get_worker_celery_app()) + if not result.ready(): + return Response({"task_id": task_id, "status": "processing"}) + if result.successful(): + return Response({"task_id": task_id, "status": "completed"}) + return Response( + {"task_id": task_id, "status": "failed", "error": str(result.result)}, + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) @action(detail=True, methods=["get"]) def list_of_shared_users(self, request: HttpRequest, pk: Any = None) -> Response: @@ -583,7 +882,7 @@ def fetch_contents_ide(self, request: HttpRequest, pk: Any = None) -> Response: except (FileNotFoundError, FileNotFound): pass # No converted file — fall through to return original except Exception: - logger.exception(f"Error fetching converted file: {converted_name}") + logger.exception("Error fetching converted file: %s", converted_name) try: contents = PromptStudioFileHelper.fetch_file_contents( @@ -640,7 +939,9 @@ def upload_for_ide(self, request: HttpRequest, pk: Any = None) -> Response: file_data = uploaded_file # else: CSV/TXT/Excel — file_data stays as original, no conversion - logger.info(f"Uploading file: {file_name}" if file_name else "Uploading file") + logger.info("Uploading file: %s", file_name) if file_name else logger.info( + "Uploading file" + ) # Store original file in main dir (always the original) PromptStudioFileHelper.upload_for_ide( @@ -706,7 +1007,7 @@ def delete_for_ide(self, request: 
HttpRequest, pk: uuid) -> Response: status=status.HTTP_200_OK, ) except Exception as exc: - logger.error(f"Exception thrown from file deletion, error {exc}") + logger.error("Exception thrown from file deletion, error: %s", exc) return Response( {"data": "File deletion failed."}, status=status.HTTP_400_BAD_REQUEST, @@ -780,7 +1081,7 @@ def export_project(self, request: Request, pk: Any = None) -> HttpResponse: return response except Exception as exc: - logger.error(f"Error exporting project: {exc}") + logger.error("Error exporting project: %s", exc) return Response( {"error": "Failed to export project"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR, @@ -819,7 +1120,7 @@ def import_project(self, request: Request) -> Response: status=status.HTTP_400_BAD_REQUEST, ) except Exception as e: - logger.error(f"Error creating profile manager: {e}") + logger.error("Error creating profile manager: %s", e) return Response( {"error": "Failed to create profile manager"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR, @@ -847,7 +1148,7 @@ def import_project(self, request: Request) -> Response: return Response(response_data, status=status.HTTP_201_CREATED) except Exception as exc: - logger.error(f"Error importing project: {exc}") + logger.error("Error importing project: %s", exc) return Response( {"error": "Failed to import project"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR, @@ -939,7 +1240,7 @@ def check_deployment_usage(self, request: Request, pk: Any = None) -> Response: return Response(deployment_info, status=status.HTTP_200_OK) except Exception as e: - logger.error(f"Error checking deployment usage for tool {pk}: {e}") + logger.error("Error checking deployment usage for tool %s: %s", pk, e) raise DeploymentUsageCheckError( detail=f"Failed to check deployment usage: {str(e)}" ) diff --git a/backend/usage_v2/helper.py b/backend/usage_v2/helper.py index 0b211514ab..c11949356e 100644 --- a/backend/usage_v2/helper.py +++ b/backend/usage_v2/helper.py @@ -102,6 +102,64 @@ def 
get_aggregated_token_count(run_id: str) -> dict: logger.error(f"An unexpected error occurred for run_id {run_id}: {str(e)}") raise APIException("Error while aggregating token counts") + @staticmethod + def get_usage_by_model(run_id: str) -> dict[str, list[dict[str, Any]]]: + """Get per-model usage breakdown matching prompt-service format. + + Groups usage data by (usage_type, llm_usage_reason, model_name) and + returns cost arrays keyed as 'extraction_llm', 'challenge_llm', + 'embedding', etc. — matching the legacy prompt-service response. + + Args: + run_id: The file_execution_id / run_id to query. + + Returns: + Dict with keys like 'extraction_llm', 'embedding' mapping to + lists of per-model cost entries. Empty dict on error. + """ + try: + rows = ( + Usage.objects.filter(run_id=run_id) + .values("usage_type", "llm_usage_reason", "model_name") + .annotate( + sum_input_tokens=Sum("prompt_tokens"), + sum_output_tokens=Sum("completion_tokens"), + sum_total_tokens=Sum("total_tokens"), + sum_embedding_tokens=Sum("embedding_tokens"), + sum_cost=Sum("cost_in_dollars"), + ) + ) + result: dict[str, list[dict[str, Any]]] = {} + for row in rows: + usage_type = row["usage_type"] + llm_reason = row["llm_usage_reason"] + cost_str = UsageHelper._format_float_positional(row["sum_cost"] or 0.0) + + key = usage_type + item: dict[str, Any] = { + "model_name": row["model_name"], + "cost_in_dollars": cost_str, + } + if llm_reason: + key = f"{llm_reason}_{usage_type}" + item["input_tokens"] = row["sum_input_tokens"] or 0 + item["output_tokens"] = row["sum_output_tokens"] or 0 + item["total_tokens"] = row["sum_total_tokens"] or 0 + else: + item["embedding_tokens"] = row["sum_embedding_tokens"] or 0 + + result.setdefault(key, []).append(item) + return result + except Exception as e: + logger.error("Error querying per-model usage for run_id %s: %s", run_id, e) + return {} + + @staticmethod + def _format_float_positional(value: float, precision: int = 10) -> str: + """Format float 
without scientific notation, stripping trailing zeros.""" + formatted: str = f"{value:.{precision}f}" + return formatted.rstrip("0").rstrip(".") if "." in formatted else formatted + @staticmethod def aggregate_usage_metrics(queryset: QuerySet) -> dict[str, Any]: """Aggregate usage metrics from a queryset of Usage objects. diff --git a/backend/workflow_manager/workflow_v2/dto.py b/backend/workflow_manager/workflow_v2/dto.py index b2398e883e..c567166a2e 100644 --- a/backend/workflow_manager/workflow_v2/dto.py +++ b/backend/workflow_manager/workflow_v2/dto.py @@ -29,6 +29,32 @@ def __init__(self, input_dict: dict[str, Any]) -> None: @dataclass class ExecutionResponse: + """DTO representing the response from a workflow/tool execution. + + Attributes: + workflow_id: UUID of the workflow that was executed. + execution_id: UUID of the specific execution run. + execution_status: Current status (e.g. "PENDING", "EXECUTING", + "COMPLETED", "ERROR"). + log_id: Optional ID for the associated execution log stream. + status_api: Optional URL/path for polling execution status. + error: Human-readable error message if execution failed. + mode: Execution mode (e.g. "DEFAULT", "QUEUE"). + result: Execution output — a list of per-file result dicts, each + containing: + - ``"file"`` (str): Source file identifier. + - ``"result"`` (dict): Extracted output with keys ``"output"`` + (the structured extraction), ``"metadata"`` (highlight data, + per-model usage, confidence scores), and ``"metrics"`` + (timing, token counts). + - ``"metadata"`` (dict): Workflow-level identifiers + (``source_name``, ``source_hash``, ``workflow_id``, etc.). + - ``"error"`` (str | None): Per-file error if extraction failed. + message: Optional human-readable status message. + result_acknowledged: Whether the caller has acknowledged/consumed + the result (used by async polling flows). 
+ """ + workflow_id: str execution_id: str execution_status: str @@ -48,6 +74,19 @@ def __post_init__(self) -> None: self.message = self.message or None self.status_api = self.status_api or None + @staticmethod + def _remove_item_top_metadata(item: dict, keys_to_remove: list[str]) -> None: + """Remove metadata keys from top-level item['metadata'].""" + if "metadata" not in item: + return + if keys_to_remove: + item_metadata = item["metadata"] + if isinstance(item_metadata, dict): + for key in keys_to_remove: + item_metadata.pop(key, None) + else: + item.pop("metadata", None) + def remove_result_metadata_keys(self, keys_to_remove: list[str] = []) -> None: """Removes specified keys from the 'metadata' dictionary within each 'result' dictionary in the 'result' list attribute of the instance. If @@ -61,13 +100,34 @@ def remove_result_metadata_keys(self, keys_to_remove: list[str] = []) -> None: for item in self.result: if not isinstance(item, dict): - break + continue + # Handle metadata nested inside item["result"]["metadata"] result = item.get("result") - if not isinstance(result, dict): - break + if isinstance(result, dict): + self._remove_specific_keys(result=result, keys_to_remove=keys_to_remove) + + # Handle top-level item["metadata"] (workers cache path) + self._remove_item_top_metadata(item, keys_to_remove) + + def remove_inner_result_metadata(self) -> None: + """Removes only the inner item["result"]["metadata"] dict (extraction + metadata like highlight_data, per-model costs, etc.) while preserving + the outer item["metadata"] dict which contains workflow identification + keys (source_name, source_hash, workflow_id, etc.). - self._remove_specific_keys(result=result, keys_to_remove=keys_to_remove) + Use this instead of remove_result_metadata_keys() when you want to + strip extraction metadata but keep workflow identification metadata. 
+ """ + if not isinstance(self.result, list): + return + + for item in self.result: + if not isinstance(item, dict): + continue + result = item.get("result") + if isinstance(result, dict): + result.pop("metadata", None) def remove_result_metrics(self) -> None: """Removes the 'metrics' key from the 'result' dictionary within each diff --git a/backend/workflow_manager/workflow_v2/workflow_helper.py b/backend/workflow_manager/workflow_v2/workflow_helper.py index 4dce7b7938..ee8c6ce931 100644 --- a/backend/workflow_manager/workflow_v2/workflow_helper.py +++ b/backend/workflow_manager/workflow_v2/workflow_helper.py @@ -592,6 +592,46 @@ def execute_workflow_async( error=str(error), ) + @staticmethod + @celery_app.task( + name="async_execute_bin", + autoretry_for=(Exception,), + max_retries=0, + retry_backoff=True, + retry_backoff_max=500, + retry_jitter=True, + ) + def execute_bin( + schema_name: str, + workflow_id: str, + execution_id: str, + hash_values_of_files: dict[str, dict[str, Any]], + scheduled: bool = False, + execution_mode: tuple[str, str] | None = None, + pipeline_id: str | None = None, + use_file_history: bool = True, + **kwargs: dict[str, Any], + ) -> list[Any] | None: + """Celery task entry point for async workflow execution. + + Dispatches to execute_workflow which builds and sends the + chord to v2 file_processing / callback workers. 
+ """ + task_id = current_task.request.id + StateStore.set(Account.ORGANIZATION_ID, schema_name) + return WorkflowHelper.execute_workflow( + organization_id=schema_name, + task_id=task_id, + workflow_id=workflow_id, + execution_id=execution_id, + hash_values_of_files=hash_values_of_files, + scheduled=scheduled, + execution_mode=execution_mode, + pipeline_id=pipeline_id, + use_file_history=use_file_history, + **kwargs, + ) + @staticmethod def execute_workflow( organization_id: str, diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 6f1996818a..3845253b16 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -57,6 +57,29 @@ services: labels: - traefik.enable=false + # Celery worker for Prompt Studio IDE callbacks. + # Processes post-execution callbacks via InternalAPIClient (no Django). + # Handles: ide_index_complete/error, ide_prompt_complete/error. + worker-ide-callback: + image: unstract/worker-unified:${VERSION} + container_name: unstract-worker-ide-callback + restart: unless-stopped + command: ["ide_callback"] + env_file: + - ../workers/.env + - ./essentials.env + depends_on: + - db + - redis + - rabbitmq + environment: + - ENVIRONMENT=development + - APPLICATION_NAME=unstract-worker-ide-callback + - WORKER_TYPE=ide_callback + - WORKER_NAME=ide-callback-worker + labels: + - traefik.enable=false + # Celery Flower celery-flower: image: unstract/backend:${VERSION} @@ -483,6 +506,41 @@ services: - ./workflow_data:/data - ${TOOL_REGISTRY_CONFIG_SRC_PATH}:/data/tool_registry_config + worker-executor-v2: + image: unstract/worker-unified:${VERSION} + container_name: unstract-worker-executor-v2 + restart: unless-stopped + command: ["executor"] + ports: + - "8092:8088" + env_file: + - ../workers/.env + - ./essentials.env + depends_on: + - db + - redis + - rabbitmq + - platform-service + environment: + - ENVIRONMENT=development + - APPLICATION_NAME=unstract-worker-executor-v2 + - WORKER_TYPE=executor + - 
WORKER_NAME=executor-worker-v2 + - EXECUTOR_METRICS_PORT=8088 + - HEALTH_PORT=8088 + # Configurable Celery options + - CELERY_QUEUES_EXECUTOR=${CELERY_QUEUES_EXECUTOR:-celery_executor_legacy} + - CELERY_POOL=${WORKER_EXECUTOR_POOL:-prefork} + - CELERY_PREFETCH_MULTIPLIER=${WORKER_EXECUTOR_PREFETCH_MULTIPLIER:-1} + - CELERY_CONCURRENCY=${WORKER_EXECUTOR_CONCURRENCY:-2} + - CELERY_EXTRA_ARGS=${WORKER_EXECUTOR_EXTRA_ARGS:-} + labels: + - traefik.enable=false + volumes: + - ./workflow_data:/data + - ${TOOL_REGISTRY_CONFIG_SRC_PATH}:/data/tool_registry_config + - prompt_studio_data:/app/prompt-studio-data + volumes: prompt_studio_data: unstract_data: diff --git a/docker/dockerfiles/worker-unified.Dockerfile b/docker/dockerfiles/worker-unified.Dockerfile index e4db729308..133a3be2f9 100644 --- a/docker/dockerfiles/worker-unified.Dockerfile +++ b/docker/dockerfiles/worker-unified.Dockerfile @@ -71,8 +71,12 @@ COPY ${BUILD_CONTEXT_PATH}/ ./ # Set shell with pipefail for proper error handling in pipes SHELL ["/bin/bash", "-o", "pipefail", "-c"] -# Install project and OpenTelemetry instrumentation (as root to avoid permission issues) -# No symlinks needed - PYTHONPATH handles the paths correctly +# Install project, OpenTelemetry instrumentation, and executor plugins. +# No symlinks needed - PYTHONPATH handles the paths correctly. +# Executor plugins (cloud-only, no-op for OSS) register via setuptools entry points: +# - unstract.executor.executors (executor classes, e.g. table_extractor) +# - unstract.executor.plugins (utility plugins, e.g. highlight-data, challenge) +# Editable installs (-e) ensure Path(__file__) resolves to the source directory. 
RUN uv sync --group deploy --locked && \ uv run opentelemetry-bootstrap -a requirements | uv pip install --requirement - && \ # Use OpenTelemetry v1 - v2 breaks LiteLLM with instrumentation enabled @@ -80,7 +84,14 @@ RUN uv sync --group deploy --locked && \ uv pip install opentelemetry-instrumentation-openai && \ { chmod +x ./run-worker.sh ./run-worker-docker.sh 2>/dev/null || true; } && \ touch requirements.txt && \ - { chown -R worker:worker ./run-worker.sh ./run-worker-docker.sh 2>/dev/null || true; } + { chown -R worker:worker ./run-worker.sh ./run-worker-docker.sh 2>/dev/null || true; } && \ + for plugin_dir in /app/plugins/*/; do \ + if [ -f "$plugin_dir/pyproject.toml" ] && \ + grep -qE 'unstract\.executor\.(executors|plugins)' "$plugin_dir/pyproject.toml" 2>/dev/null; then \ + echo "Installing executor plugin: $(basename $plugin_dir)" && \ + uv pip install -e "$plugin_dir" || true; \ + fi; \ + done # Switch to worker user USER worker diff --git a/docker/dockerfiles/worker-unified.Dockerfile.dockerignore b/docker/dockerfiles/worker-unified.Dockerfile.dockerignore index fca472f1f1..110627ea61 100644 --- a/docker/dockerfiles/worker-unified.Dockerfile.dockerignore +++ b/docker/dockerfiles/worker-unified.Dockerfile.dockerignore @@ -51,7 +51,6 @@ Thumbs.db # Documentation **/docs/ -**/*.md !README.md !unstract !unstract/** diff --git a/docker/sample.compose.override.yaml b/docker/sample.compose.override.yaml index 36606f8f2a..e762d45e79 100644 --- a/docker/sample.compose.override.yaml +++ b/docker/sample.compose.override.yaml @@ -320,6 +320,11 @@ services: path: ../workers/uv.lock ######################################################################################################### + # IDE callback worker (workers image, processes ide_callback queue) + worker-ide-callback: + build: + dockerfile: docker/dockerfiles/worker-unified.Dockerfile + context: .. 
# Celery worker for dashboard metrics (memory optimized: reduced autoscale) worker-metrics: build: diff --git a/docs/local-dev-setup-executor-migration.md b/docs/local-dev-setup-executor-migration.md new file mode 100644 index 0000000000..8bb6921fee --- /dev/null +++ b/docs/local-dev-setup-executor-migration.md @@ -0,0 +1,586 @@ +# Local Dev Setup: Executor Migration (Pluggable Executor System v2) + +> **Branch:** `feat/execution-backend` +> **Date:** 2026-02-19 + +This guide covers everything needed to run and test the executor migration locally. + +--- + +## Table of Contents + +1. [Architecture Overview (Post-Migration)](#1-architecture-overview-post-migration) +2. [Prerequisites](#2-prerequisites) +3. [Service Dependency Map](#3-service-dependency-map) +4. [Step-by-Step Setup](#4-step-by-step-setup) +5. [Environment Configuration](#5-environment-configuration) +6. [Running the Executor Worker](#6-running-the-executor-worker) +7. [Port Reference](#7-port-reference) +8. [Health Check Endpoints](#8-health-check-endpoints) +9. [Debugging & Troubleshooting](#9-debugging--troubleshooting) +10. [Test Verification Checklist](#10-test-verification-checklist) + +--- + +## 1. 
Architecture Overview (Post-Migration) + +``` +┌──────────────────────────────────────────────────────────────┐ +│ CALLERS │ +│ │ +│ Workflow Path: │ +│ process_file_batch → structure_tool_task │ +│ → ExecutionDispatcher.dispatch() [Celery] │ +│ → AsyncResult.get() │ +│ │ +│ Prompt Studio IDE: │ +│ Django View → PromptStudioHelper │ +│ → ExecutionDispatcher.dispatch() [Celery] │ +│ → AsyncResult.get() │ +└───────────────────────┬──────────────────────────────────────┘ + │ Celery task: execute_extraction + ▼ +┌──────────────────────────────────────────────────────────────┐ +│ EXECUTOR WORKER (dedicated, queue: "executor") │ +│ │ +│ execute_extraction task │ +│ → ExecutionOrchestrator → ExecutorRegistry → LegacyExecutor │ +│ → Returns ExecutionResult via Celery result backend │ +└──────────────────────────────────────────────────────────────┘ +``` + +**What changed:** +- `prompt-service` Flask app is **replaced** by the executor worker (Celery) +- Structure tool Docker container is **replaced** by `structure_tool_task` (Celery task in file_processing worker) +- `PromptTool` SDK HTTP client is **replaced** by `ExecutionDispatcher` (Celery dispatch) +- **No DB schema changes** — no Django migrations needed + +**What stays the same:** +- `platform-service` (port 3001) — still serves tool metadata +- `runner` (port 5002) — still needed for Classifier, Text Extractor, Translate tools +- `x2text-service` (port 3004) — still needed for text extraction +- All adapter SDKs (LLM, Embedding, VectorDB, X2Text) — used by LegacyExecutor via ExecutorToolShim +- Frontend — no changes (same REST API responses) + +--- + +## 2. 
Prerequisites + +### 2.1 System Requirements + +| Requirement | Minimum | Notes | +|---|---|---| +| Docker + Docker Compose | v2.20+ | `docker compose version` | +| Python | 3.11+ | System or pyenv | +| uv | Latest | `pip install uv` or use the repo-local binary at `backend/venv/bin/uv` | +| Git | 2.30+ | On `feat/execution-backend` branch | +| Free RAM | 8 GB+ | Many services run concurrently | +| Free Disk | 10 GB+ | Docker images + volumes | + +### 2.2 Verify Branch + +```bash +cd /path/to/unstract # root of your local clone of the repository +git branch --show-current +# Expected: feat/execution-backend +``` + +### 2.3 Required Docker Images + +The system needs these images built: + +```bash +# Build all images (from docker/ directory) +cd docker +docker compose -f docker-compose.build.yaml build + +# Or build just the critical ones: +docker compose -f docker-compose.build.yaml build backend +docker compose -f docker-compose.build.yaml build platform-service +docker compose -f docker-compose.build.yaml build worker-unified # V2 workers including executor +docker compose -f docker-compose.build.yaml build runner +docker compose -f docker-compose.build.yaml build frontend +``` + +> **Tip:** For faster dev builds, set `MINIMAL_BUILD=1` in docker-compose.build.yaml args. + +--- + +## 3.
Service Dependency Map + +### Essential Infrastructure (must be running for ANYTHING to work) + +| Service | Container | Port | Purpose | +|---|---|---|---| +| PostgreSQL (pgvector) | `unstract-db` | 5432 | Primary database | +| Redis | `unstract-redis` | 6379 | Cache + queues | +| RabbitMQ | `unstract-rabbitmq` | 5672 (AMQP), 15672 (UI) | Celery message broker | +| MinIO | `unstract-minio` | 9000 (S3), 9001 (Console) | Object storage | +| Traefik | `unstract-proxy` | 80, 8080 (Dashboard) | Reverse proxy | + +### Application Services + +| Service | Container | Port | Required For | +|---|---|---|---| +| Backend (Django) | `unstract-backend` | 8000 | API, auth, DB migrations | +| Platform Service | `unstract-platform-service` | 3001 | Tool metadata, adapter configs | +| X2Text Service | `unstract-x2text-service` | 3004 | Text extraction (used by executor) | +| Runner | `unstract-runner` | 5002 | Non-structure tools (Classifier, etc.) | +| Frontend | `unstract-frontend` | 3000 | Web UI | +| Flipt | `unstract-flipt` | 8082 (REST), 9005 (gRPC) | Feature flags | + +### Workers (V2 Unified — `--profile workers-v2`) + +| Worker | Container | Health Port | Queue(s) | +|---|---|---|---| +| **Executor** | `unstract-worker-executor-v2` | 8088 | `executor` | +| File Processing | `unstract-worker-file-processing-v2` | 8082 | `file_processing`, `api_file_processing` | +| API Deployment | `unstract-worker-api-deployment-v2` | 8090 | `celery_api_deployments` | +| Callback | `unstract-worker-callback-v2` | 8083 | `file_processing_callback`, `api_file_processing_callback` | +| General | `unstract-worker-general-v2` | 8082 | `celery` | +| Notification | `unstract-worker-notification-v2` | 8085 | `notifications`, `notifications_*` | +| Log Consumer | `unstract-worker-log-consumer-v2` | 8084 | `celery_log_task_queue` | +| Scheduler | `unstract-worker-scheduler-v2` | 8087 | `scheduler` | + +### Post-Migration: REMOVED Services + +| Service | Port | Replaced By | +|---|---|---| +| 
~~Prompt Service~~ | ~~3003~~ | Executor Worker (LegacyExecutor inline) | +| ~~Structure Tool (Docker)~~ | N/A | `structure_tool_task` (Celery) | + +--- + +## 4. Step-by-Step Setup + +### 4.1 Start Essential Infrastructure + +```bash +cd /home/harini/Documents/Workspace/unstract-poc/clean/unstract/docker + +# Start infrastructure services only +docker compose -f docker-compose-dev-essentials.yaml up -d +``` + +Wait for all services to be healthy: +```bash +docker compose -f docker-compose-dev-essentials.yaml ps +``` + +### 4.2 Start Application Services + +**Option A: All via Docker Compose (recommended for first-time setup)** + +```bash +cd docker + +# Start everything including V2 workers (with executor) +docker compose --profile workers-v2 up -d +``` + +**Option B: Hybrid mode (services in Docker, workers local)** + +This is useful when you want to iterate on worker code without rebuilding images. + +```bash +# Start only infrastructure + app services (no V2 workers) +docker compose up -d + +# Then run executor worker locally (see Section 6) +``` + +### 4.3 Verify DB Migrations + +The backend container runs migrations on startup (`--migrate` flag). 
Verify: + +```bash +docker logs unstract-backend 2>&1 | grep -i "migration" +``` + +### 4.4 Create Workers .env for Local Development + +If running workers outside Docker, you need a local `.env`: + +```bash +cd /home/harini/Documents/Workspace/unstract-poc/clean/unstract/workers + +# Copy sample and adjust for local dev +cp sample.env .env +``` + +Then edit `workers/.env` — change all Docker hostnames to `localhost`: + +```ini +# === CRITICAL CHANGES FOR LOCAL DEV === +DJANGO_APP_BACKEND_URL=http://localhost:8000 +INTERNAL_API_BASE_URL=http://localhost:8000/internal +CELERY_BROKER_BASE_URL=amqp://localhost:5672// +DB_HOST=localhost +REDIS_HOST=localhost +CACHE_REDIS_HOST=localhost +PLATFORM_SERVICE_HOST=http://localhost +PLATFORM_SERVICE_PORT=3001 +# Legacy: prompt-service is replaced by the executor worker (see Section 3); PROMPT_* applies only until it is decommissioned +PROMPT_HOST=http://localhost +PROMPT_PORT=3003 +X2TEXT_HOST=http://localhost +X2TEXT_PORT=3004 +UNSTRACT_RUNNER_HOST=http://localhost +UNSTRACT_RUNNER_PORT=5002 +WORKFLOW_EXECUTION_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://localhost:9000", "key": "minio", "secret": "minio123"}}' +API_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://localhost:9000", "key": "minio", "secret": "minio123"}}' +``` + +> **Important:** The `INTERNAL_SERVICE_API_KEY` must match what the backend expects. Default dev value: `dev-internal-key-123`. + +--- + +## 5. 
Environment Configuration + +### 5.1 Key Environment Variables for Executor Worker + +| Variable | Default (Docker) | Local Override | Purpose | +|---|---|---|---| +| `CELERY_BROKER_BASE_URL` | `amqp://unstract-rabbitmq:5672//` | `amqp://localhost:5672//` | RabbitMQ connection | +| `CELERY_BROKER_USER` | `admin` | same | RabbitMQ user | +| `CELERY_BROKER_PASS` | `password` | same | RabbitMQ password | +| `DB_HOST` | `unstract-db` | `localhost` | PostgreSQL for result backend | +| `DB_USER` | `unstract_dev` | same | DB user | +| `DB_PASSWORD` | `unstract_pass` | same | DB password | +| `DB_NAME` | `unstract_db` | same | DB name | +| `DB_PORT` | `5432` | same | DB port | +| `REDIS_HOST` | `unstract-redis` | `localhost` | Redis for caching | +| `PLATFORM_SERVICE_HOST` | `http://unstract-platform-service` | `http://localhost` | Platform service URL | +| `PLATFORM_SERVICE_PORT` | `3001` | same | Platform service port | +| `X2TEXT_HOST` | `http://unstract-x2text-service` | `http://localhost` | X2Text service URL | +| `X2TEXT_PORT` | `3004` | same | X2Text service port | +| `INTERNAL_SERVICE_API_KEY` | `dev-internal-key-123` | same | Worker→Backend auth | +| `INTERNAL_API_BASE_URL` | `http://unstract-backend:8000/internal` | `http://localhost:8000/internal` | Backend internal API | +| `WORKFLOW_EXECUTION_FILE_STORAGE_CREDENTIALS` | (MinIO JSON, Docker host) | (MinIO JSON, localhost) | Shared file storage | + +### 5.2 Credentials Reference (Default Dev) + +| Service | Username | Password | +|---|---|---| +| PostgreSQL | `unstract_dev` | `unstract_pass` | +| RabbitMQ | `admin` | `password` | +| MinIO | `minio` | `minio123` | +| Redis | (none) | (none) | + +### 5.3 Hierarchical Celery Config + +Worker settings use a 3-tier hierarchy (most specific wins): + +1. **Worker-specific:** `EXECUTOR_TASK_TIME_LIMIT=7200` +2. **Global Celery:** `CELERY_TASK_TIME_LIMIT=3600` +3. **Code default:** (hardcoded fallback) + +--- + +## 6. 
Running the Executor Worker + +### 6.1 Via Docker Compose (easiest) + +```bash +cd docker + +# Start just the executor worker (assumes infra is up) +docker compose --profile workers-v2 up -d worker-executor-v2 + +# Check logs +docker logs -f unstract-worker-executor-v2 +``` + +### 6.2 Locally with run-worker.sh + +```bash +cd /home/harini/Documents/Workspace/unstract-poc/clean/unstract/workers + +# Ensure .env has local overrides (Section 4.4) +./run-worker.sh executor +``` + +Options: +```bash +./run-worker.sh -l DEBUG executor # Debug logging +./run-worker.sh -c 4 executor # 4 concurrent tasks +./run-worker.sh -P threads executor # Thread pool instead of prefork +./run-worker.sh -d executor # Run in background (detached) +./run-worker.sh -s # Show status of all workers +./run-worker.sh -k # Kill all workers +``` + +### 6.3 Locally with uv (manual) + +```bash +cd /home/harini/Documents/Workspace/unstract-poc/clean/unstract/workers + +# Load env +set -a && source .env && set +a + +# Run executor worker +uv run celery -A worker worker \ + --queues=executor \ + --loglevel=INFO \ + --pool=prefork \ + --concurrency=2 \ + --hostname=executor-worker@%h +``` + +### 6.4 Verify Executor Worker is Running + +```bash +# Check health endpoint +curl -s http://localhost:8088/health | python3 -m json.tool + +# Check Celery registered tasks +uv run celery -A worker inspect registered \ + --destination=executor-worker@$(hostname) + +# Expected task: execute_extraction +``` + +### 6.5 Running All V2 Workers + +```bash +# Via Docker +cd docker && docker compose --profile workers-v2 up -d + +# Via script (local) +cd workers && ./run-worker.sh all +``` + +--- + +## 7. 
Port Reference + +### Infrastructure + +| Service | Port | URL | +|---|---|---| +| PostgreSQL | 5432 | `psql -h localhost -U unstract_dev -d unstract_db` | +| Redis | 6379 | `redis-cli -h localhost` | +| RabbitMQ AMQP | 5672 | `amqp://admin:password@localhost:5672//` | +| RabbitMQ Management | 15672 | http://localhost:15672 (admin/password) | +| MinIO S3 API | 9000 | http://localhost:9000 | +| MinIO Console | 9001 | http://localhost:9001 (minio/minio123) | +| Qdrant | 6333 | http://localhost:6333 | +| Traefik Dashboard | 8080 | http://localhost:8080 | + +### Application + +| Service | Port | URL | +|---|---|---| +| Backend API | 8000 | http://localhost:8000/api/v1/ | +| Frontend | 3000 | http://frontend.unstract.localhost | +| Platform Service | 3001 | http://localhost:3001 | +| X2Text Service | 3004 | http://localhost:3004 | +| Runner | 5002 | http://localhost:5002 | +| Celery Flower (optional) | 5555 | http://localhost:5555 | + +### V2 Worker Health Ports + +| Worker | Internal Port | External Port (Docker) | +|---|---|---| +| API Deployment | 8090 | 8085 | +| Callback | 8083 | 8086 | +| File Processing | 8082 | 8087 | +| General | 8082 | 8088 | +| Notification | 8085 | 8089 | +| Log Consumer | 8084 | 8090 | +| Scheduler | 8087 | 8091 | +| **Executor** | **8088** | **8092** | + +### Debug Ports (Docker dev mode via compose.override.yaml) + +| Service | Debug Port | +|---|---| +| Backend | 5678 | +| Runner | 5679 | +| Platform Service | 5680 | +| Prompt Service | 5681 | +| File Processing Worker | 5682 | +| Callback Worker | 5683 | +| API Deployment Worker | 5684 | +| General Worker | 5685 | + +--- + +## 8. 
Health Check Endpoints + +Every V2 worker exposes `GET /health` on its health port. The examples below assume a locally run worker; for a worker running in Docker, use its external port from Section 7 (e.g. `8092` for the executor): + +```bash +# Executor worker +curl -s http://localhost:8088/health + +# Expected response: +# {"status": "healthy", "worker_type": "executor", ...} +``` + +All endpoints: +``` +http://localhost:8080/health — API Deployment worker +http://localhost:8081/health — General worker +http://localhost:8082/health — File Processing worker +http://localhost:8083/health — Callback worker +http://localhost:8084/health — Log Consumer worker +http://localhost:8085/health — Notification worker +http://localhost:8087/health — Scheduler worker +http://localhost:8088/health — Executor worker +``` + +--- + +## 9. Debugging & Troubleshooting + +### 9.1 Common Issues + +**"Connection refused" to RabbitMQ/Redis/DB** +- Check Docker containers are running: `docker ps` +- Check if using Docker hostnames vs localhost (see Section 5.1) +- Ensure ports are exposed: `docker port unstract-rabbitmq` + +**Executor worker starts but tasks don't execute** +- Check queue binding: Worker must listen on `executor` queue +- Check RabbitMQ UI (http://localhost:15672) → Queues tab → look for `executor` queue +- Check task is registered: `celery -A worker inspect registered` +- Check task routing in `workers/shared/infrastructure/config/registry.py` + +**"Module not found" errors in executor worker** +- Ensure `PYTHONPATH` includes the workers directory +- If running locally, `cd workers` before starting +- If using `run-worker.sh`, it sets PYTHONPATH automatically + +**MinIO file access errors** +- Check `WORKFLOW_EXECUTION_FILE_STORAGE_CREDENTIALS` has correct endpoint (localhost vs Docker hostname) +- Verify MinIO bucket exists: `mc ls minio/unstract/` +- MinIO bootstrap container creates the bucket on first start + +**Platform service connection errors** +- Executor needs `PLATFORM_SERVICE_HOST` and `PLATFORM_SERVICE_PORT` +- Verify platform-service is running: `curl http://localhost:3001/health` + +### 9.2 Useful
Debug Commands + +```bash +# Check all Docker containers +docker ps --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}" + +# Check RabbitMQ queues +docker exec unstract-rabbitmq rabbitmqctl list_queues name messages consumers + +# Check Celery worker status (from workers/ dir) +cd workers && uv run celery -A worker inspect active + +# Check registered tasks +cd workers && uv run celery -A worker inspect registered + +# Send a test task to executor +cd workers && uv run python -c " +from worker import app +from shared.enums.task_enums import TaskName +result = app.send_task( + TaskName.EXECUTE_EXTRACTION, + args=[{ + 'executor_name': 'legacy', + 'operation': 'extract', + 'run_id': 'test-123', + 'execution_source': 'tool', + 'executor_params': {} + }], + queue='executor' +) +print(f'Task ID: {result.id}') +print(f'Result: {result.get(timeout=30)}') +" + +# Monitor Celery events in real-time +cd workers && uv run celery -A worker events + +# Check Postgres (Celery result backend) +docker exec -it unstract-db psql -U unstract_dev -d unstract_db -c "SELECT task_id, status FROM public.celery_taskmeta ORDER BY date_done DESC LIMIT 10;" +``` + +### 9.3 Log Locations + +| Context | Location | +|---|---| +| Docker container | `docker logs <container-name>` | +| Local worker (foreground) | stdout/stderr | +| Local worker (detached) | `workers/<worker-dir>/<worker-name>.log` | +| Backend | `docker logs unstract-backend` | + +--- + +## 10. 
Test Verification Checklist + +### Phase 1 Sanity (Executor Framework) + +- [ ] Executor worker starts and connects to Celery broker +- [ ] Health check responds: `curl http://localhost:8088/health` +- [ ] `execute_extraction` task is registered in Celery +- [ ] No-op task dispatch round-trips successfully +- [ ] Task routing: task goes to `executor` queue, processed by executor worker + +### Phase 2 Sanity (LegacyExecutor) + +- [ ] `extract` operation returns `{"extracted_text": "..."}` +- [ ] `index` operation returns `{"doc_id": "..."}` +- [ ] `answer_prompt` returns `{"output": {...}, "metadata": {...}, "metrics": {...}}` +- [ ] `single_pass_extraction` returns same shape as answer_prompt +- [ ] `summarize` returns `{"data": "..."}` +- [ ] Error cases return `ExecutionResult(success=False, error="...")` not unhandled exceptions + +### Phase 3 Sanity (Structure Tool as Celery Task) + +- [ ] Run workflow with structure tool via new Celery path +- [ ] Compare output with Docker-based structure tool output +- [ ] Non-structure tools still work via Docker/Runner (regression check) + +### Phase 4 Sanity (IDE Path) + +- [ ] Open Prompt Studio IDE, create/load a project +- [ ] Run extraction on a document — result displays correctly +- [ ] Run prompt answering — output persists in DB +- [ ] Error cases display properly in IDE + +### Phase 5 Sanity (Decommission) + +- [ ] `docker compose up` boots cleanly — no errors from missing services +- [ ] No dangling references to prompt-service, PromptTool, PROMPT_HOST, PROMPT_PORT +- [ ] All health checks pass + +### Running Unit Tests + +```bash +# SDK1 tests (execution framework) +cd /home/harini/Documents/Workspace/unstract-poc/clean/unstract/unstract/sdk1 +/home/harini/Documents/Workspace/unstract-poc/clean/unstract/backend/venv/bin/uv run pytest -v + +# Workers tests (executor, LegacyExecutor, retrievers, etc.) 
+cd /home/harini/Documents/Workspace/unstract-poc/clean/unstract/workers +/home/harini/Documents/Workspace/unstract-poc/clean/unstract/backend/venv/bin/uv run pytest -v +``` + +--- + +## Quick Reference: One-Liner Setup + +```bash +# From repo root: +cd docker + +# 1. Build images +docker compose -f docker-compose.build.yaml build + +# 2. Start everything with V2 workers +docker compose --profile workers-v2 up -d + +# 3. Verify +docker ps --format "table {{.Names}}\t{{.Status}}" + +# 4. Check executor health +curl -s http://localhost:8092/health # 8092 = external Docker port for executor +``` + +For the automated version, use the setup check script: `scripts/check-local-setup.sh` diff --git a/frontend/bun.lock b/frontend/bun.lock index 662c50ef8e..c71305c255 100644 --- a/frontend/bun.lock +++ b/frontend/bun.lock @@ -63,6 +63,8 @@ "@biomejs/biome": "^2.3.13", "@vitejs/plugin-react": "^4.4.0", "baseline-browser-mapping": "^2.9.19", + "happy-dom": "^20.8.8", + "jsdom": "^27.0.1", "vite": "^7.0.0", "vite-plugin-svgr": "^4.5.0", "vitest": "^3.2.0", @@ -70,6 +72,8 @@ }, }, "packages": { + "@acemir/cssom": ["@acemir/cssom@0.9.31", "", {}, "sha512-ZnR3GSaH+/vJ0YlHau21FjfLYjMpYVIzTD8M8vIEQvIGxeOXyXdzCI140rrCY862p/C/BbzWsjc1dgnM9mkoTA=="], + "@adobe/css-tools": ["@adobe/css-tools@4.4.4", "", {}, "sha512-Elp+iwUx5rN5+Y8xLt5/GRoG20WGoDCQ/1Fb+1LiGtvwbDavuSk0jhD/eZdckHAuzcDzccnkv+rEjyWfRx18gg=="], "@ant-design/colors": ["@ant-design/colors@7.2.1", "", { "dependencies": { "@ant-design/fast-color": "^2.0.6" } }, "sha512-lCHDcEzieu4GA3n8ELeZ5VQ8pKQAWcGGLRTQ50aQM2iqPpq2evTxER84jfdPvsPAtEcZ7m44NI45edFMo8oOYQ=="], @@ -86,6 +90,12 @@ "@ant-design/react-slick": ["@ant-design/react-slick@1.1.2", "", { "dependencies": { "@babel/runtime": "^7.10.4", "classnames": "^2.2.5", "json2mq": "^0.2.0", "resize-observer-polyfill": "^1.5.1", "throttle-debounce": "^5.0.0" }, "peerDependencies": { "react": ">=16.9.0" } }, 
"sha512-EzlvzE6xQUBrZuuhSAFTdsr4P2bBBHGZwKFemEfq8gIGyIQCxalYfZW/T2ORbtQx5rU69o+WycP3exY/7T1hGA=="], + "@asamuzakjp/css-color": ["@asamuzakjp/css-color@4.1.2", "", { "dependencies": { "@csstools/css-calc": "^3.0.0", "@csstools/css-color-parser": "^4.0.1", "@csstools/css-parser-algorithms": "^4.0.0", "@csstools/css-tokenizer": "^4.0.0", "lru-cache": "^11.2.5" } }, "sha512-NfBUvBaYgKIuq6E/RBLY1m0IohzNHAYyaJGuTK79Z23uNwmz2jl1mPsC5ZxCCxylinKhT1Amn5oNTlx1wN8cQg=="], + + "@asamuzakjp/dom-selector": ["@asamuzakjp/dom-selector@6.8.1", "", { "dependencies": { "@asamuzakjp/nwsapi": "^2.3.9", "bidi-js": "^1.0.3", "css-tree": "^3.1.0", "is-potential-custom-element-name": "^1.0.1", "lru-cache": "^11.2.6" } }, "sha512-MvRz1nCqW0fsy8Qz4dnLIvhOlMzqDVBabZx6lH+YywFDdjXhMY37SmpV1XFX3JzG5GWHn63j6HX6QPr3lZXHvQ=="], + + "@asamuzakjp/nwsapi": ["@asamuzakjp/nwsapi@2.3.9", "", {}, "sha512-n8GuYSrI9bF7FFZ/SjhwevlHc8xaVlb/7HmHelnc/PZXBD2ZR49NnN9sMMuDdEGPeeRQ5d0hqlSlEpgCX3Wl0Q=="], + "@babel/code-frame": ["@babel/code-frame@7.28.6", "", { "dependencies": { "@babel/helper-validator-identifier": "^7.28.5", "js-tokens": "^4.0.0", "picocolors": "^1.1.1" } }, "sha512-JYgintcMjRiCvS8mMECzaEn+m3PfoQiyqukOMCCVQtoJGYJw8j/8LBJEiqkHLkfwCcs74E3pbAUFNg7d9VNJ+Q=="], "@babel/compat-data": ["@babel/compat-data@7.28.6", "", {}, "sha512-2lfu57JtzctfIrcGMz992hyLlByuzgIk58+hhGCxjKZ3rWI82NnVLjXcaTqkI2NvlcvOskZaiZ5kjUALo3Lpxg=="], @@ -144,6 +154,18 @@ "@biomejs/cli-win32-x64": ["@biomejs/cli-win32-x64@2.3.13", "", { "os": "win32", "cpu": "x64" }, "sha512-trDw2ogdM2lyav9WFQsdsfdVy1dvZALymRpgmWsvSez0BJzBjulhOT/t+wyKeh3pZWvwP3VMs1SoOKwO3wecMQ=="], + "@csstools/color-helpers": ["@csstools/color-helpers@6.0.2", "", {}, "sha512-LMGQLS9EuADloEFkcTBR3BwV/CGHV7zyDxVRtVDTwdI2Ca4it0CCVTT9wCkxSgokjE5Ho41hEPgb8OEUwoXr6Q=="], + + "@csstools/css-calc": ["@csstools/css-calc@3.1.1", "", { "peerDependencies": { "@csstools/css-parser-algorithms": "^4.0.0", "@csstools/css-tokenizer": "^4.0.0" } }, 
"sha512-HJ26Z/vmsZQqs/o3a6bgKslXGFAungXGbinULZO3eMsOyNJHeBBZfup5FiZInOghgoM4Hwnmw+OgbJCNg1wwUQ=="], + + "@csstools/css-color-parser": ["@csstools/css-color-parser@4.0.2", "", { "dependencies": { "@csstools/color-helpers": "^6.0.2", "@csstools/css-calc": "^3.1.1" }, "peerDependencies": { "@csstools/css-parser-algorithms": "^4.0.0", "@csstools/css-tokenizer": "^4.0.0" } }, "sha512-0GEfbBLmTFf0dJlpsNU7zwxRIH0/BGEMuXLTCvFYxuL1tNhqzTbtnFICyJLTNK4a+RechKP75e7w42ClXSnJQw=="], + + "@csstools/css-parser-algorithms": ["@csstools/css-parser-algorithms@4.0.0", "", { "peerDependencies": { "@csstools/css-tokenizer": "^4.0.0" } }, "sha512-+B87qS7fIG3L5h3qwJ/IFbjoVoOe/bpOdh9hAjXbvx0o8ImEmUsGXN0inFOnk2ChCFgqkkGFQ+TpM5rbhkKe4w=="], + + "@csstools/css-syntax-patches-for-csstree": ["@csstools/css-syntax-patches-for-csstree@1.1.2", "", { "peerDependencies": { "css-tree": "^3.2.1" }, "optionalPeers": ["css-tree"] }, "sha512-5GkLzz4prTIpoyeUiIu3iV6CSG3Plo7xRVOFPKI7FVEJ3mZ0A8SwK0XU3Gl7xAkiQ+mDyam+NNp875/C5y+jSA=="], + + "@csstools/css-tokenizer": ["@csstools/css-tokenizer@4.0.0", "", {}, "sha512-QxULHAm7cNu72w97JUNCBFODFaXpbDg+dP8b/oWFAZ2MTRppA3U00Y2L1HqaS4J6yBqxwa/Y3nMBaxVKbB/NsA=="], + "@emotion/babel-plugin": ["@emotion/babel-plugin@11.13.5", "", { "dependencies": { "@babel/helper-module-imports": "^7.16.7", "@babel/runtime": "^7.18.3", "@emotion/hash": "^0.9.2", "@emotion/memoize": "^0.9.0", "@emotion/serialize": "^1.3.3", "babel-plugin-macros": "^3.1.0", "convert-source-map": "^1.5.0", "escape-string-regexp": "^4.0.0", "find-root": "^1.1.0", "source-map": "^0.5.7", "stylis": "4.2.0" } }, "sha512-pxHCpT2ex+0q+HH91/zsdHkw/lXd468DIN2zvfvLtPKLLMo6gQj7oLObq8PhkrxOZb/gGCq03S3Z7PDhS8pduQ=="], "@emotion/cache": ["@emotion/cache@11.14.0", "", { "dependencies": { "@emotion/memoize": "^0.9.0", "@emotion/sheet": "^1.4.0", "@emotion/utils": "^1.4.2", "@emotion/weak-memoize": "^0.4.0", "stylis": "4.2.0" } }, 
"sha512-L/B1lc/TViYk4DcpGxtAVbx0ZyiKM5ktoIyafGkH6zg/tj+mA+NE//aPYKG0k8kCHSHVJrpLpcAlOBEXQ3SavA=="], @@ -216,6 +238,8 @@ "@esbuild/win32-x64": ["@esbuild/win32-x64@0.27.2", "", { "os": "win32", "cpu": "x64" }, "sha512-sRdU18mcKf7F+YgheI/zGf5alZatMUTKj/jNS6l744f9u3WFu4v7twcUI9vu4mknF4Y9aDlblIie0IM+5xxaqQ=="], + "@exodus/bytes": ["@exodus/bytes@1.15.0", "", { "peerDependencies": { "@noble/hashes": "^1.8.0 || ^2.0.0" }, "optionalPeers": ["@noble/hashes"] }, "sha512-UY0nlA+feH81UGSHv92sLEPLCeZFjXOuHhrIo0HQydScuQc8s0A7kL/UdgwgDq8g8ilksmuoF35YVTNphV2aBQ=="], + "@jest/diff-sequences": ["@jest/diff-sequences@30.0.1", "", {}, "sha512-n5H8QLDJ47QqbCNn5SuFjCRDrOLEZ0h8vAHCK5RL9Ls7Xa8AQLa/YxAc9UjFqoEDM48muwtBGjtMY5cr0PLDCw=="], "@jest/expect-utils": ["@jest/expect-utils@30.2.0", "", { "dependencies": { "@jest/get-type": "30.1.0" } }, "sha512-1JnRfhqpD8HGpOmQp180Fo9Zt69zNtC+9lR+kT7NVL05tNXIi+QC8Csz7lfidMoVLPD3FnOtcmp0CEFnxExGEA=="], @@ -540,6 +564,10 @@ "@types/use-sync-external-store": ["@types/use-sync-external-store@0.0.6", "", {}, "sha512-zFDAD+tlpf2r4asuHEj0XH6pY6i0g5NeAHPn+15wk3BV6JA69eERFXC1gyGThDkVa1zCyKr5jox1+2LbV/AMLg=="], + "@types/whatwg-mimetype": ["@types/whatwg-mimetype@3.0.2", "", {}, "sha512-c2AKvDT8ToxLIOUlN51gTiHXflsfIFisS4pO7pDPoKouJCESkhZnEy623gwP9laCy5lnLDAw1vAzu2vM2YLOrA=="], + + "@types/ws": ["@types/ws@8.18.1", "", { "dependencies": { "@types/node": "*" } }, "sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg=="], + "@types/yargs": ["@types/yargs@17.0.35", "", { "dependencies": { "@types/yargs-parser": "*" } }, "sha512-qUHkeCyQFxMXg79wQfTtfndEC+N9ZZg76HJftDJp+qH2tV7Gj4OJi7l+PiWwJ+pWtW8GwSmqsDj/oymhrTWXjg=="], "@types/yargs-parser": ["@types/yargs-parser@21.0.3", "", {}, "sha512-I4q9QU9MQv4oEOz4tAHJtNz1cwuLxn2F3xcc2iV5WdqLPpUnj30aUuxt1mAxYTG+oe8CZMV/+6rU4S4gRDzqtQ=="], @@ -562,7 +590,7 @@ "abbrev": ["abbrev@1.1.1", "", {}, 
"sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q=="], - "agent-base": ["agent-base@6.0.2", "", { "dependencies": { "debug": "4" } }, "sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ=="], + "agent-base": ["agent-base@7.1.4", "", {}, "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ=="], "ajv": ["ajv@8.17.1", "", { "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", "json-schema-traverse": "^1.0.0", "require-from-string": "^2.0.2" } }, "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g=="], @@ -600,6 +628,8 @@ "baseline-browser-mapping": ["baseline-browser-mapping@2.9.19", "", { "bin": { "baseline-browser-mapping": "dist/cli.js" } }, "sha512-ipDqC8FrAl/76p2SSWKSI+H9tFwm7vYqXQrItCuiVPt26Km0jS+NzSsBWAaBusvSbQcfJG+JitdMm+wZAgTYqg=="], + "bidi-js": ["bidi-js@1.0.3", "", { "dependencies": { "require-from-string": "^2.0.2" } }, "sha512-RKshQI1R3YQ+n9YJz2QQ147P66ELpa1FQEg20Dk8oW9t2KgLbpDLLp9aGZ7y8WHSshDknG0bknqGw5/tyCs5tw=="], + "brace-expansion": ["brace-expansion@1.1.12", "", { "dependencies": { "balanced-match": "^1.0.0", "concat-map": "0.0.1" } }, "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg=="], "braces": ["braces@3.0.3", "", { "dependencies": { "fill-range": "^7.1.1" } }, "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA=="], @@ -680,8 +710,12 @@ "cross-spawn": ["cross-spawn@7.0.6", "", { "dependencies": { "path-key": "^3.1.0", "shebang-command": "^2.0.0", "which": "^2.0.1" } }, "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA=="], + "css-tree": ["css-tree@3.2.1", "", { "dependencies": { "mdn-data": "2.27.1", "source-map-js": "^1.2.1" } }, 
"sha512-X7sjQzceUhu1u7Y/ylrRZFU2FS6LRiFVp6rKLPg23y3x3c3DOKAwuXGDp+PAGjh6CSnCjYeAul8pcT8bAl+lSA=="], + "css.escape": ["css.escape@1.5.1", "", {}, "sha512-YUifsXXuknHlUsmlgyY0PKzgPOr7/FjCePfHNt0jxm83wHZi44VDMQ7/fGNkjY3/jV1MC+1CmZbaHzugyeRtpg=="], + "cssstyle": ["cssstyle@5.3.7", "", { "dependencies": { "@asamuzakjp/css-color": "^4.1.1", "@csstools/css-syntax-patches-for-csstree": "^1.0.21", "css-tree": "^3.1.0", "lru-cache": "^11.2.4" } }, "sha512-7D2EPVltRrsTkhpQmksIu+LxeWAIEk6wRDMJ1qljlv+CKHJM+cJLlfhWIzNA44eAsHXSNe3+vO6DW1yCYx8SuQ=="], + "csstype": ["csstype@3.2.3", "", {}, "sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ=="], "d3-array": ["d3-array@3.2.4", "", { "dependencies": { "internmap": "1 - 2" } }, "sha512-tdQAmyA18i4J7wprpYq8ClcxZy3SC31QMeByyCFyRt7BVHdREQZ5lpzoe5mFEYZUWe+oq8HBvk9JjpibyEV4Jg=="], @@ -706,12 +740,16 @@ "d3-timer": ["d3-timer@3.0.1", "", {}, "sha512-ndfJ/JxxMd3nw31uyKoY2naivF+r29V+Lc0svZxe1JvvIRmi8hUsrMvdOwgS1o6uBHmiz91geQ0ylPP0aj1VUA=="], + "data-urls": ["data-urls@6.0.1", "", { "dependencies": { "whatwg-mimetype": "^5.0.0", "whatwg-url": "^15.1.0" } }, "sha512-euIQENZg6x8mj3fO6o9+fOW8MimUI4PpD/fZBhJfeioZVy9TUpM4UY7KjQNVZFlqwJ0UdzRDzkycB997HEq1BQ=="], + "date-fns": ["date-fns@4.1.0", "", {}, "sha512-Ukq0owbQXxa/U3EGtsdVBkR1w7KOQ5gIBqdH2hkvknzZPYvBxb/aa6E8L7tmjFtkwZBu3UXBbjIgPo/Ez4xaNg=="], "dayjs": ["dayjs@1.11.19", "", {}, "sha512-t5EcLVS6QPBNqM2z8fakk/NKel+Xzshgt8FFKAn+qwlD1pzZWxh0nVCrvFK7ZDb6XucZeF9z8C7CBWTRIVApAw=="], "debug": ["debug@4.4.3", "", { "dependencies": { "ms": "^2.1.3" } }, "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA=="], + "decimal.js": ["decimal.js@10.6.0", "", {}, "sha512-YpgQiITW3JXGntzdUmyUR1V812Hn8T1YVXhCu+wO3OpS4eU9l4YdD3qjyiKdV6mvV29zapkMeD390UVEf2lkUg=="], + "decimal.js-light": ["decimal.js-light@2.5.1", "", {}, "sha512-qIMFpTMZmny+MMIitAB6D7iVPEorVw6YQRWkvarTkT4tBeSLLiHzcwj6q0MmYSFCiVpiqPJTJEYIrpcPzVEIvg=="], 
"decode-named-character-reference": ["decode-named-character-reference@1.3.0", "", { "dependencies": { "character-entities": "^2.0.0" } }, "sha512-GtpQYB283KrPp6nRw50q3U9/VfOutZOe103qlN7BPP6Ad27xYnOIWv4lPzo8HCAL+mMZofJ9KEy30fq6MfaK6Q=="], @@ -762,7 +800,7 @@ "engine.io-parser": ["engine.io-parser@5.2.3", "", {}, "sha512-HqD3yTBfnBxIrbnM1DoD6Pcq8NECnh8d4As1Qgh0z5Gg3jRRIqijury0CL3ghu/edArpUYiYqQiDUQBIs4np3Q=="], - "entities": ["entities@4.5.0", "", {}, "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw=="], + "entities": ["entities@7.0.1", "", {}, "sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA=="], "error-ex": ["error-ex@1.3.4", "", { "dependencies": { "is-arrayish": "^0.2.1" } }, "sha512-sqQamAnR14VgCr1A618A3sGrygcpK+HEbenA/HiEAkkUwcZIIB/tgWqHFxWgOyDh4nB4JCRimh79dR5Ywc9MDQ=="], @@ -850,6 +888,8 @@ "handlebars": ["handlebars@4.7.8", "", { "dependencies": { "minimist": "^1.2.5", "neo-async": "^2.6.2", "source-map": "^0.6.1", "wordwrap": "^1.0.0" }, "optionalDependencies": { "uglify-js": "^3.1.4" }, "bin": { "handlebars": "bin/handlebars" } }, "sha512-vafaFqs8MZkRrSX7sFVUdo3ap/eNiLnb4IakshzvP56X5Nr1iGKAIqdX6tMlm6HcNRIkr6AxO5jFEoJzzpT8aQ=="], + "happy-dom": ["happy-dom@20.8.9", "", { "dependencies": { "@types/node": ">=20.0.0", "@types/whatwg-mimetype": "^3.0.2", "@types/ws": "^8.18.1", "entities": "^7.0.1", "whatwg-mimetype": "^3.0.0", "ws": "^8.18.3" } }, "sha512-Tz23LR9T9jOGVZm2x1EPdXqwA37G/owYMxRwU0E4miurAtFsPMQ1d2Jc2okUaSjZqAFz2oEn3FLXC5a0a+siyA=="], + "has-bigints": ["has-bigints@1.1.0", "", {}, "sha512-R3pbpkcIqv2Pm3dUwgjclDRVmWpTJW2DcMzcIhEXEx1oh/CEMObMm3KLmRJOdvhM7o4uQBnwr8pzRK2sJWIqfg=="], "has-flag": ["has-flag@4.0.0", "", {}, "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ=="], @@ -876,11 +916,15 @@ "hoist-non-react-statics": ["hoist-non-react-statics@3.3.2", "", { "dependencies": { "react-is": "^16.7.0" } }, 
"sha512-/gGivxi8JPKWNm/W0jSmzcMPpfpPLc3dY/6GxhX2hQ9iGj3aDfklV4ET7NjKpSinLpJ5vafa9iiGIEZg10SfBw=="], + "html-encoding-sniffer": ["html-encoding-sniffer@6.0.0", "", { "dependencies": { "@exodus/bytes": "^1.6.0" } }, "sha512-CV9TW3Y3f8/wT0BRFc1/KAVQ3TUHiXmaAb6VW9vtiMFf7SLoMd1PdAc4W3KFOFETBJUb90KatHqlsZMWV+R9Gg=="], + "http-proxy": ["http-proxy@1.18.1", "", { "dependencies": { "eventemitter3": "^4.0.0", "follow-redirects": "^1.0.0", "requires-port": "^1.0.0" } }, "sha512-7mz/721AbnJwIVbnaSv1Cz3Am0ZLT/UBwkC92VlxhXv/k/BBQfM2fXElQNC27BVGr0uwUpplYPQM9LnaBMR5NQ=="], + "http-proxy-agent": ["http-proxy-agent@7.0.2", "", { "dependencies": { "agent-base": "^7.1.0", "debug": "^4.3.4" } }, "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig=="], + "http-proxy-middleware": ["http-proxy-middleware@2.0.9", "", { "dependencies": { "@types/http-proxy": "^1.17.8", "http-proxy": "^1.18.1", "is-glob": "^4.0.1", "is-plain-obj": "^3.0.0", "micromatch": "^4.0.2" }, "peerDependencies": { "@types/express": "^4.17.13" }, "optionalPeers": ["@types/express"] }, "sha512-c1IyJYLYppU574+YI7R4QyX2ystMtVXZwIdzazUIPIJsHuWNd+mho2j+bKoHftndicGj9yh+xjd+l0yj7VeT1Q=="], - "https-proxy-agent": ["https-proxy-agent@5.0.1", "", { "dependencies": { "agent-base": "6", "debug": "4" } }, "sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA=="], + "https-proxy-agent": ["https-proxy-agent@7.0.6", "", { "dependencies": { "agent-base": "^7.1.2", "debug": "4" } }, "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw=="], "i18next": ["i18next@23.16.8", "", { "dependencies": { "@babel/runtime": "^7.23.2" } }, "sha512-06r/TitrM88Mg5FdUXAKL96dJMzgqLE5dv3ryBAra4KCwD9mJ4ndOTS95ZuymIGoE+2hzfdaMak2X11/es7ZWg=="], @@ -944,6 +988,8 @@ "is-plain-obj": ["is-plain-obj@3.0.0", "", {}, "sha512-gwsOE28k+23GP1B6vFl1oVh/WOzmawBrKwo5Ev6wMKzPkaXaCDIQKzLnvsA42DRlbVTWorkgTKIviAKCWkfUwA=="], + 
"is-potential-custom-element-name": ["is-potential-custom-element-name@1.0.1", "", {}, "sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ=="], + "is-regex": ["is-regex@1.2.1", "", { "dependencies": { "call-bound": "^1.0.2", "gopd": "^1.2.0", "has-tostringtag": "^1.0.2", "hasown": "^2.0.2" } }, "sha512-MjYsKHO5O7mCsmRGxWcLWheFqN9DJ/2TmngvjKXihe6efViPqc274+Fx/4fYj/r03+ESvBdTXK0V6tA3rgez1g=="], "is-set": ["is-set@2.0.3", "", {}, "sha512-iPAjerrse27/ygGLxw+EBR9agv9Y6uLeYVJMu+QNCoouJ1/1ri0mGrcWpfCqFZuzzx3WjtwxG098X+n4OuRkPg=="], @@ -980,6 +1026,8 @@ "js-yaml": ["js-yaml@4.1.1", "", { "dependencies": { "argparse": "^2.0.1" }, "bin": { "js-yaml": "bin/js-yaml.js" } }, "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA=="], + "jsdom": ["jsdom@27.4.0", "", { "dependencies": { "@acemir/cssom": "^0.9.28", "@asamuzakjp/dom-selector": "^6.7.6", "@exodus/bytes": "^1.6.0", "cssstyle": "^5.3.4", "data-urls": "^6.0.0", "decimal.js": "^10.6.0", "html-encoding-sniffer": "^6.0.0", "http-proxy-agent": "^7.0.2", "https-proxy-agent": "^7.0.6", "is-potential-custom-element-name": "^1.0.1", "parse5": "^8.0.0", "saxes": "^6.0.0", "symbol-tree": "^3.2.4", "tough-cookie": "^6.0.0", "w3c-xmlserializer": "^5.0.0", "webidl-conversions": "^8.0.0", "whatwg-mimetype": "^4.0.0", "whatwg-url": "^15.1.0", "ws": "^8.18.3", "xml-name-validator": "^5.0.0" }, "peerDependencies": { "canvas": "^3.0.0" }, "optionalPeers": ["canvas"] }, "sha512-mjzqwWRD9Y1J1KUi7W97Gja1bwOOM5Ug0EZ6UDK3xS7j7mndrkwozHtSblfomlzyB4NepioNt+B2sOSzczVgtQ=="], + "jsesc": ["jsesc@3.1.0", "", { "bin": { "jsesc": "bin/jsesc" } }, "sha512-/sM3dO2FOzXjKQhJuo0Q173wf2KOo8t4I8vHy6lF9poUp7bKT0/NHE8fPX23PwfhnykfqnC2xRxOnVw5XuGIaA=="], "json-2-csv": ["json-2-csv@5.5.10", "", { "dependencies": { "deeks": "3.1.0", "doc-path": "4.1.1" } }, "sha512-Dep8wO3Fr5wNjQevO2Z8Y7yeee/nYSGRsi7q6zJDKEVHxXkXT+v21vxHmDX923UzmCXXkSo62HaTz6eTWzFLaw=="], @@ -1020,7 +1068,7 @@ 
"lowlight": ["lowlight@1.20.0", "", { "dependencies": { "fault": "^1.0.0", "highlight.js": "~10.7.0" } }, "sha512-8Ktj+prEb1RoCPkEOrPMYUN/nCggB7qAWe3a7OpMjWQkh3l2RD5wKRQ+o8Q8YuI9RG/xs95waaI/E6ym/7NsTw=="], - "lru-cache": ["lru-cache@5.1.1", "", { "dependencies": { "yallist": "^3.0.2" } }, "sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w=="], + "lru-cache": ["lru-cache@11.2.7", "", {}, "sha512-aY/R+aEsRelme17KGQa/1ZSIpLpNYYrhcrepKTZgE+W3WM16YMCaPwOHLHsmopZHELU0Ojin1lPVxKR0MihncA=="], "lz-string": ["lz-string@1.5.0", "", { "bin": { "lz-string": "bin/bin.js" } }, "sha512-h5bgJWpxJNswbU7qCrV0tIKQCaS3blPDrqKWx+QxzuzL1zGUzij9XCWLrSLsJPu5t+eWA/ycetzYAO5IOMcWAQ=="], @@ -1062,6 +1110,8 @@ "mdast-util-to-string": ["mdast-util-to-string@3.2.0", "", { "dependencies": { "@types/mdast": "^3.0.0" } }, "sha512-V4Zn/ncyN1QNSqSBxTrMOLpjr+IKdHl2v3KVLoWmDPscP4r9GcCi71gjgvUV1SFSKh92AjAG4peFuBl2/YgCJg=="], + "mdn-data": ["mdn-data@2.27.1", "", {}, "sha512-9Yubnt3e8A0OKwxYSXyhLymGW4sCufcLG6VdiDdUGVkPhpqLxlvP5vl1983gQjJl3tqbrM731mjaZaP68AgosQ=="], + "memoize-one": ["memoize-one@6.0.0", "", {}, "sha512-rkpe71W0N0c0Xz6QD0eJETuWAJGnJ9afsl1srmwPrI+yBCkge5EycXXbYRyvL29zZVUWQCY7InPRCv3GDXuZNw=="], "micromark": ["micromark@3.2.0", "", { "dependencies": { "@types/debug": "^4.0.0", "debug": "^4.0.0", "decode-named-character-reference": "^1.0.0", "micromark-core-commonmark": "^1.0.1", "micromark-factory-space": "^1.0.0", "micromark-util-character": "^1.0.0", "micromark-util-chunked": "^1.0.0", "micromark-util-combine-extensions": "^1.0.0", "micromark-util-decode-numeric-character-reference": "^1.0.0", "micromark-util-encode": "^1.0.0", "micromark-util-normalize-identifier": "^1.0.0", "micromark-util-resolve-all": "^1.0.0", "micromark-util-sanitize-uri": "^1.0.0", "micromark-util-subtokenize": "^1.0.0", "micromark-util-symbol": "^1.0.0", "micromark-util-types": "^1.0.1", "uvu": "^0.5.0" } }, 
"sha512-uD66tJj54JLYq0De10AhWycZWGQNUvDI55xPgk2sQM5kn1JYlhbCMTtEeT27+vAhW2FBQxLlOmS3pmA7/2z4aA=="], @@ -1188,6 +1238,8 @@ "parse-json": ["parse-json@5.2.0", "", { "dependencies": { "@babel/code-frame": "^7.0.0", "error-ex": "^1.3.1", "json-parse-even-better-errors": "^2.3.0", "lines-and-columns": "^1.1.6" } }, "sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg=="], + "parse5": ["parse5@8.0.0", "", { "dependencies": { "entities": "^6.0.0" } }, "sha512-9m4m5GSgXjL4AjumKzq1Fgfp3Z8rsvjRNbnkVwfu2ImRqE5D0LnY2QfDen18FSY9C573YU5XxSapdHZTZ2WolA=="], + "path-is-absolute": ["path-is-absolute@1.0.1", "", {}, "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg=="], "path-key": ["path-key@3.1.1", "", {}, "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q=="], @@ -1232,6 +1284,8 @@ "proxy-from-env": ["proxy-from-env@1.1.0", "", {}, "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg=="], + "punycode": ["punycode@2.3.1", "", {}, "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg=="], + "query-selector-shadow-dom": ["query-selector-shadow-dom@1.0.1", "", {}, "sha512-lT5yCqEBgfoMYpf3F2xQRK7zEr1rhIIZuceDK6+xRkJQ4NMbHTwXqk4NkwDwQMNqXgG9r9fyHnzwNVs6zV5KRw=="], "rc-align": ["rc-align@4.0.15", "", { "dependencies": { "@babel/runtime": "^7.10.1", "classnames": "2.x", "dom-align": "^1.7.0", "rc-util": "^5.26.0", "resize-observer-polyfill": "^1.5.1" }, "peerDependencies": { "react": ">=16.9.0", "react-dom": ">=16.9.0" } }, "sha512-wqJtVH60pka/nOX7/IspElA8gjPNQKIx/ZqJ6heATCkXpe1Zg4cPVrMD2vC96wjsFFL8WsmhPbx9tdMo1qqlIA=="], @@ -1388,6 +1442,8 @@ "safe-regex-test": ["safe-regex-test@1.1.0", "", { "dependencies": { "call-bound": "^1.0.2", "es-errors": "^1.3.0", "is-regex": "^1.2.1" } }, 
"sha512-x/+Cz4YrimQxQccJf5mKEbIa1NzeCRNI5Ecl/ekmlYaampdNLPalVyIcCZNNH3MvmqBugV5TMYZXv0ljslUlaw=="], + "saxes": ["saxes@6.0.0", "", { "dependencies": { "xmlchars": "^2.2.0" } }, "sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA=="], + "scheduler": ["scheduler@0.23.2", "", { "dependencies": { "loose-envify": "^1.1.0" } }, "sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ=="], "scroll-into-view-if-needed": ["scroll-into-view-if-needed@3.1.0", "", { "dependencies": { "compute-scroll-into-view": "^3.0.2" } }, "sha512-49oNpRjWRvnU8NyGVmUaYG4jtTkNonFZI86MmGRDqBphEK2EXT9gdEUoQPZhuBM8yWHxCWbobltqYO5M4XrUvQ=="], @@ -1472,6 +1528,8 @@ "svg-parser": ["svg-parser@2.0.4", "", {}, "sha512-e4hG1hRwoOdRb37cIMSgzNsxyzKfayW6VOflrwvR+/bzrkyxY/31WkbgnQpgtrNp1SdpJvpUAGTa/ZoiPNDuRQ=="], + "symbol-tree": ["symbol-tree@3.2.4", "", {}, "sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw=="], + "tar": ["tar@6.2.1", "", { "dependencies": { "chownr": "^2.0.0", "fs-minipass": "^2.0.0", "minipass": "^5.0.0", "minizlib": "^2.1.1", "mkdirp": "^1.0.3", "yallist": "^4.0.0" } }, "sha512-DZ4yORTwrbTj/7MZYq2w+/ZFdI6OZ/f9SFHR+71gIVUZhOQPHzVCLpvRnPgyaMpfWxxk/4ONva3GQSyNIKRv6A=="], "throttle-debounce": ["throttle-debounce@5.0.2", "", {}, "sha512-B71/4oyj61iNH0KeCamLuE2rmKuTO5byTOSVwECM5FA7TiAiAW+UqTKZ9ERueC4qvgSttUhdmq1mXC3kJqGX7A=="], @@ -1490,11 +1548,17 @@ "tinyspy": ["tinyspy@4.0.4", "", {}, "sha512-azl+t0z7pw/z958Gy9svOTuzqIk6xq+NSheJzn5MMWtWTFywIacg2wUlzKFGtt3cthx0r2SxMK0yzJOR0IES7Q=="], + "tldts": ["tldts@7.0.27", "", { "dependencies": { "tldts-core": "^7.0.27" }, "bin": { "tldts": "bin/cli.js" } }, "sha512-I4FZcVFcqCRuT0ph6dCDpPuO4Xgzvh+spkcTr1gK7peIvxWauoloVO0vuy1FQnijT63ss6AsHB6+OIM4aXHbPg=="], + + "tldts-core": ["tldts-core@7.0.27", "", {}, "sha512-YQ7uPjgWUibIK6DW5lrKujGwUKhLevU4hcGbP5O6TcIUb+oTjJYJVWPS4nZsIHrEEEG6myk/oqAJUEQmpZrHsg=="], + "to-regex-range": 
["to-regex-range@5.0.1", "", { "dependencies": { "is-number": "^7.0.0" } }, "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ=="], "toggle-selection": ["toggle-selection@1.0.6", "", {}, "sha512-BiZS+C1OS8g/q2RRbJmy59xpyghNBqrr6k5L/uKBGRsTfxmu3ffiRnd8mlGPUVayg8pvfi5urfnu8TU7DVOkLQ=="], - "tr46": ["tr46@0.0.3", "", {}, "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw=="], + "tough-cookie": ["tough-cookie@6.0.1", "", { "dependencies": { "tldts": "^7.0.5" } }, "sha512-LktZQb3IeoUWB9lqR5EWTHgW/VTITCXg4D21M+lvybRVdylLrRMnqaIONLVb5mav8vM19m44HIcGq4qASeu2Qw=="], + + "tr46": ["tr46@6.0.0", "", { "dependencies": { "punycode": "^2.3.1" } }, "sha512-bLVMLPtstlZ4iMQHpFHTR7GAGj2jxi8Dg0s2h2MafAE4uSWF98FC/3MomU51iQAMf8/qDUbKWf5GxuvvVcXEhw=="], "trim-lines": ["trim-lines@3.0.1", "", {}, "sha512-kRj8B+YHZCc9kQYdWfJB2/oUl9rA99qbowYYBtr4ui4mZyAQ2JpvVBd/6U2YloATfqBhBTSMhTpgBHtU0Mf3Rg=="], @@ -1554,11 +1618,15 @@ "vitest": ["vitest@3.2.4", "", { "dependencies": { "@types/chai": "^5.2.2", "@vitest/expect": "3.2.4", "@vitest/mocker": "3.2.4", "@vitest/pretty-format": "^3.2.4", "@vitest/runner": "3.2.4", "@vitest/snapshot": "3.2.4", "@vitest/spy": "3.2.4", "@vitest/utils": "3.2.4", "chai": "^5.2.0", "debug": "^4.4.1", "expect-type": "^1.2.1", "magic-string": "^0.30.17", "pathe": "^2.0.3", "picomatch": "^4.0.2", "std-env": "^3.9.0", "tinybench": "^2.9.0", "tinyexec": "^0.3.2", "tinyglobby": "^0.2.14", "tinypool": "^1.1.1", "tinyrainbow": "^2.0.0", "vite": "^5.0.0 || ^6.0.0 || ^7.0.0-0", "vite-node": "3.2.4", "why-is-node-running": "^2.3.0" }, "peerDependencies": { "@edge-runtime/vm": "*", "@types/debug": "^4.1.12", "@types/node": "^18.0.0 || ^20.0.0 || >=22.0.0", "@vitest/browser": "3.2.4", "@vitest/ui": "3.2.4", "happy-dom": "*", "jsdom": "*" }, "optionalPeers": ["@edge-runtime/vm", "@types/debug", "@types/node", "@vitest/browser", "@vitest/ui", "happy-dom", "jsdom"], "bin": { "vitest": "vitest.mjs" 
} }, "sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A=="], + "w3c-xmlserializer": ["w3c-xmlserializer@5.0.0", "", { "dependencies": { "xml-name-validator": "^5.0.0" } }, "sha512-o8qghlI8NZHU1lLPrpi2+Uq7abh4GGPpYANlalzWxyWteJOCsr/P+oPBA49TOLu5FTZO4d3F9MnWJfiMo4BkmA=="], + "web-vitals": ["web-vitals@5.1.0", "", {}, "sha512-ArI3kx5jI0atlTtmV0fWU3fjpLmq/nD3Zr1iFFlJLaqa5wLBkUSzINwBPySCX/8jRyjlmy1Volw1kz1g9XE4Jg=="], - "webidl-conversions": ["webidl-conversions@3.0.1", "", {}, "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ=="], + "webidl-conversions": ["webidl-conversions@8.0.1", "", {}, "sha512-BMhLD/Sw+GbJC21C/UgyaZX41nPt8bUTg+jWyDeg7e7YN4xOM05YPSIXceACnXVtqyEw/LMClUQMtMZ+PGGpqQ=="], - "whatwg-url": ["whatwg-url@5.0.0", "", { "dependencies": { "tr46": "~0.0.3", "webidl-conversions": "^3.0.0" } }, "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw=="], + "whatwg-mimetype": ["whatwg-mimetype@3.0.0", "", {}, "sha512-nt+N2dzIutVRxARx1nghPKGv1xHikU7HKdfafKkLNLindmPU/ch3U31NOCGGA/dmPcmb1VlofO0vnKAcsm0o/Q=="], + + "whatwg-url": ["whatwg-url@15.1.0", "", { "dependencies": { "tr46": "^6.0.0", "webidl-conversions": "^8.0.0" } }, "sha512-2ytDk0kiEj/yu90JOAp44PVPUkO9+jVhyf+SybKlRHSDlvOOZhdPIrr7xTH64l4WixO2cP+wQIcgujkGBPPz6g=="], "which": ["which@2.0.2", "", { "dependencies": { "isexe": "^2.0.0" }, "bin": { "node-which": "./bin/node-which" } }, "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA=="], @@ -1578,6 +1646,10 @@ "ws": ["ws@8.18.3", "", { "peerDependencies": { "bufferutil": "^4.0.1", "utf-8-validate": ">=5.0.2" }, "optionalPeers": ["bufferutil", "utf-8-validate"] }, "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg=="], + "xml-name-validator": ["xml-name-validator@5.0.0", "", {}, 
"sha512-EvGK8EJ3DhaHfbRlETOWAS5pO9MZITeauHKJyb8wyajUfQUenkIg2MvLDTZ4T/TgIcm3HU0TFBgWWboAZ30UHg=="], + + "xmlchars": ["xmlchars@2.2.0", "", {}, "sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw=="], + "xmlhttprequest-ssl": ["xmlhttprequest-ssl@2.1.2", "", {}, "sha512-TEU+nJVUUnA4CYJFLvK5X9AOeH4KvDvhIfm0vV1GaQRtchnG0hgK5p8hw/xjv8cunWYCsiPCSDzObPyhEwq3KQ=="], "xtend": ["xtend@4.0.2", "", {}, "sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ=="], @@ -1590,6 +1662,8 @@ "zwitch": ["zwitch@2.0.4", "", {}, "sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A=="], + "@babel/helper-compilation-targets/lru-cache": ["lru-cache@5.1.1", "", { "dependencies": { "yallist": "^3.0.2" } }, "sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w=="], + "@emotion/babel-plugin/@emotion/hash": ["@emotion/hash@0.9.2", "", {}, "sha512-MyqliTZGuOm3+5ZRSaaBGP3USLw6+EGykkwZns2EPC5g8jJ4z9OrdZY9apkl3+UP9+sdz76YYkwCKP5gh8iY3g=="], "@emotion/babel-plugin/convert-source-map": ["convert-source-map@1.9.0", "", {}, "sha512-ASFBup0Mz1uyiIjANan1jzLQami9z1PoYSZCiiYW2FczPbenXc45FZdBZLzOT+r6+iciuEModtmCti+hjaAk0A=="], @@ -1606,6 +1680,8 @@ "@jest/types/chalk": ["chalk@4.1.2", "", { "dependencies": { "ansi-styles": "^4.1.0", "supports-color": "^7.1.0" } }, "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA=="], + "@mapbox/node-pre-gyp/https-proxy-agent": ["https-proxy-agent@5.0.1", "", { "dependencies": { "agent-base": "6", "debug": "4" } }, "sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA=="], + "@mapbox/node-pre-gyp/semver": ["semver@7.7.3", "", { "bin": { "semver": "bin/semver.js" } }, "sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q=="], "@opentelemetry/otlp-transformer/@opentelemetry/resources": 
["@opentelemetry/resources@2.2.0", "", { "dependencies": { "@opentelemetry/core": "2.2.0", "@opentelemetry/semantic-conventions": "^1.29.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.3.0 <1.10.0" } }, "sha512-1pNQf/JazQTMA0BiO5NINUzH0cbLbbl7mntLa4aJNmCCXSj0q03T5ZXXL0zw4G55TjdL9Tz32cznGClf+8zr5A=="], @@ -1626,6 +1702,8 @@ "@rjsf/antd/rc-picker": ["rc-picker@2.7.6", "", { "dependencies": { "@babel/runtime": "^7.10.1", "classnames": "^2.2.1", "date-fns": "2.x", "dayjs": "1.x", "moment": "^2.24.0", "rc-trigger": "^5.0.4", "rc-util": "^5.37.0", "shallowequal": "^1.1.0" }, "peerDependencies": { "react": ">=16.9.0", "react-dom": ">=16.9.0" } }, "sha512-H9if/BUJUZBOhPfWcPeT15JUI3/ntrG9muzERrXDkSoWmDj4yzmBvumozpxYrHwjcKnjyDGAke68d+whWwvhHA=="], + "@svgr/hast-util-to-babel-ast/entities": ["entities@4.5.0", "", {}, "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw=="], + "@testing-library/dom/aria-query": ["aria-query@5.1.3", "", { "dependencies": { "deep-equal": "^2.0.5" } }, "sha512-R5iJ5lkuHybztUfuOAznmboyjWq8O6sqNqtK7CLOqdydi54VNbORp49mb14KbWgG1QD3JFO9hJdZ+y4KutfdOQ=="], "@testing-library/dom/chalk": ["chalk@4.1.2", "", { "dependencies": { "ansi-styles": "^4.1.0", "supports-color": "^7.1.0" } }, "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA=="], @@ -1636,6 +1714,8 @@ "babel-plugin-macros/cosmiconfig": ["cosmiconfig@7.1.0", "", { "dependencies": { "@types/parse-json": "^4.0.0", "import-fresh": "^3.2.1", "parse-json": "^5.0.0", "path-type": "^4.0.0", "yaml": "^1.10.0" } }, "sha512-AdmX6xUzdNASswsFtmwSt7Vj8po9IuqXm0UXz7QKPuEUmPB4XyjGfaAr2PSuELMwkRMVH1EpIkX5bTZGRB3eCA=="], + "data-urls/whatwg-mimetype": ["whatwg-mimetype@5.0.0", "", {}, "sha512-sXcNcHOC51uPGF0P/D4NVtrkjSU2fNsm9iog4ZvZJsL3rjoDAzXZhkm2MWt1y+PUdggKAYVoMAIYcs78wJ51Cw=="], + "decode-named-character-reference/character-entities": ["character-entities@2.0.2", "", {}, 
"sha512-shx7oQ0Awen/BRIdkjkvz54PnEEI/EjwXDSIZp86/KKdbafHh1Df/RYGBhn4hbe2+uKC9FnT5UCEdyPz3ai9hQ=="], "fs-minipass/minipass": ["minipass@3.3.6", "", { "dependencies": { "yallist": "^4.0.0" } }, "sha512-DxiNidxSEK+tHG6zOIklvNOwm3hvCrbUrdtzY74U6HKTJxvIDfOUL5W5P2Ghd3DTkhhKPYGqeNUIh5qcM4YBfw=="], @@ -1664,6 +1744,8 @@ "jest-util/chalk": ["chalk@4.1.2", "", { "dependencies": { "ansi-styles": "^4.1.0", "supports-color": "^7.1.0" } }, "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA=="], + "jsdom/whatwg-mimetype": ["whatwg-mimetype@4.0.0", "", {}, "sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg=="], + "mdast-util-find-and-replace/escape-string-regexp": ["escape-string-regexp@5.0.0", "", {}, "sha512-/veY75JbMK4j1yjvuUxuVsiS/hr/4iHs9FTT6cgTexxdE0Ly/glccBAkloH/DofkjRbZU3bnoj38mOmhkZ0lHw=="], "micromatch/picomatch": ["picomatch@2.3.1", "", {}, "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA=="], @@ -1674,6 +1756,10 @@ "monaco-editor/dompurify": ["dompurify@3.2.7", "", { "optionalDependencies": { "@types/trusted-types": "^2.0.7" } }, "sha512-WhL/YuveyGXJaerVlMYGWhvQswa7myDG17P7Vu65EWC05o8vfeNbvNf4d/BOvH99+ZW+LlQsc1GDKMa1vNK6dw=="], + "node-fetch/whatwg-url": ["whatwg-url@5.0.0", "", { "dependencies": { "tr46": "~0.0.3", "webidl-conversions": "^3.0.0" } }, "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw=="], + + "parse5/entities": ["entities@6.0.1", "", {}, "sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g=="], + "pretty-format/ansi-styles": ["ansi-styles@5.2.0", "", {}, "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA=="], "pretty-format/react-is": ["react-is@17.0.2", "", {}, "sha512-w2GsyukL62IJnlaff/nRegPQR94C/XXamvMWmSHRJ4y7Ts/4ocGRmTHvOs8PSE6pB3dWOrD/nueuU5sduBsQ4w=="], @@ -1694,6 +1780,8 @@ 
"unified/is-plain-obj": ["is-plain-obj@4.1.0", "", {}, "sha512-+Pgi+vMuUNkJyExiMBt5IlFoMyKnr5zhJ4Uspz58WOhBF5QoIZkFyNHIbBAtHwzVAgk5RtndVNsDRN61/mmDqg=="], + "@mapbox/node-pre-gyp/https-proxy-agent/agent-base": ["agent-base@6.0.2", "", { "dependencies": { "debug": "4" } }, "sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ=="], + "@react-awesome-query-builder/ui/react-redux/@types/use-sync-external-store": ["@types/use-sync-external-store@0.0.3", "", {}, "sha512-EwmlvuaxPNej9+T4v5AuBPJa2x2UOJVdjCtDHgcDqitUeOtjnJKJ+apYjVcAoBEMjKW1VVFGZLUb5+qqa09XFA=="], "@rjsf/antd/rc-picker/date-fns": ["date-fns@2.30.0", "", { "dependencies": { "@babel/runtime": "^7.21.0" } }, "sha512-fnULvOpxnC5/Vg3NCiWelDsLiUc9bRwAPs/+LfTLNvetFCtCTN+yQz15C/fs4AwX1R9K5GLtLfn8QW+dWisaAw=="], @@ -1707,5 +1795,9 @@ "jest-matcher-utils/pretty-format/ansi-styles": ["ansi-styles@5.2.0", "", {}, "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA=="], "jest-message-util/pretty-format/ansi-styles": ["ansi-styles@5.2.0", "", {}, "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA=="], + + "node-fetch/whatwg-url/tr46": ["tr46@0.0.3", "", {}, "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw=="], + + "node-fetch/whatwg-url/webidl-conversions": ["webidl-conversions@3.0.1", "", {}, "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ=="], } } diff --git a/frontend/package-lock.json b/frontend/package-lock.json new file mode 100644 index 0000000000..8eaf46bd8e --- /dev/null +++ b/frontend/package-lock.json @@ -0,0 +1,10521 @@ +{ + "name": "frontend", + "version": "0.1.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "frontend", + "version": "0.1.0", + "dependencies": { + "@ant-design/icons": "^5.1.4", + "@monaco-editor/react": "^4.7.0", + 
"@react-awesome-query-builder/antd": "^6.6.10", + "@react-pdf-viewer/core": "^3.12.0", + "@react-pdf-viewer/default-layout": "^3.12.0", + "@react-pdf-viewer/highlight": "^3.12.0", + "@react-pdf-viewer/page-navigation": "^3.12.0", + "@rjsf/antd": "^5.16.1", + "@rjsf/core": "^5.8.1", + "@rjsf/utils": "^5.8.1", + "@rjsf/validator-ajv8": "^5.8.1", + "@stripe/stripe-js": "^4.2.0", + "@testing-library/jest-dom": "^5.16.5", + "@testing-library/react": "^13.4.0", + "@testing-library/user-event": "^13.5.0", + "antd": "^5.5.1", + "axios": "^1.4.0", + "cron-validator": "^1.3.1", + "cronstrue": "^2.48.0", + "date-fns": "^4.1.0", + "emoji-picker-react": "^4.8.0", + "emoji-regex": "^10.3.0", + "file-saver": "^2.0.5", + "framer-motion": "^11.2.10", + "handlebars": "^4.7.8", + "http-proxy-middleware": "^2.0.6", + "js-cookie": "^3.0.5", + "js-yaml": "^4.1.0", + "json-2-csv": "^5.5.1", + "moment": "^2.29.4", + "moment-timezone": "^0.5.45", + "pdfjs-dist": "^3.4.120", + "posthog-js": "^1.116.5", + "prismjs": "^1.29.0", + "react": "^18.2.0", + "react-diff-viewer-continued": "^3.4.0", + "react-dnd": "^16.0.1", + "react-dnd-html5-backend": "^16.0.1", + "react-dom": "^18.2.0", + "react-google-recaptcha": "^3.1.0", + "react-gtm-module": "^2.0.11", + "react-helmet-async": "^2.0.5", + "react-js-cron": "^5.0.1", + "react-markdown": "^8.0.7", + "react-product-fruits": "^2.2.6", + "react-router-dom": "^6.11.2", + "react-social-login-buttons": "^3.9.1", + "react-syntax-highlighter": "^15.6.1", + "recharts": "^3.5.1", + "remark-gfm": "^3.0.1", + "socket.io-client": "^4.7.2", + "uuid": "^9.0.1", + "zustand": "^4.3.8" + }, + "devDependencies": { + "@biomejs/biome": "^2.3.13", + "@vitejs/plugin-react": "^4.4.0", + "baseline-browser-mapping": "^2.9.19", + "happy-dom": "^20.8.8", + "jsdom": "^27.0.1", + "vite": "^7.0.0", + "vite-plugin-svgr": "^4.5.0", + "vitest": "^3.2.0" + }, + "engines": { + "node": ">=20.19.0" + } + }, + "node_modules/@adobe/css-tools": { + "version": "4.2.0", + "license": "MIT" 
+ }, + "node_modules/@ant-design/colors": { + "version": "7.0.2", + "license": "MIT", + "dependencies": { + "@ctrl/tinycolor": "^3.6.1" + } + }, + "node_modules/@ant-design/cssinjs": { + "version": "1.18.4", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.11.1", + "@emotion/hash": "^0.8.0", + "@emotion/unitless": "^0.7.5", + "classnames": "^2.3.1", + "csstype": "^3.1.3", + "rc-util": "^5.35.0", + "stylis": "^4.0.13" + }, + "peerDependencies": { + "react": ">=16.0.0", + "react-dom": ">=16.0.0" + } + }, + "node_modules/@ant-design/icons": { + "version": "5.2.6", + "license": "MIT", + "dependencies": { + "@ant-design/colors": "^7.0.0", + "@ant-design/icons-svg": "^4.3.0", + "@babel/runtime": "^7.11.2", + "classnames": "^2.2.6", + "rc-util": "^5.31.1" + }, + "engines": { + "node": ">=8" + }, + "peerDependencies": { + "react": ">=16.0.0", + "react-dom": ">=16.0.0" + } + }, + "node_modules/@ant-design/icons-svg": { + "version": "4.3.1", + "license": "MIT" + }, + "node_modules/@ant-design/react-slick": { + "version": "1.0.2", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.10.4", + "classnames": "^2.2.5", + "json2mq": "^0.2.0", + "resize-observer-polyfill": "^1.5.1", + "throttle-debounce": "^5.0.0" + }, + "peerDependencies": { + "react": ">=16.9.0" + } + }, + "node_modules/@asamuzakjp/css-color": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/@asamuzakjp/css-color/-/css-color-4.1.2.tgz", + "integrity": "sha512-NfBUvBaYgKIuq6E/RBLY1m0IohzNHAYyaJGuTK79Z23uNwmz2jl1mPsC5ZxCCxylinKhT1Amn5oNTlx1wN8cQg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@csstools/css-calc": "^3.0.0", + "@csstools/css-color-parser": "^4.0.1", + "@csstools/css-parser-algorithms": "^4.0.0", + "@csstools/css-tokenizer": "^4.0.0", + "lru-cache": "^11.2.5" + } + }, + "node_modules/@asamuzakjp/css-color/node_modules/lru-cache": { + "version": "11.2.7", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-11.2.7.tgz", + "integrity": 
"sha512-aY/R+aEsRelme17KGQa/1ZSIpLpNYYrhcrepKTZgE+W3WM16YMCaPwOHLHsmopZHELU0Ojin1lPVxKR0MihncA==", + "dev": true, + "license": "BlueOak-1.0.0", + "engines": { + "node": "20 || >=22" + } + }, + "node_modules/@asamuzakjp/dom-selector": { + "version": "6.8.1", + "resolved": "https://registry.npmjs.org/@asamuzakjp/dom-selector/-/dom-selector-6.8.1.tgz", + "integrity": "sha512-MvRz1nCqW0fsy8Qz4dnLIvhOlMzqDVBabZx6lH+YywFDdjXhMY37SmpV1XFX3JzG5GWHn63j6HX6QPr3lZXHvQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@asamuzakjp/nwsapi": "^2.3.9", + "bidi-js": "^1.0.3", + "css-tree": "^3.1.0", + "is-potential-custom-element-name": "^1.0.1", + "lru-cache": "^11.2.6" + } + }, + "node_modules/@asamuzakjp/dom-selector/node_modules/lru-cache": { + "version": "11.2.7", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-11.2.7.tgz", + "integrity": "sha512-aY/R+aEsRelme17KGQa/1ZSIpLpNYYrhcrepKTZgE+W3WM16YMCaPwOHLHsmopZHELU0Ojin1lPVxKR0MihncA==", + "dev": true, + "license": "BlueOak-1.0.0", + "engines": { + "node": "20 || >=22" + } + }, + "node_modules/@asamuzakjp/nwsapi": { + "version": "2.3.9", + "resolved": "https://registry.npmjs.org/@asamuzakjp/nwsapi/-/nwsapi-2.3.9.tgz", + "integrity": "sha512-n8GuYSrI9bF7FFZ/SjhwevlHc8xaVlb/7HmHelnc/PZXBD2ZR49NnN9sMMuDdEGPeeRQ5d0hqlSlEpgCX3Wl0Q==", + "dev": true, + "license": "MIT" + }, + "node_modules/@babel/code-frame": { + "version": "7.29.0", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.29.0.tgz", + "integrity": "sha512-9NhCeYjq9+3uxgdtp20LSiJXJvN0FeCtNGpJxuMFZ1Kv3cWUNb6DOhJwUvcVCzKGR66cw4njwM6hrJLqgOwbcw==", + "license": "MIT", + "dependencies": { + "@babel/helper-validator-identifier": "^7.28.5", + "js-tokens": "^4.0.0", + "picocolors": "^1.1.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/compat-data": { + "version": "7.29.0", + "resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.29.0.tgz", + "integrity": 
"sha512-T1NCJqT/j9+cn8fvkt7jtwbLBfLC/1y1c7NtCeXFRgzGTsafi68MRv8yzkYSapBnFA6L3U2VSc02ciDzoAJhJg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/core": { + "version": "7.29.0", + "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.29.0.tgz", + "integrity": "sha512-CGOfOJqWjg2qW/Mb6zNsDm+u5vFQ8DxXfbM09z69p5Z6+mE1ikP2jUXw+j42Pf1XTYED2Rni5f95npYeuwMDQA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/code-frame": "^7.29.0", + "@babel/generator": "^7.29.0", + "@babel/helper-compilation-targets": "^7.28.6", + "@babel/helper-module-transforms": "^7.28.6", + "@babel/helpers": "^7.28.6", + "@babel/parser": "^7.29.0", + "@babel/template": "^7.28.6", + "@babel/traverse": "^7.29.0", + "@babel/types": "^7.29.0", + "@jridgewell/remapping": "^2.3.5", + "convert-source-map": "^2.0.0", + "debug": "^4.1.0", + "gensync": "^1.0.0-beta.2", + "json5": "^2.2.3", + "semver": "^6.3.1" + }, + "engines": { + "node": ">=6.9.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/babel" + } + }, + "node_modules/@babel/core/node_modules/convert-source-map": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz", + "integrity": "sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==", + "dev": true, + "license": "MIT" + }, + "node_modules/@babel/generator": { + "version": "7.29.1", + "resolved": "https://registry.npmjs.org/@babel/generator/-/generator-7.29.1.tgz", + "integrity": "sha512-qsaF+9Qcm2Qv8SRIMMscAvG4O3lJ0F1GuMo5HR/Bp02LopNgnZBC/EkbevHFeGs4ls/oPz9v+Bsmzbkbe+0dUw==", + "license": "MIT", + "dependencies": { + "@babel/parser": "^7.29.0", + "@babel/types": "^7.29.0", + "@jridgewell/gen-mapping": "^0.3.12", + "@jridgewell/trace-mapping": "^0.3.28", + "jsesc": "^3.0.2" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-compilation-targets": { + 
"version": "7.28.6", + "resolved": "https://registry.npmjs.org/@babel/helper-compilation-targets/-/helper-compilation-targets-7.28.6.tgz", + "integrity": "sha512-JYtls3hqi15fcx5GaSNL7SCTJ2MNmjrkHXg4FSpOA/grxK8KwyZ5bubHsCq8FXCkua6xhuaaBit+3b7+VZRfcA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/compat-data": "^7.28.6", + "@babel/helper-validator-option": "^7.27.1", + "browserslist": "^4.24.0", + "lru-cache": "^5.1.1", + "semver": "^6.3.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-globals": { + "version": "7.28.0", + "resolved": "https://registry.npmjs.org/@babel/helper-globals/-/helper-globals-7.28.0.tgz", + "integrity": "sha512-+W6cISkXFa1jXsDEdYA8HeevQT/FULhxzR99pxphltZcVaugps53THCeiWA8SguxxpSp3gKPiuYfSWopkLQ4hw==", + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-module-imports": { + "version": "7.28.6", + "resolved": "https://registry.npmjs.org/@babel/helper-module-imports/-/helper-module-imports-7.28.6.tgz", + "integrity": "sha512-l5XkZK7r7wa9LucGw9LwZyyCUscb4x37JWTPz7swwFE/0FMQAGpiWUZn8u9DzkSBWEcK25jmvubfpw2dnAMdbw==", + "license": "MIT", + "dependencies": { + "@babel/traverse": "^7.28.6", + "@babel/types": "^7.28.6" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-module-transforms": { + "version": "7.28.6", + "resolved": "https://registry.npmjs.org/@babel/helper-module-transforms/-/helper-module-transforms-7.28.6.tgz", + "integrity": "sha512-67oXFAYr2cDLDVGLXTEABjdBJZ6drElUSI7WKp70NrpyISso3plG9SAGEF6y7zbha/wOzUByWWTJvEDVNIUGcA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-module-imports": "^7.28.6", + "@babel/helper-validator-identifier": "^7.28.5", + "@babel/traverse": "^7.28.6" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0" + } + }, + "node_modules/@babel/helper-plugin-utils": { + "version": "7.28.6", + "resolved": 
"https://registry.npmjs.org/@babel/helper-plugin-utils/-/helper-plugin-utils-7.28.6.tgz", + "integrity": "sha512-S9gzZ/bz83GRysI7gAD4wPT/AI3uCnY+9xn+Mx/KPs2JwHJIz1W8PZkg2cqyt3RNOBM8ejcXhV6y8Og7ly/Dug==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-string-parser": { + "version": "7.27.1", + "resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.27.1.tgz", + "integrity": "sha512-qMlSxKbpRlAridDExk92nSobyDdpPijUq2DW6oDnUqd0iOGxmQjyqhMIihI9+zv4LPyZdRje2cavWPbCbWm3eA==", + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-validator-identifier": { + "version": "7.28.5", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.28.5.tgz", + "integrity": "sha512-qSs4ifwzKJSV39ucNjsvc6WVHs6b7S03sOh2OcHF9UHfVPqWWALUsNUVzhSBiItjRZoLHx7nIarVjqKVusUZ1Q==", + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-validator-option": { + "version": "7.27.1", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-option/-/helper-validator-option-7.27.1.tgz", + "integrity": "sha512-YvjJow9FxbhFFKDSuFnVCe2WxXk1zWc22fFePVNEaWJEu8IrZVlda6N0uHwzZrUM1il7NC9Mlp4MaJYbYd9JSg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helpers": { + "version": "7.29.2", + "resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.29.2.tgz", + "integrity": "sha512-HoGuUs4sCZNezVEKdVcwqmZN8GoHirLUcLaYVNBK2J0DadGtdcqgr3BCbvH8+XUo4NGjNl3VOtSjEKNzqfFgKw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/template": "^7.28.6", + "@babel/types": "^7.29.0" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/parser": { + "version": "7.29.2", + "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.29.2.tgz", + "integrity": 
"sha512-4GgRzy/+fsBa72/RZVJmGKPmZu9Byn8o4MoLpmNe1m8ZfYnz5emHLQz3U4gLud6Zwl0RZIcgiLD7Uq7ySFuDLA==", + "license": "MIT", + "dependencies": { + "@babel/types": "^7.29.0" + }, + "bin": { + "parser": "bin/babel-parser.js" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@babel/plugin-transform-react-jsx-self": { + "version": "7.27.1", + "resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-jsx-self/-/plugin-transform-react-jsx-self-7.27.1.tgz", + "integrity": "sha512-6UzkCs+ejGdZ5mFFC/OCUrv028ab2fp1znZmCZjAOBKiBK2jXD1O+BPSfX8X2qjJ75fZBMSnQn3Rq2mrBJK2mw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-plugin-utils": "^7.27.1" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-transform-react-jsx-source": { + "version": "7.27.1", + "resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-jsx-source/-/plugin-transform-react-jsx-source-7.27.1.tgz", + "integrity": "sha512-zbwoTsBruTeKB9hSq73ha66iFeJHuaFkUbwvqElnygoNbj/jHRsSeokowZFN3CZ64IvEqcmmkVe89OPXc7ldAw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-plugin-utils": "^7.27.1" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/runtime": { + "version": "7.26.0", + "license": "MIT", + "dependencies": { + "regenerator-runtime": "^0.14.0" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/runtime/node_modules/regenerator-runtime": { + "version": "0.14.1", + "license": "MIT" + }, + "node_modules/@babel/template": { + "version": "7.28.6", + "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.28.6.tgz", + "integrity": "sha512-YA6Ma2KsCdGb+WC6UpBVFJGXL58MDA6oyONbjyF/+5sBgxY/dwkhLogbMT2GXXyU84/IhRw/2D1Os1B/giz+BQ==", + "license": "MIT", + "dependencies": { + "@babel/code-frame": "^7.28.6", + "@babel/parser": "^7.28.6", + "@babel/types": 
"^7.28.6" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/traverse": { + "version": "7.29.0", + "resolved": "https://registry.npmjs.org/@babel/traverse/-/traverse-7.29.0.tgz", + "integrity": "sha512-4HPiQr0X7+waHfyXPZpWPfWL/J7dcN1mx9gL6WdQVMbPnF3+ZhSMs8tCxN7oHddJE9fhNE7+lxdnlyemKfJRuA==", + "license": "MIT", + "dependencies": { + "@babel/code-frame": "^7.29.0", + "@babel/generator": "^7.29.0", + "@babel/helper-globals": "^7.28.0", + "@babel/parser": "^7.29.0", + "@babel/template": "^7.28.6", + "@babel/types": "^7.29.0", + "debug": "^4.3.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/types": { + "version": "7.29.0", + "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.29.0.tgz", + "integrity": "sha512-LwdZHpScM4Qz8Xw2iKSzS+cfglZzJGvofQICy7W7v4caru4EaAmyUuO6BGrbyQ2mYV11W0U8j5mBhd14dd3B0A==", + "license": "MIT", + "dependencies": { + "@babel/helper-string-parser": "^7.27.1", + "@babel/helper-validator-identifier": "^7.28.5" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@biomejs/biome": { + "version": "2.4.9", + "resolved": "https://registry.npmjs.org/@biomejs/biome/-/biome-2.4.9.tgz", + "integrity": "sha512-wvZW92FrwitTcacvCBT8xdAbfbxWfDLwjYMmU3djjqQTh7Ni4ZdiWIT/x5VcZ+RQuxiKzIOzi5D+dcyJDFZMsA==", + "dev": true, + "license": "MIT OR Apache-2.0", + "bin": { + "biome": "bin/biome" + }, + "engines": { + "node": ">=14.21.3" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/biome" + }, + "optionalDependencies": { + "@biomejs/cli-darwin-arm64": "2.4.9", + "@biomejs/cli-darwin-x64": "2.4.9", + "@biomejs/cli-linux-arm64": "2.4.9", + "@biomejs/cli-linux-arm64-musl": "2.4.9", + "@biomejs/cli-linux-x64": "2.4.9", + "@biomejs/cli-linux-x64-musl": "2.4.9", + "@biomejs/cli-win32-arm64": "2.4.9", + "@biomejs/cli-win32-x64": "2.4.9" + } + }, + "node_modules/@biomejs/cli-darwin-arm64": { + "version": "2.4.9", + "resolved": 
"https://registry.npmjs.org/@biomejs/cli-darwin-arm64/-/cli-darwin-arm64-2.4.9.tgz", + "integrity": "sha512-d5G8Gf2RpH5pYwiHLPA+UpG3G9TLQu4WM+VK6sfL7K68AmhcEQ9r+nkj/DvR/GYhYox6twsHUtmWWWIKfcfQQA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT OR Apache-2.0", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=14.21.3" + } + }, + "node_modules/@biomejs/cli-darwin-x64": { + "version": "2.4.9", + "resolved": "https://registry.npmjs.org/@biomejs/cli-darwin-x64/-/cli-darwin-x64-2.4.9.tgz", + "integrity": "sha512-LNCLNgqDMG7BLdc3a8aY/dwKPK7+R8/JXJoXjCvZh2gx8KseqBdFDKbhrr7HCWF8SzNhbTaALhTBoh/I6rf9lA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT OR Apache-2.0", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=14.21.3" + } + }, + "node_modules/@biomejs/cli-linux-arm64": { + "version": "2.4.9", + "resolved": "https://registry.npmjs.org/@biomejs/cli-linux-arm64/-/cli-linux-arm64-2.4.9.tgz", + "integrity": "sha512-4adnkAUi6K4C/emPRgYznMOcLlUqZdXWM6aIui4VP4LraE764g6Q4YguygnAUoxKjKIXIWPteKMgRbN0wsgwcg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT OR Apache-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=14.21.3" + } + }, + "node_modules/@biomejs/cli-linux-arm64-musl": { + "version": "2.4.9", + "resolved": "https://registry.npmjs.org/@biomejs/cli-linux-arm64-musl/-/cli-linux-arm64-musl-2.4.9.tgz", + "integrity": "sha512-8RCww5xnPn2wpK4L/QDGDOW0dq80uVWfppPxHIUg6mOs9B6gRmqPp32h1Ls3T8GnW8Wo5A8u7vpTwz4fExN+sw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT OR Apache-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=14.21.3" + } + }, + "node_modules/@biomejs/cli-linux-x64": { + "version": "2.4.9", + "resolved": "https://registry.npmjs.org/@biomejs/cli-linux-x64/-/cli-linux-x64-2.4.9.tgz", + "integrity": "sha512-L10na7POF0Ks/cgLFNF1ZvIe+X4onLkTi5oP9hY+Rh60Q+7fWzKDDCeGyiHUFf1nGIa9dQOOUPGe2MyYg8nMSQ==", + "cpu": [ + "x64" 
+ ], + "dev": true, + "license": "MIT OR Apache-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=14.21.3" + } + }, + "node_modules/@biomejs/cli-linux-x64-musl": { + "version": "2.4.9", + "resolved": "https://registry.npmjs.org/@biomejs/cli-linux-x64-musl/-/cli-linux-x64-musl-2.4.9.tgz", + "integrity": "sha512-5TD+WS9v5vzXKzjetF0hgoaNFHMcpQeBUwKKVi3JbG1e9UCrFuUK3Gt185fyTzvRdwYkJJEMqglRPjmesmVv4A==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT OR Apache-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=14.21.3" + } + }, + "node_modules/@biomejs/cli-win32-arm64": { + "version": "2.4.9", + "resolved": "https://registry.npmjs.org/@biomejs/cli-win32-arm64/-/cli-win32-arm64-2.4.9.tgz", + "integrity": "sha512-aDZr0RBC3sMGJOU10BvG7eZIlWLK/i51HRIfScE2lVhfts2dQTreowLiJJd+UYg/tHKxS470IbzpuKmd0MiD6g==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT OR Apache-2.0", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=14.21.3" + } + }, + "node_modules/@biomejs/cli-win32-x64": { + "version": "2.4.9", + "resolved": "https://registry.npmjs.org/@biomejs/cli-win32-x64/-/cli-win32-x64-2.4.9.tgz", + "integrity": "sha512-NS4g/2G9SoQ4ktKtz31pvyc/rmgzlcIDCGU/zWbmHJAqx6gcRj2gj5Q/guXhoWTzCUaQZDIqiCQXHS7BcGYc0w==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT OR Apache-2.0", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=14.21.3" + } + }, + "node_modules/@csstools/color-helpers": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/@csstools/color-helpers/-/color-helpers-6.0.2.tgz", + "integrity": "sha512-LMGQLS9EuADloEFkcTBR3BwV/CGHV7zyDxVRtVDTwdI2Ca4it0CCVTT9wCkxSgokjE5Ho41hEPgb8OEUwoXr6Q==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT-0", + "engines": { + "node": 
">=20.19.0" + } + }, + "node_modules/@csstools/css-calc": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/@csstools/css-calc/-/css-calc-3.1.1.tgz", + "integrity": "sha512-HJ26Z/vmsZQqs/o3a6bgKslXGFAungXGbinULZO3eMsOyNJHeBBZfup5FiZInOghgoM4Hwnmw+OgbJCNg1wwUQ==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT", + "engines": { + "node": ">=20.19.0" + }, + "peerDependencies": { + "@csstools/css-parser-algorithms": "^4.0.0", + "@csstools/css-tokenizer": "^4.0.0" + } + }, + "node_modules/@csstools/css-color-parser": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/@csstools/css-color-parser/-/css-color-parser-4.0.2.tgz", + "integrity": "sha512-0GEfbBLmTFf0dJlpsNU7zwxRIH0/BGEMuXLTCvFYxuL1tNhqzTbtnFICyJLTNK4a+RechKP75e7w42ClXSnJQw==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT", + "dependencies": { + "@csstools/color-helpers": "^6.0.2", + "@csstools/css-calc": "^3.1.1" + }, + "engines": { + "node": ">=20.19.0" + }, + "peerDependencies": { + "@csstools/css-parser-algorithms": "^4.0.0", + "@csstools/css-tokenizer": "^4.0.0" + } + }, + "node_modules/@csstools/css-parser-algorithms": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/@csstools/css-parser-algorithms/-/css-parser-algorithms-4.0.0.tgz", + "integrity": "sha512-+B87qS7fIG3L5h3qwJ/IFbjoVoOe/bpOdh9hAjXbvx0o8ImEmUsGXN0inFOnk2ChCFgqkkGFQ+TpM5rbhkKe4w==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT", + "engines": { + "node": ">=20.19.0" + }, + "peerDependencies": { + 
"@csstools/css-tokenizer": "^4.0.0" + } + }, + "node_modules/@csstools/css-syntax-patches-for-csstree": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@csstools/css-syntax-patches-for-csstree/-/css-syntax-patches-for-csstree-1.1.1.tgz", + "integrity": "sha512-BvqN0AMWNAnLk9G8jnUT77D+mUbY/H2b3uDTvg2isJkHaOufUE2R3AOwxWo7VBQKT1lOdwdvorddo2B/lk64+w==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT-0", + "peerDependencies": { + "css-tree": "^3.2.1" + }, + "peerDependenciesMeta": { + "css-tree": { + "optional": true + } + } + }, + "node_modules/@csstools/css-tokenizer": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/@csstools/css-tokenizer/-/css-tokenizer-4.0.0.tgz", + "integrity": "sha512-QxULHAm7cNu72w97JUNCBFODFaXpbDg+dP8b/oWFAZ2MTRppA3U00Y2L1HqaS4J6yBqxwa/Y3nMBaxVKbB/NsA==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT", + "engines": { + "node": ">=20.19.0" + } + }, + "node_modules/@ctrl/tinycolor": { + "version": "3.6.1", + "license": "MIT", + "engines": { + "node": ">=10" + } + }, + "node_modules/@emotion/babel-plugin": { + "version": "11.13.5", + "resolved": "https://registry.npmjs.org/@emotion/babel-plugin/-/babel-plugin-11.13.5.tgz", + "integrity": "sha512-pxHCpT2ex+0q+HH91/zsdHkw/lXd468DIN2zvfvLtPKLLMo6gQj7oLObq8PhkrxOZb/gGCq03S3Z7PDhS8pduQ==", + "license": "MIT", + "dependencies": { + "@babel/helper-module-imports": "^7.16.7", + "@babel/runtime": "^7.18.3", + "@emotion/hash": "^0.9.2", + "@emotion/memoize": "^0.9.0", + "@emotion/serialize": "^1.3.3", + "babel-plugin-macros": "^3.1.0", + "convert-source-map": "^1.5.0", + "escape-string-regexp": "^4.0.0", + "find-root": "^1.1.0", + "source-map": 
"^0.5.7", + "stylis": "4.2.0" + } + }, + "node_modules/@emotion/babel-plugin/node_modules/@emotion/hash": { + "version": "0.9.2", + "resolved": "https://registry.npmjs.org/@emotion/hash/-/hash-0.9.2.tgz", + "integrity": "sha512-MyqliTZGuOm3+5ZRSaaBGP3USLw6+EGykkwZns2EPC5g8jJ4z9OrdZY9apkl3+UP9+sdz76YYkwCKP5gh8iY3g==", + "license": "MIT" + }, + "node_modules/@emotion/babel-plugin/node_modules/source-map": { + "version": "0.5.7", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.5.7.tgz", + "integrity": "sha512-LbrmJOMUSdEVxIKvdcJzQC+nQhe8FUZQTXQy6+I75skNgn3OoQ0DZA8YnFa7gp8tqtL3KPf1kmo0R5DoApeSGQ==", + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/@emotion/babel-plugin/node_modules/stylis": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/stylis/-/stylis-4.2.0.tgz", + "integrity": "sha512-Orov6g6BB1sDfYgzWfTHDOxamtX1bE/zo104Dh9e6fqJ3PooipYyfJ0pUmrZO2wAvO8YbEyeFrkV91XTsGMSrw==", + "license": "MIT" + }, + "node_modules/@emotion/cache": { + "version": "11.14.0", + "resolved": "https://registry.npmjs.org/@emotion/cache/-/cache-11.14.0.tgz", + "integrity": "sha512-L/B1lc/TViYk4DcpGxtAVbx0ZyiKM5ktoIyafGkH6zg/tj+mA+NE//aPYKG0k8kCHSHVJrpLpcAlOBEXQ3SavA==", + "license": "MIT", + "dependencies": { + "@emotion/memoize": "^0.9.0", + "@emotion/sheet": "^1.4.0", + "@emotion/utils": "^1.4.2", + "@emotion/weak-memoize": "^0.4.0", + "stylis": "4.2.0" + } + }, + "node_modules/@emotion/cache/node_modules/stylis": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/stylis/-/stylis-4.2.0.tgz", + "integrity": "sha512-Orov6g6BB1sDfYgzWfTHDOxamtX1bE/zo104Dh9e6fqJ3PooipYyfJ0pUmrZO2wAvO8YbEyeFrkV91XTsGMSrw==", + "license": "MIT" + }, + "node_modules/@emotion/css": { + "version": "11.13.5", + "resolved": "https://registry.npmjs.org/@emotion/css/-/css-11.13.5.tgz", + "integrity": "sha512-wQdD0Xhkn3Qy2VNcIzbLP9MR8TafI0MJb7BEAXKp+w4+XqErksWR4OXomuDzPsN4InLdGhVe6EYcn2ZIUCpB8w==", + "license": "MIT", + 
"dependencies": { + "@emotion/babel-plugin": "^11.13.5", + "@emotion/cache": "^11.13.5", + "@emotion/serialize": "^1.3.3", + "@emotion/sheet": "^1.4.0", + "@emotion/utils": "^1.4.2" + } + }, + "node_modules/@emotion/hash": { + "version": "0.8.0", + "license": "MIT" + }, + "node_modules/@emotion/memoize": { + "version": "0.9.0", + "resolved": "https://registry.npmjs.org/@emotion/memoize/-/memoize-0.9.0.tgz", + "integrity": "sha512-30FAj7/EoJ5mwVPOWhAyCX+FPfMDrVecJAM+Iw9NRoSl4BBAQeqj4cApHHUXOVvIPgLVDsCFoz/hGD+5QQD1GQ==", + "license": "MIT" + }, + "node_modules/@emotion/serialize": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/@emotion/serialize/-/serialize-1.3.3.tgz", + "integrity": "sha512-EISGqt7sSNWHGI76hC7x1CksiXPahbxEOrC5RjmFRJTqLyEK9/9hZvBbiYn70dw4wuwMKiEMCUlR6ZXTSWQqxA==", + "license": "MIT", + "dependencies": { + "@emotion/hash": "^0.9.2", + "@emotion/memoize": "^0.9.0", + "@emotion/unitless": "^0.10.0", + "@emotion/utils": "^1.4.2", + "csstype": "^3.0.2" + } + }, + "node_modules/@emotion/serialize/node_modules/@emotion/hash": { + "version": "0.9.2", + "resolved": "https://registry.npmjs.org/@emotion/hash/-/hash-0.9.2.tgz", + "integrity": "sha512-MyqliTZGuOm3+5ZRSaaBGP3USLw6+EGykkwZns2EPC5g8jJ4z9OrdZY9apkl3+UP9+sdz76YYkwCKP5gh8iY3g==", + "license": "MIT" + }, + "node_modules/@emotion/serialize/node_modules/@emotion/unitless": { + "version": "0.10.0", + "resolved": "https://registry.npmjs.org/@emotion/unitless/-/unitless-0.10.0.tgz", + "integrity": "sha512-dFoMUuQA20zvtVTuxZww6OHoJYgrzfKM1t52mVySDJnMSEa08ruEvdYQbhvyu6soU+NeLVd3yKfTfT0NeV6qGg==", + "license": "MIT" + }, + "node_modules/@emotion/sheet": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/@emotion/sheet/-/sheet-1.4.0.tgz", + "integrity": "sha512-fTBW9/8r2w3dXWYM4HCB1Rdp8NLibOw2+XELH5m5+AkWiL/KqYX6dc0kKYlaYyKjrQ6ds33MCdMPEwgs2z1rqg==", + "license": "MIT" + }, + "node_modules/@emotion/unitless": { + "version": "0.7.5", + "license": "MIT" + }, + 
"node_modules/@emotion/utils": { + "version": "1.4.2", + "resolved": "https://registry.npmjs.org/@emotion/utils/-/utils-1.4.2.tgz", + "integrity": "sha512-3vLclRofFziIa3J2wDh9jjbkUz9qk5Vi3IZ/FSTKViB0k+ef0fPV7dYrUIugbgupYDx7v9ud/SjrtEP8Y4xLoA==", + "license": "MIT" + }, + "node_modules/@emotion/weak-memoize": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/@emotion/weak-memoize/-/weak-memoize-0.4.0.tgz", + "integrity": "sha512-snKqtPW01tN0ui7yu9rGv69aJXr/a/Ywvl11sUjNtEcRc+ng/mQriFL0wLXMef74iHa/EkftbDzU9F8iFbH+zg==", + "license": "MIT" + }, + "node_modules/@esbuild/aix-ppc64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.4.tgz", + "integrity": "sha512-cQPwL2mp2nSmHHJlCyoXgHGhbEPMrEEU5xhkcy3Hs/O7nGZqEpZ2sUtLaL9MORLtDfRvVl2/3PAuEkYZH0Ty8Q==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "aix" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-arm": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.27.4.tgz", + "integrity": "sha512-X9bUgvxiC8CHAGKYufLIHGXPJWnr0OCdR0anD2e21vdvgCI8lIfqFbnoeOz7lBjdrAGUhqLZLcQo6MLhTO2DKQ==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-arm64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.27.4.tgz", + "integrity": "sha512-gdLscB7v75wRfu7QSm/zg6Rx29VLdy9eTr2t44sfTW7CxwAtQghZ4ZnqHk3/ogz7xao0QAgrkradbBzcqFPasw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-x64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.27.4.tgz", + "integrity": 
"sha512-PzPFnBNVF292sfpfhiyiXCGSn9HZg5BcAz+ivBuSsl6Rk4ga1oEXAamhOXRFyMcjwr2DVtm40G65N3GLeH1Lvw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/darwin-arm64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.27.4.tgz", + "integrity": "sha512-b7xaGIwdJlht8ZFCvMkpDN6uiSmnxxK56N2GDTMYPr2/gzvfdQN8rTfBsvVKmIVY/X7EM+/hJKEIbbHs9oA4tQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/darwin-x64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.27.4.tgz", + "integrity": "sha512-sR+OiKLwd15nmCdqpXMnuJ9W2kpy0KigzqScqHI3Hqwr7IXxBp3Yva+yJwoqh7rE8V77tdoheRYataNKL4QrPw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/freebsd-arm64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.27.4.tgz", + "integrity": "sha512-jnfpKe+p79tCnm4GVav68A7tUFeKQwQyLgESwEAUzyxk/TJr4QdGog9sqWNcUbr/bZt/O/HXouspuQDd9JxFSw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/freebsd-x64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.27.4.tgz", + "integrity": "sha512-2kb4ceA/CpfUrIcTUl1wrP/9ad9Atrp5J94Lq69w7UwOMolPIGrfLSvAKJp0RTvkPPyn6CIWrNy13kyLikZRZQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-arm": { + "version": "0.27.4", + "resolved": 
"https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.27.4.tgz", + "integrity": "sha512-aBYgcIxX/wd5n2ys0yESGeYMGF+pv6g0DhZr3G1ZG4jMfruU9Tl1i2Z+Wnj9/KjGz1lTLCcorqE2viePZqj4Eg==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-arm64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.27.4.tgz", + "integrity": "sha512-7nQOttdzVGth1iz57kxg9uCz57dxQLHWxopL6mYuYthohPKEK0vU0C3O21CcBK6KDlkYVcnDXY099HcCDXd9dA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-ia32": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.27.4.tgz", + "integrity": "sha512-oPtixtAIzgvzYcKBQM/qZ3R+9TEUd1aNJQu0HhGyqtx6oS7qTpvjheIWBbes4+qu1bNlo2V4cbkISr8q6gRBFA==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-loong64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.27.4.tgz", + "integrity": "sha512-8mL/vh8qeCoRcFH2nM8wm5uJP+ZcVYGGayMavi8GmRJjuI3g1v6Z7Ni0JJKAJW+m0EtUuARb6Lmp4hMjzCBWzA==", + "cpu": [ + "loong64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-mips64el": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.27.4.tgz", + "integrity": "sha512-1RdrWFFiiLIW7LQq9Q2NES+HiD4NyT8Itj9AUeCl0IVCA459WnPhREKgwrpaIfTOe+/2rdntisegiPWn/r/aAw==", + "cpu": [ + "mips64el" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + 
} + }, + "node_modules/@esbuild/linux-ppc64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.27.4.tgz", + "integrity": "sha512-tLCwNG47l3sd9lpfyx9LAGEGItCUeRCWeAx6x2Jmbav65nAwoPXfewtAdtbtit/pJFLUWOhpv0FpS6GQAmPrHA==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-riscv64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.27.4.tgz", + "integrity": "sha512-BnASypppbUWyqjd1KIpU4AUBiIhVr6YlHx/cnPgqEkNoVOhHg+YiSVxM1RLfiy4t9cAulbRGTNCKOcqHrEQLIw==", + "cpu": [ + "riscv64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-s390x": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.27.4.tgz", + "integrity": "sha512-+eUqgb/Z7vxVLezG8bVB9SfBie89gMueS+I0xYh2tJdw3vqA/0ImZJ2ROeWwVJN59ihBeZ7Tu92dF/5dy5FttA==", + "cpu": [ + "s390x" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-x64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.27.4.tgz", + "integrity": "sha512-S5qOXrKV8BQEzJPVxAwnryi2+Iq5pB40gTEIT69BQONqR7JH1EPIcQ/Uiv9mCnn05jff9umq/5nqzxlqTOg9NA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/netbsd-arm64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.27.4.tgz", + "integrity": "sha512-xHT8X4sb0GS8qTqiwzHqpY00C95DPAq7nAwX35Ie/s+LO9830hrMd3oX0ZMKLvy7vsonee73x0lmcdOVXFzd6Q==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", 
+ "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/netbsd-x64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.27.4.tgz", + "integrity": "sha512-RugOvOdXfdyi5Tyv40kgQnI0byv66BFgAqjdgtAKqHoZTbTF2QqfQrFwa7cHEORJf6X2ht+l9ABLMP0dnKYsgg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openbsd-arm64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.27.4.tgz", + "integrity": "sha512-2MyL3IAaTX+1/qP0O1SwskwcwCoOI4kV2IBX1xYnDDqthmq5ArrW94qSIKCAuRraMgPOmG0RDTA74mzYNQA9ow==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openbsd-x64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.27.4.tgz", + "integrity": "sha512-u8fg/jQ5aQDfsnIV6+KwLOf1CmJnfu1ShpwqdwC0uA7ZPwFws55Ngc12vBdeUdnuWoQYx/SOQLGDcdlfXhYmXQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openharmony-arm64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.27.4.tgz", + "integrity": "sha512-JkTZrl6VbyO8lDQO3yv26nNr2RM2yZzNrNHEsj9bm6dOwwu9OYN28CjzZkH57bh4w0I2F7IodpQvUAEd1mbWXg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openharmony" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/sunos-x64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.27.4.tgz", + "integrity": 
"sha512-/gOzgaewZJfeJTlsWhvUEmUG4tWEY2Spp5M20INYRg2ZKl9QPO3QEEgPeRtLjEWSW8FilRNacPOg8R1uaYkA6g==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "sunos" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-arm64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.27.4.tgz", + "integrity": "sha512-Z9SExBg2y32smoDQdf1HRwHRt6vAHLXcxD2uGgO/v2jK7Y718Ix4ndsbNMU/+1Qiem9OiOdaqitioZwxivhXYg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-ia32": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.27.4.tgz", + "integrity": "sha512-DAyGLS0Jz5G5iixEbMHi5KdiApqHBWMGzTtMiJ72ZOLhbu/bzxgAe8Ue8CTS3n3HbIUHQz/L51yMdGMeoxXNJw==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-x64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.27.4.tgz", + "integrity": "sha512-+knoa0BDoeXgkNvvV1vvbZX4+hizelrkwmGJBdT17t8FNPwG2lKemmuMZlmaNQ3ws3DKKCxpb4zRZEIp3UxFCg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@jest/expect-utils": { + "version": "29.5.0", + "license": "MIT", + "dependencies": { + "jest-get-type": "^29.4.3" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/schemas": { + "version": "29.4.3", + "license": "MIT", + "dependencies": { + "@sinclair/typebox": "^0.25.16" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/types": { + "version": "29.5.0", + "license": "MIT", + "dependencies": { + 
"@jest/schemas": "^29.4.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jridgewell/gen-mapping": { + "version": "0.3.13", + "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.13.tgz", + "integrity": "sha512-2kkt/7niJ6MgEPxF0bYdQ6etZaA+fQvDcLKckhy1yIQOzaoKjBBjSj63/aLVjYE3qhRt5dvM+uUyfCg6UKCBbA==", + "license": "MIT", + "dependencies": { + "@jridgewell/sourcemap-codec": "^1.5.0", + "@jridgewell/trace-mapping": "^0.3.24" + } + }, + "node_modules/@jridgewell/remapping": { + "version": "2.3.5", + "resolved": "https://registry.npmjs.org/@jridgewell/remapping/-/remapping-2.3.5.tgz", + "integrity": "sha512-LI9u/+laYG4Ds1TDKSJW2YPrIlcVYOwi2fUC6xB43lueCjgxV4lffOCZCtYFiH6TNOX+tQKXx97T4IKHbhyHEQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/gen-mapping": "^0.3.5", + "@jridgewell/trace-mapping": "^0.3.24" + } + }, + "node_modules/@jridgewell/resolve-uri": { + "version": "3.1.0", + "license": "MIT", + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/source-map": { + "version": "0.3.3", + "dev": true, + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "@jridgewell/gen-mapping": "^0.3.0", + "@jridgewell/trace-mapping": "^0.3.9" + } + }, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.5.5", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz", + "integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==", + "license": "MIT" + }, + "node_modules/@jridgewell/trace-mapping": { + "version": "0.3.31", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.31.tgz", + "integrity": 
"sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw==", + "license": "MIT", + "dependencies": { + "@jridgewell/resolve-uri": "^3.1.0", + "@jridgewell/sourcemap-codec": "^1.4.14" + } + }, + "node_modules/@mapbox/node-pre-gyp": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz", + "integrity": "sha512-Yhlar6v9WQgUp/He7BdgzOz8lqMQ8sU+jkCq7Wx8Myc5YFJLbEe7lgui/V7G1qB1DJykHSGwreceSaD60Y0PUQ==", + "license": "BSD-3-Clause", + "optional": true, + "dependencies": { + "detect-libc": "^2.0.0", + "https-proxy-agent": "^5.0.0", + "make-dir": "^3.1.0", + "node-fetch": "^2.6.7", + "nopt": "^5.0.0", + "npmlog": "^5.0.1", + "rimraf": "^3.0.2", + "semver": "^7.3.5", + "tar": "^6.1.11" + }, + "bin": { + "node-pre-gyp": "bin/node-pre-gyp" + } + }, + "node_modules/@mapbox/node-pre-gyp/node_modules/agent-base": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-6.0.2.tgz", + "integrity": "sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==", + "license": "MIT", + "optional": true, + "dependencies": { + "debug": "4" + }, + "engines": { + "node": ">= 6.0.0" + } + }, + "node_modules/@mapbox/node-pre-gyp/node_modules/https-proxy-agent": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz", + "integrity": "sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==", + "license": "MIT", + "optional": true, + "dependencies": { + "agent-base": "6", + "debug": "4" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/@mapbox/node-pre-gyp/node_modules/semver": { + "version": "7.7.4", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", + "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", + "license": "ISC", + "optional": true, + 
"bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/@monaco-editor/loader": { + "version": "1.5.0", + "license": "MIT", + "dependencies": { + "state-local": "^1.0.6" + } + }, + "node_modules/@monaco-editor/react": { + "version": "4.7.0", + "license": "MIT", + "dependencies": { + "@monaco-editor/loader": "^1.5.0" + }, + "peerDependencies": { + "monaco-editor": ">= 0.25.0 < 1", + "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0", + "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" + } + }, + "node_modules/@rc-component/color-picker": { + "version": "1.5.1", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.23.6", + "@ctrl/tinycolor": "^3.6.1", + "classnames": "^2.2.6", + "rc-util": "^5.38.1" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/@rc-component/context": { + "version": "1.4.0", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.10.1", + "rc-util": "^5.27.0" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/@rc-component/mini-decimal": { + "version": "1.1.0", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.18.0" + }, + "engines": { + "node": ">=8.x" + } + }, + "node_modules/@rc-component/mutate-observer": { + "version": "1.1.0", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.18.0", + "classnames": "^2.3.2", + "rc-util": "^5.24.4" + }, + "engines": { + "node": ">=8.x" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/@rc-component/portal": { + "version": "1.1.2", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.18.0", + "classnames": "^2.3.2", + "rc-util": "^5.24.4" + }, + "engines": { + "node": ">=8.x" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/@rc-component/tour": { + "version": "1.12.3", + "license": "MIT", + 
"dependencies": { + "@babel/runtime": "^7.18.0", + "@rc-component/portal": "^1.0.0-9", + "@rc-component/trigger": "^1.3.6", + "classnames": "^2.3.2", + "rc-util": "^5.24.4" + }, + "engines": { + "node": ">=8.x" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/@rc-component/trigger": { + "version": "1.18.2", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.23.2", + "@rc-component/portal": "^1.1.0", + "classnames": "^2.3.2", + "rc-motion": "^2.0.0", + "rc-resize-observer": "^1.3.1", + "rc-util": "^5.38.0" + }, + "engines": { + "node": ">=8.x" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/@react-awesome-query-builder/antd": { + "version": "6.6.10", + "license": "MIT", + "dependencies": { + "@react-awesome-query-builder/ui": "^6.6.10", + "lodash": "^4.17.21", + "prop-types": "^15.8.1", + "rc-picker": "^4.5.0" + }, + "peerDependencies": { + "@ant-design/icons": "^4.0.0 || ^5.0.0", + "antd": "^4.17.0 || ^5.0.0", + "react": "^16.8.4 || ^17.0.1 || ^18.0.0", + "react-dom": "^16.8.4 || ^17.0.1 || ^18.0.0" + } + }, + "node_modules/@react-awesome-query-builder/antd/node_modules/@rc-component/trigger": { + "version": "2.2.6", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.23.2", + "@rc-component/portal": "^1.1.0", + "classnames": "^2.3.2", + "rc-motion": "^2.0.0", + "rc-resize-observer": "^1.3.1", + "rc-util": "^5.44.0" + }, + "engines": { + "node": ">=8.x" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/@react-awesome-query-builder/antd/node_modules/rc-picker": { + "version": "4.9.2", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.24.7", + "@rc-component/trigger": "^2.0.0", + "classnames": "^2.2.1", + "rc-overflow": "^1.3.2", + "rc-resize-observer": "^1.4.0", + "rc-util": "^5.43.0" + }, + "engines": { + "node": ">=8.x" + }, + "peerDependencies": { + "date-fns": ">= 
2.x", + "dayjs": ">= 1.x", + "luxon": ">= 3.x", + "moment": ">= 2.x", + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + }, + "peerDependenciesMeta": { + "date-fns": { + "optional": true + }, + "dayjs": { + "optional": true + }, + "luxon": { + "optional": true + }, + "moment": { + "optional": true + } + } + }, + "node_modules/@react-awesome-query-builder/core": { + "version": "6.6.10", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.24.5", + "clone": "^2.1.2", + "i18next": "^23.11.5", + "immutable": "^4.3.6", + "json-logic-js": "^2.0.2", + "lodash": "^4.17.21", + "moment": "^2.30.1", + "spel2js": "^0.2.8", + "sqlstring": "^2.3.3" + } + }, + "node_modules/@react-awesome-query-builder/ui": { + "version": "6.6.10", + "license": "MIT", + "dependencies": { + "@react-awesome-query-builder/core": "^6.6.10", + "classnames": "^2.5.1", + "lodash": "^4.17.21", + "prop-types": "^15.8.1", + "react-redux": "^8.1.3", + "redux": "^4.2.1" + }, + "peerDependencies": { + "react": "^16.8.4 || ^17.0.1 || ^18.0.0", + "react-dom": "^16.8.4 || ^17.0.1 || ^18.0.0" + } + }, + "node_modules/@react-dnd/asap": { + "version": "5.0.2", + "license": "MIT" + }, + "node_modules/@react-dnd/invariant": { + "version": "4.0.2", + "license": "MIT" + }, + "node_modules/@react-dnd/shallowequal": { + "version": "4.0.2", + "license": "MIT" + }, + "node_modules/@react-pdf-viewer/attachment": { + "version": "3.12.0", + "license": "https://react-pdf-viewer.dev/license", + "dependencies": { + "@react-pdf-viewer/core": "3.12.0" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@react-pdf-viewer/bookmark": { + "version": "3.12.0", + "license": "https://react-pdf-viewer.dev/license", + "dependencies": { + "@react-pdf-viewer/core": "3.12.0" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@react-pdf-viewer/core": { + "version": "3.12.0", + "license": "https://react-pdf-viewer.dev/license", + 
"peerDependencies": { + "pdfjs-dist": "^2.16.105 || ^3.0.279", + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@react-pdf-viewer/default-layout": { + "version": "3.12.0", + "license": "https://react-pdf-viewer.dev/license", + "dependencies": { + "@react-pdf-viewer/attachment": "3.12.0", + "@react-pdf-viewer/bookmark": "3.12.0", + "@react-pdf-viewer/core": "3.12.0", + "@react-pdf-viewer/thumbnail": "3.12.0", + "@react-pdf-viewer/toolbar": "3.12.0" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@react-pdf-viewer/full-screen": { + "version": "3.12.0", + "license": "https://react-pdf-viewer.dev/license", + "dependencies": { + "@react-pdf-viewer/core": "3.12.0" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@react-pdf-viewer/get-file": { + "version": "3.12.0", + "license": "https://react-pdf-viewer.dev/license", + "dependencies": { + "@react-pdf-viewer/core": "3.12.0" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@react-pdf-viewer/highlight": { + "version": "3.12.0", + "license": "https://react-pdf-viewer.dev/license", + "dependencies": { + "@react-pdf-viewer/core": "3.12.0" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@react-pdf-viewer/open": { + "version": "3.12.0", + "license": "https://react-pdf-viewer.dev/license", + "dependencies": { + "@react-pdf-viewer/core": "3.12.0" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@react-pdf-viewer/page-navigation": { + "version": "3.12.0", + "license": "https://react-pdf-viewer.dev/license", + "dependencies": { + "@react-pdf-viewer/core": "3.12.0" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@react-pdf-viewer/print": { + "version": "3.12.0", + "license": 
"https://react-pdf-viewer.dev/license", + "dependencies": { + "@react-pdf-viewer/core": "3.12.0" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@react-pdf-viewer/properties": { + "version": "3.12.0", + "license": "https://react-pdf-viewer.dev/license", + "dependencies": { + "@react-pdf-viewer/core": "3.12.0" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@react-pdf-viewer/rotate": { + "version": "3.12.0", + "license": "https://react-pdf-viewer.dev/license", + "dependencies": { + "@react-pdf-viewer/core": "3.12.0" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@react-pdf-viewer/scroll-mode": { + "version": "3.12.0", + "license": "https://react-pdf-viewer.dev/license", + "dependencies": { + "@react-pdf-viewer/core": "3.12.0" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@react-pdf-viewer/search": { + "version": "3.12.0", + "license": "https://react-pdf-viewer.dev/license", + "dependencies": { + "@react-pdf-viewer/core": "3.12.0" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@react-pdf-viewer/selection-mode": { + "version": "3.12.0", + "license": "https://react-pdf-viewer.dev/license", + "dependencies": { + "@react-pdf-viewer/core": "3.12.0" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@react-pdf-viewer/theme": { + "version": "3.12.0", + "license": "https://react-pdf-viewer.dev/license", + "dependencies": { + "@react-pdf-viewer/core": "3.12.0" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@react-pdf-viewer/thumbnail": { + "version": "3.12.0", + "license": "https://react-pdf-viewer.dev/license", + "dependencies": { + "@react-pdf-viewer/core": "3.12.0" + }, + "peerDependencies": { 
+ "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@react-pdf-viewer/toolbar": { + "version": "3.12.0", + "license": "https://react-pdf-viewer.dev/license", + "dependencies": { + "@react-pdf-viewer/core": "3.12.0", + "@react-pdf-viewer/full-screen": "3.12.0", + "@react-pdf-viewer/get-file": "3.12.0", + "@react-pdf-viewer/open": "3.12.0", + "@react-pdf-viewer/page-navigation": "3.12.0", + "@react-pdf-viewer/print": "3.12.0", + "@react-pdf-viewer/properties": "3.12.0", + "@react-pdf-viewer/rotate": "3.12.0", + "@react-pdf-viewer/scroll-mode": "3.12.0", + "@react-pdf-viewer/search": "3.12.0", + "@react-pdf-viewer/selection-mode": "3.12.0", + "@react-pdf-viewer/theme": "3.12.0", + "@react-pdf-viewer/zoom": "3.12.0" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@react-pdf-viewer/zoom": { + "version": "3.12.0", + "license": "https://react-pdf-viewer.dev/license", + "dependencies": { + "@react-pdf-viewer/core": "3.12.0" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@reduxjs/toolkit": { + "version": "2.11.2", + "resolved": "https://registry.npmjs.org/@reduxjs/toolkit/-/toolkit-2.11.2.tgz", + "integrity": "sha512-Kd6kAHTA6/nUpp8mySPqj3en3dm0tdMIgbttnQ1xFMVpufoj+ADi8pXLBsd4xzTRHQa7t/Jv8W5UnCuW4kuWMQ==", + "license": "MIT", + "dependencies": { + "@standard-schema/spec": "^1.0.0", + "@standard-schema/utils": "^0.3.0", + "immer": "^11.0.0", + "redux": "^5.0.1", + "redux-thunk": "^3.1.0", + "reselect": "^5.1.0" + }, + "peerDependencies": { + "react": "^16.9.0 || ^17.0.0 || ^18 || ^19", + "react-redux": "^7.2.1 || ^8.1.3 || ^9.0.0" + }, + "peerDependenciesMeta": { + "react": { + "optional": true + }, + "react-redux": { + "optional": true + } + } + }, + "node_modules/@reduxjs/toolkit/node_modules/immer": { + "version": "11.1.4", + "resolved": "https://registry.npmjs.org/immer/-/immer-11.1.4.tgz", + "integrity": 
"sha512-XREFCPo6ksxVzP4E0ekD5aMdf8WMwmdNaz6vuvxgI40UaEiu6q3p8X52aU6GdyvLY3XXX/8R7JOTXStz/nBbRw==", + "license": "MIT", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/immer" + } + }, + "node_modules/@reduxjs/toolkit/node_modules/redux": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/redux/-/redux-5.0.1.tgz", + "integrity": "sha512-M9/ELqF6fy8FwmkpnF0S3YKOqMyoWJ4+CS5Efg2ct3oY9daQvd/Pc71FpGZsVsbl3Cpb+IIcjBDUnnyBdQbq4w==", + "license": "MIT" + }, + "node_modules/@reduxjs/toolkit/node_modules/redux-thunk": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/redux-thunk/-/redux-thunk-3.1.0.tgz", + "integrity": "sha512-NW2r5T6ksUKXCabzhL9z+h206HQw/NJkcLm1GPImRQ8IzfXwRGqjVhKJGauHirT0DAuyy6hjdnMZaRoAcy0Klw==", + "license": "MIT", + "peerDependencies": { + "redux": "^5.0.0" + } + }, + "node_modules/@remix-run/router": { + "version": "1.6.3", + "license": "MIT", + "engines": { + "node": ">=14" + } + }, + "node_modules/@rjsf/antd": { + "version": "5.16.1", + "license": "Apache-2.0", + "dependencies": { + "classnames": "^2.5.1", + "lodash": "^4.17.21", + "lodash-es": "^4.17.21", + "rc-picker": "^2.7.6" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@ant-design/icons": "^4.0.0 || ^5.0.0", + "@rjsf/core": "^5.16.x", + "@rjsf/utils": "^5.16.x", + "antd": "^4.24.0 || ^5.8.5", + "dayjs": "^1.8.0", + "react": "^16.14.0 || >=17" + } + }, + "node_modules/@rjsf/core": { + "version": "5.16.1", + "license": "Apache-2.0", + "dependencies": { + "lodash": "^4.17.21", + "lodash-es": "^4.17.21", + "markdown-to-jsx": "^7.4.0", + "nanoid": "^3.3.7", + "prop-types": "^15.8.1" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@rjsf/utils": "^5.16.x", + "react": "^16.14.0 || >=17" + } + }, + "node_modules/@rjsf/utils": { + "version": "5.16.1", + "license": "Apache-2.0", + "dependencies": { + "json-schema-merge-allof": "^0.8.1", + "jsonpointer": "^5.0.1", + "lodash": "^4.17.21", + "lodash-es": 
"^4.17.21", + "react-is": "^18.2.0" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "react": "^16.14.0 || >=17" + } + }, + "node_modules/@rjsf/validator-ajv8": { + "version": "5.8.1", + "license": "Apache-2.0", + "dependencies": { + "ajv": "^8.12.0", + "ajv-formats": "^2.1.1", + "lodash": "^4.17.21", + "lodash-es": "^4.17.21" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@rjsf/utils": "^5.8.x" + } + }, + "node_modules/@rolldown/pluginutils": { + "version": "1.0.0-beta.27", + "resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-beta.27.tgz", + "integrity": "sha512-+d0F4MKMCbeVUJwG96uQ4SgAznZNSq93I3V+9NHA4OpvqG8mRCpGdKmK8l/dl02h2CCDHwW2FqilnTyDcAnqjA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@rollup/rollup-android-arm-eabi": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.60.0.tgz", + "integrity": "sha512-WOhNW9K8bR3kf4zLxbfg6Pxu2ybOUbB2AjMDHSQx86LIF4rH4Ft7vmMwNt0loO0eonglSNy4cpD3MKXXKQu0/A==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ] + }, + "node_modules/@rollup/rollup-android-arm64": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.60.0.tgz", + "integrity": "sha512-u6JHLll5QKRvjciE78bQXDmqRqNs5M/3GVqZeMwvmjaNODJih/WIrJlFVEihvV0MiYFmd+ZyPr9wxOVbPAG2Iw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ] + }, + "node_modules/@rollup/rollup-darwin-arm64": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.60.0.tgz", + "integrity": "sha512-qEF7CsKKzSRc20Ciu2Zw1wRrBz4g56F7r/vRwY430UPp/nt1x21Q/fpJ9N5l47WWvJlkNCPJz3QRVw008fi7yA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ] + }, + 
"node_modules/@rollup/rollup-darwin-x64": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.60.0.tgz", + "integrity": "sha512-WADYozJ4QCnXCH4wPB+3FuGmDPoFseVCUrANmA5LWwGmC6FL14BWC7pcq+FstOZv3baGX65tZ378uT6WG8ynTw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@rollup/rollup-freebsd-arm64": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.60.0.tgz", + "integrity": "sha512-6b8wGHJlDrGeSE3aH5mGNHBjA0TTkxdoNHik5EkvPHCt351XnigA4pS7Wsj/Eo9Y8RBU6f35cjN9SYmCFBtzxw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ] + }, + "node_modules/@rollup/rollup-freebsd-x64": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.60.0.tgz", + "integrity": "sha512-h25Ga0t4jaylMB8M/JKAyrvvfxGRjnPQIR8lnCayyzEjEOx2EJIlIiMbhpWxDRKGKF8jbNH01NnN663dH638mA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ] + }, + "node_modules/@rollup/rollup-linux-arm-gnueabihf": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.60.0.tgz", + "integrity": "sha512-RzeBwv0B3qtVBWtcuABtSuCzToo2IEAIQrcyB/b2zMvBWVbjo8bZDjACUpnaafaxhTw2W+imQbP2BD1usasK4g==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm-musleabihf": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.60.0.tgz", + "integrity": "sha512-Sf7zusNI2CIU1HLzuu9Tc5YGAHEZs5Lu7N1ssJG4Tkw6e0MEsN7NdjUDDfGNHy2IU+ENyWT+L2obgWiguWibWQ==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": 
true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm64-gnu": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.60.0.tgz", + "integrity": "sha512-DX2x7CMcrJzsE91q7/O02IJQ5/aLkVtYFryqCjduJhUfGKG6yJV8hxaw8pZa93lLEpPTP/ohdN4wFz7yp/ry9A==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm64-musl": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.60.0.tgz", + "integrity": "sha512-09EL+yFVbJZlhcQfShpswwRZ0Rg+z/CsSELFCnPt3iK+iqwGsI4zht3secj5vLEs957QvFFXnzAT0FFPIxSrkQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-loong64-gnu": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-gnu/-/rollup-linux-loong64-gnu-4.60.0.tgz", + "integrity": "sha512-i9IcCMPr3EXm8EQg5jnja0Zyc1iFxJjZWlb4wr7U2Wx/GrddOuEafxRdMPRYVaXjgbhvqalp6np07hN1w9kAKw==", + "cpu": [ + "loong64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-loong64-musl": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-musl/-/rollup-linux-loong64-musl-4.60.0.tgz", + "integrity": "sha512-DGzdJK9kyJ+B78MCkWeGnpXJ91tK/iKA6HwHxF4TAlPIY7GXEvMe8hBFRgdrR9Ly4qebR/7gfUs9y2IoaVEyog==", + "cpu": [ + "loong64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-ppc64-gnu": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-gnu/-/rollup-linux-ppc64-gnu-4.60.0.tgz", + "integrity": "sha512-RwpnLsqC8qbS8z1H1AxBA1H6qknR4YpPR9w2XX0vo2Sz10miu57PkNcnHVaZkbqyw/kUWfKMI73jhmfi9BRMUQ==", + 
"cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-ppc64-musl": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-musl/-/rollup-linux-ppc64-musl-4.60.0.tgz", + "integrity": "sha512-Z8pPf54Ly3aqtdWC3G4rFigZgNvd+qJlOE52fmko3KST9SoGfAdSRCwyoyG05q1HrrAblLbk1/PSIV+80/pxLg==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-riscv64-gnu": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.60.0.tgz", + "integrity": "sha512-3a3qQustp3COCGvnP4SvrMHnPQ9d1vzCakQVRTliaz8cIp/wULGjiGpbcqrkv0WrHTEp8bQD/B3HBjzujVWLOA==", + "cpu": [ + "riscv64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-riscv64-musl": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.60.0.tgz", + "integrity": "sha512-pjZDsVH/1VsghMJ2/kAaxt6dL0psT6ZexQVrijczOf+PeP2BUqTHYejk3l6TlPRydggINOeNRhvpLa0AYpCWSQ==", + "cpu": [ + "riscv64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-s390x-gnu": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.60.0.tgz", + "integrity": "sha512-3ObQs0BhvPgiUVZrN7gqCSvmFuMWvWvsjG5ayJ3Lraqv+2KhOsp+pUbigqbeWqueGIsnn+09HBw27rJ+gYK4VQ==", + "cpu": [ + "s390x" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-x64-gnu": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.60.0.tgz", + "integrity": 
"sha512-EtylprDtQPdS5rXvAayrNDYoJhIz1/vzN2fEubo3yLE7tfAw+948dO0g4M0vkTVFhKojnF+n6C8bDNe+gDRdTg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-x64-musl": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.60.0.tgz", + "integrity": "sha512-k09oiRCi/bHU9UVFqD17r3eJR9bn03TyKraCrlz5ULFJGdJGi7VOmm9jl44vOJvRJ6P7WuBi/s2A97LxxHGIdw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-openbsd-x64": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-openbsd-x64/-/rollup-openbsd-x64-4.60.0.tgz", + "integrity": "sha512-1o/0/pIhozoSaDJoDcec+IVLbnRtQmHwPV730+AOD29lHEEo4F5BEUB24H0OBdhbBBDwIOSuf7vgg0Ywxdfiiw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ] + }, + "node_modules/@rollup/rollup-openharmony-arm64": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-openharmony-arm64/-/rollup-openharmony-arm64-4.60.0.tgz", + "integrity": "sha512-pESDkos/PDzYwtyzB5p/UoNU/8fJo68vcXM9ZW2V0kjYayj1KaaUfi1NmTUTUpMn4UhU4gTuK8gIaFO4UGuMbA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openharmony" + ] + }, + "node_modules/@rollup/rollup-win32-arm64-msvc": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.60.0.tgz", + "integrity": "sha512-hj1wFStD7B1YBeYmvY+lWXZ7ey73YGPcViMShYikqKT1GtstIKQAtfUI6yrzPjAy/O7pO0VLXGmUVWXQMaYgTQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@rollup/rollup-win32-ia32-msvc": { + "version": "4.60.0", + "resolved": 
"https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.60.0.tgz", + "integrity": "sha512-SyaIPFoxmUPlNDq5EHkTbiKzmSEmq/gOYFI/3HHJ8iS/v1mbugVa7dXUzcJGQfoytp9DJFLhHH4U3/eTy2Bq4w==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@rollup/rollup-win32-x64-gnu": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-gnu/-/rollup-win32-x64-gnu-4.60.0.tgz", + "integrity": "sha512-RdcryEfzZr+lAr5kRm2ucN9aVlCCa2QNq4hXelZxb8GG0NJSazq44Z3PCCc8wISRuCVnGs0lQJVX5Vp6fKA+IA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@rollup/rollup-win32-x64-msvc": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.60.0.tgz", + "integrity": "sha512-PrsWNQ8BuE00O3Xsx3ALh2Df8fAj9+cvvX9AIA6o4KpATR98c9mud4XtDWVvsEuyia5U4tVSTKygawyJkjm60w==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@sinclair/typebox": { + "version": "0.25.24", + "license": "MIT" + }, + "node_modules/@socket.io/component-emitter": { + "version": "3.1.0", + "license": "MIT" + }, + "node_modules/@standard-schema/spec": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@standard-schema/spec/-/spec-1.1.0.tgz", + "integrity": "sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w==", + "license": "MIT" + }, + "node_modules/@standard-schema/utils": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/@standard-schema/utils/-/utils-0.3.0.tgz", + "integrity": "sha512-e7Mew686owMaPJVNNLs55PUvgz371nKgwsc4vxE49zsODpJEnxgxRo2y/OKrqueavXgZNMDVj3DdHFlaSAeU8g==", + "license": "MIT" + }, + "node_modules/@stripe/stripe-js": { + "version": "4.2.0", + "license": "MIT", + "engines": { + "node": ">=12.16" + } + 
}, + "node_modules/@testing-library/dom": { + "version": "9.3.1", + "license": "MIT", + "peer": true, + "dependencies": { + "@babel/code-frame": "^7.10.4", + "@babel/runtime": "^7.12.5", + "@types/aria-query": "^5.0.1", + "aria-query": "5.1.3", + "chalk": "^4.1.0", + "dom-accessibility-api": "^0.5.9", + "lz-string": "^1.5.0", + "pretty-format": "^27.0.2" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/@testing-library/jest-dom": { + "version": "5.16.5", + "license": "MIT", + "dependencies": { + "@adobe/css-tools": "^4.0.1", + "@babel/runtime": "^7.9.2", + "@types/testing-library__jest-dom": "^5.9.1", + "aria-query": "^5.0.0", + "chalk": "^3.0.0", + "css.escape": "^1.5.1", + "dom-accessibility-api": "^0.5.6", + "lodash": "^4.17.15", + "redent": "^3.0.0" + }, + "engines": { + "node": ">=8", + "npm": ">=6", + "yarn": ">=1" + } + }, + "node_modules/@testing-library/jest-dom/node_modules/chalk": { + "version": "3.0.0", + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/@testing-library/react": { + "version": "13.4.0", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.12.5", + "@testing-library/dom": "^8.5.0", + "@types/react-dom": "^18.0.0" + }, + "engines": { + "node": ">=12" + }, + "peerDependencies": { + "react": "^18.0.0", + "react-dom": "^18.0.0" + } + }, + "node_modules/@testing-library/react/node_modules/@testing-library/dom": { + "version": "8.20.0", + "license": "MIT", + "dependencies": { + "@babel/code-frame": "^7.10.4", + "@babel/runtime": "^7.12.5", + "@types/aria-query": "^5.0.1", + "aria-query": "^5.0.0", + "chalk": "^4.1.0", + "dom-accessibility-api": "^0.5.9", + "lz-string": "^1.4.4", + "pretty-format": "^27.0.2" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/@testing-library/user-event": { + "version": "13.5.0", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.12.5" + }, + "engines": { + 
"node": ">=10", + "npm": ">=6" + }, + "peerDependencies": { + "@testing-library/dom": ">=7.21.4" + } + }, + "node_modules/@types/aria-query": { + "version": "5.0.1", + "license": "MIT" + }, + "node_modules/@types/babel__core": { + "version": "7.20.5", + "resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.20.5.tgz", + "integrity": "sha512-qoQprZvz5wQFJwMDqeseRXWv3rqMvhgpbXFfVyWhbx9X47POIA6i/+dXefEmZKoAgOaTdaIgNSMqMIU61yRyzA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/parser": "^7.20.7", + "@babel/types": "^7.20.7", + "@types/babel__generator": "*", + "@types/babel__template": "*", + "@types/babel__traverse": "*" + } + }, + "node_modules/@types/babel__generator": { + "version": "7.6.4", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/types": "^7.0.0" + } + }, + "node_modules/@types/babel__template": { + "version": "7.4.1", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/parser": "^7.1.0", + "@babel/types": "^7.0.0" + } + }, + "node_modules/@types/babel__traverse": { + "version": "7.20.1", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/types": "^7.20.7" + } + }, + "node_modules/@types/body-parser": { + "version": "1.19.2", + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "@types/connect": "*", + "@types/node": "*" + } + }, + "node_modules/@types/chai": { + "version": "5.2.3", + "resolved": "https://registry.npmjs.org/@types/chai/-/chai-5.2.3.tgz", + "integrity": "sha512-Mw558oeA9fFbv65/y4mHtXDs9bPnFMZAL/jxdPFUpOHHIXX91mcgEHbS5Lahr+pwZFR8A7GQleRWeI6cGFC2UA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/deep-eql": "*", + "assertion-error": "^2.0.1" + } + }, + "node_modules/@types/connect": { + "version": "3.4.35", + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "@types/node": "*" + } + }, + "node_modules/@types/d3-array": { + "version": "3.2.2", + "resolved": 
"https://registry.npmjs.org/@types/d3-array/-/d3-array-3.2.2.tgz", + "integrity": "sha512-hOLWVbm7uRza0BYXpIIW5pxfrKe0W+D5lrFiAEYR+pb6w3N2SwSMaJbXdUfSEv+dT4MfHBLtn5js0LAWaO6otw==", + "license": "MIT" + }, + "node_modules/@types/d3-color": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/@types/d3-color/-/d3-color-3.1.3.tgz", + "integrity": "sha512-iO90scth9WAbmgv7ogoq57O9YpKmFBbmoEoCHDB2xMBY0+/KVrqAaCDyCE16dUspeOvIxFFRI+0sEtqDqy2b4A==", + "license": "MIT" + }, + "node_modules/@types/d3-ease": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/@types/d3-ease/-/d3-ease-3.0.2.tgz", + "integrity": "sha512-NcV1JjO5oDzoK26oMzbILE6HW7uVXOHLQvHshBUW4UMdZGfiY6v5BeQwh9a9tCzv+CeefZQHJt5SRgK154RtiA==", + "license": "MIT" + }, + "node_modules/@types/d3-interpolate": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/d3-interpolate/-/d3-interpolate-3.0.4.tgz", + "integrity": "sha512-mgLPETlrpVV1YRJIglr4Ez47g7Yxjl1lj7YKsiMCb27VJH9W8NVM6Bb9d8kkpG/uAQS5AmbA48q2IAolKKo1MA==", + "license": "MIT", + "dependencies": { + "@types/d3-color": "*" + } + }, + "node_modules/@types/d3-path": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/@types/d3-path/-/d3-path-3.1.1.tgz", + "integrity": "sha512-VMZBYyQvbGmWyWVea0EHs/BwLgxc+MKi1zLDCONksozI4YJMcTt8ZEuIR4Sb1MMTE8MMW49v0IwI5+b7RmfWlg==", + "license": "MIT" + }, + "node_modules/@types/d3-scale": { + "version": "4.0.9", + "resolved": "https://registry.npmjs.org/@types/d3-scale/-/d3-scale-4.0.9.tgz", + "integrity": "sha512-dLmtwB8zkAeO/juAMfnV+sItKjlsw2lKdZVVy6LRr0cBmegxSABiLEpGVmSJJ8O08i4+sGR6qQtb6WtuwJdvVw==", + "license": "MIT", + "dependencies": { + "@types/d3-time": "*" + } + }, + "node_modules/@types/d3-shape": { + "version": "3.1.8", + "resolved": "https://registry.npmjs.org/@types/d3-shape/-/d3-shape-3.1.8.tgz", + "integrity": "sha512-lae0iWfcDeR7qt7rA88BNiqdvPS5pFVPpo5OfjElwNaT2yyekbM0C9vK+yqBqEmHr6lDkRnYNoTBYlAgJa7a4w==", + "license": "MIT", + "dependencies": { + 
"@types/d3-path": "*" + } + }, + "node_modules/@types/d3-time": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/d3-time/-/d3-time-3.0.4.tgz", + "integrity": "sha512-yuzZug1nkAAaBlBBikKZTgzCeA+k1uy4ZFwWANOfKw5z5LRhV0gNA7gNkKm7HoK+HRN0wX3EkxGk0fpbWhmB7g==", + "license": "MIT" + }, + "node_modules/@types/d3-timer": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/@types/d3-timer/-/d3-timer-3.0.2.tgz", + "integrity": "sha512-Ps3T8E8dZDam6fUyNiMkekK3XUsaUEik+idO9/YjPtfj2qruF8tFBXS7XhtE4iIXBLxhmLjP3SXpLhVf21I9Lw==", + "license": "MIT" + }, + "node_modules/@types/debug": { + "version": "4.1.13", + "resolved": "https://registry.npmjs.org/@types/debug/-/debug-4.1.13.tgz", + "integrity": "sha512-KSVgmQmzMwPlmtljOomayoR89W4FynCAi3E8PPs7vmDVPe84hT+vGPKkJfThkmXs0x0jAaa9U8uW8bbfyS2fWw==", + "license": "MIT", + "dependencies": { + "@types/ms": "*" + } + }, + "node_modules/@types/deep-eql": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/@types/deep-eql/-/deep-eql-4.0.2.tgz", + "integrity": "sha512-c9h9dVVMigMPc4bwTvC5dxqtqJZwQPePsWjPlpSOnojbor6pGqdk541lfA7AqFQr5pB1BRdq0juY9db81BwyFw==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/estree": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz", + "integrity": "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/express": { + "version": "4.17.17", + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "@types/body-parser": "*", + "@types/express-serve-static-core": "^4.17.33", + "@types/qs": "*", + "@types/serve-static": "*" + } + }, + "node_modules/@types/express-serve-static-core": { + "version": "4.17.35", + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "@types/node": "*", + "@types/qs": "*", + "@types/range-parser": "*", + "@types/send": "*" + } + 
}, + "node_modules/@types/hast": { + "version": "2.3.10", + "resolved": "https://registry.npmjs.org/@types/hast/-/hast-2.3.10.tgz", + "integrity": "sha512-McWspRw8xx8J9HurkVBfYj0xKoE25tOFlHGdx4MJ5xORQrMGZNqJhVQWaIbm6Oyla5kYOXtDiopzKRJzEOkwJw==", + "license": "MIT", + "dependencies": { + "@types/unist": "^2" + } + }, + "node_modules/@types/hoist-non-react-statics": { + "version": "3.3.6", + "license": "MIT", + "dependencies": { + "@types/react": "*", + "hoist-non-react-statics": "^3.3.0" + } + }, + "node_modules/@types/http-proxy": { + "version": "1.17.11", + "license": "MIT", + "dependencies": { + "@types/node": "*" + } + }, + "node_modules/@types/istanbul-lib-coverage": { + "version": "2.0.4", + "license": "MIT" + }, + "node_modules/@types/istanbul-lib-report": { + "version": "3.0.0", + "license": "MIT", + "dependencies": { + "@types/istanbul-lib-coverage": "*" + } + }, + "node_modules/@types/istanbul-reports": { + "version": "3.0.1", + "license": "MIT", + "dependencies": { + "@types/istanbul-lib-report": "*" + } + }, + "node_modules/@types/jest": { + "version": "29.5.2", + "license": "MIT", + "dependencies": { + "expect": "^29.0.0", + "pretty-format": "^29.0.0" + } + }, + "node_modules/@types/jest/node_modules/ansi-styles": { + "version": "5.2.0", + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/@types/jest/node_modules/pretty-format": { + "version": "29.5.0", + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.4.3", + "ansi-styles": "^5.0.0", + "react-is": "^18.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@types/mdast": { + "version": "3.0.12", + "license": "MIT", + "dependencies": { + "@types/unist": "^2" + } + }, + "node_modules/@types/mime": { + "version": "1.3.2", + "license": "MIT", + "optional": true, + "peer": true + }, + "node_modules/@types/ms": { + "version": "0.7.31", + "license": "MIT" 
+ }, + "node_modules/@types/node": { + "version": "25.5.0", + "resolved": "https://registry.npmjs.org/@types/node/-/node-25.5.0.tgz", + "integrity": "sha512-jp2P3tQMSxWugkCUKLRPVUpGaL5MVFwF8RDuSRztfwgN1wmqJeMSbKlnEtQqU8UrhTmzEmZdu2I6v2dpp7XIxw==", + "license": "MIT", + "dependencies": { + "undici-types": "~7.18.0" + } + }, + "node_modules/@types/parse-json": { + "version": "4.0.0", + "license": "MIT" + }, + "node_modules/@types/prop-types": { + "version": "15.7.5", + "license": "MIT" + }, + "node_modules/@types/qs": { + "version": "6.9.7", + "license": "MIT", + "optional": true, + "peer": true + }, + "node_modules/@types/range-parser": { + "version": "1.2.4", + "license": "MIT", + "optional": true, + "peer": true + }, + "node_modules/@types/react": { + "version": "18.2.12", + "license": "MIT", + "dependencies": { + "@types/prop-types": "*", + "@types/scheduler": "*", + "csstype": "^3.0.2" + } + }, + "node_modules/@types/react-dom": { + "version": "18.2.5", + "license": "MIT", + "dependencies": { + "@types/react": "*" + } + }, + "node_modules/@types/scheduler": { + "version": "0.16.3", + "license": "MIT" + }, + "node_modules/@types/send": { + "version": "0.17.1", + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "@types/mime": "^1", + "@types/node": "*" + } + }, + "node_modules/@types/serve-static": { + "version": "1.15.1", + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "@types/mime": "*", + "@types/node": "*" + } + }, + "node_modules/@types/stack-utils": { + "version": "2.0.1", + "license": "MIT" + }, + "node_modules/@types/testing-library__jest-dom": { + "version": "5.14.6", + "license": "MIT", + "dependencies": { + "@types/jest": "*" + } + }, + "node_modules/@types/unist": { + "version": "2.0.7", + "license": "MIT" + }, + "node_modules/@types/use-sync-external-store": { + "version": "0.0.3", + "license": "MIT" + }, + "node_modules/@types/whatwg-mimetype": { + "version": "3.0.2", + "resolved": 
"https://registry.npmjs.org/@types/whatwg-mimetype/-/whatwg-mimetype-3.0.2.tgz", + "integrity": "sha512-c2AKvDT8ToxLIOUlN51gTiHXflsfIFisS4pO7pDPoKouJCESkhZnEy623gwP9laCy5lnLDAw1vAzu2vM2YLOrA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/ws": { + "version": "8.18.1", + "resolved": "https://registry.npmjs.org/@types/ws/-/ws-8.18.1.tgz", + "integrity": "sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*" + } + }, + "node_modules/@types/yargs": { + "version": "17.0.24", + "license": "MIT", + "dependencies": { + "@types/yargs-parser": "*" + } + }, + "node_modules/@types/yargs-parser": { + "version": "21.0.0", + "license": "MIT" + }, + "node_modules/@vitejs/plugin-react": { + "version": "4.7.0", + "resolved": "https://registry.npmjs.org/@vitejs/plugin-react/-/plugin-react-4.7.0.tgz", + "integrity": "sha512-gUu9hwfWvvEDBBmgtAowQCojwZmJ5mcLn3aufeCsitijs3+f2NsrPtlAWIR6OPiqljl96GVCUbLe0HyqIpVaoA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/core": "^7.28.0", + "@babel/plugin-transform-react-jsx-self": "^7.27.1", + "@babel/plugin-transform-react-jsx-source": "^7.27.1", + "@rolldown/pluginutils": "1.0.0-beta.27", + "@types/babel__core": "^7.20.5", + "react-refresh": "^0.17.0" + }, + "engines": { + "node": "^14.18.0 || >=16.0.0" + }, + "peerDependencies": { + "vite": "^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0" + } + }, + "node_modules/@vitejs/plugin-react/node_modules/react-refresh": { + "version": "0.17.0", + "resolved": "https://registry.npmjs.org/react-refresh/-/react-refresh-0.17.0.tgz", + "integrity": "sha512-z6F7K9bV85EfseRCp2bzrpyQ0Gkw1uLoCel9XBVWPg/TjRj94SkJzUTGfOa4bs7iJvBWtQG0Wq7wnI0syw3EBQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/@vitest/expect": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/@vitest/expect/-/expect-3.2.4.tgz", + 
"integrity": "sha512-Io0yyORnB6sikFlt8QW5K7slY4OjqNX9jmJQ02QDda8lyM6B5oNgVWoSoKPac8/kgnCUzuHQKrSLtu/uOqqrig==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/chai": "^5.2.2", + "@vitest/spy": "3.2.4", + "@vitest/utils": "3.2.4", + "chai": "^5.2.0", + "tinyrainbow": "^2.0.0" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/@vitest/mocker": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/@vitest/mocker/-/mocker-3.2.4.tgz", + "integrity": "sha512-46ryTE9RZO/rfDd7pEqFl7etuyzekzEhUbTW3BvmeO/BcCMEgq59BKhek3dXDWgAj4oMK6OZi+vRr1wPW6qjEQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@vitest/spy": "3.2.4", + "estree-walker": "^3.0.3", + "magic-string": "^0.30.17" + }, + "funding": { + "url": "https://opencollective.com/vitest" + }, + "peerDependencies": { + "msw": "^2.4.9", + "vite": "^5.0.0 || ^6.0.0 || ^7.0.0-0" + }, + "peerDependenciesMeta": { + "msw": { + "optional": true + }, + "vite": { + "optional": true + } + } + }, + "node_modules/@vitest/mocker/node_modules/estree-walker": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-3.0.3.tgz", + "integrity": "sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/estree": "^1.0.0" + } + }, + "node_modules/@vitest/mocker/node_modules/magic-string": { + "version": "0.30.21", + "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.21.tgz", + "integrity": "sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/sourcemap-codec": "^1.5.5" + } + }, + "node_modules/@vitest/pretty-format": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/@vitest/pretty-format/-/pretty-format-3.2.4.tgz", + "integrity": 
"sha512-IVNZik8IVRJRTr9fxlitMKeJeXFFFN0JaB9PHPGQ8NKQbGpfjlTx9zO4RefN8gp7eqjNy8nyK3NZmBzOPeIxtA==", + "dev": true, + "license": "MIT", + "dependencies": { + "tinyrainbow": "^2.0.0" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/@vitest/runner": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/@vitest/runner/-/runner-3.2.4.tgz", + "integrity": "sha512-oukfKT9Mk41LreEW09vt45f8wx7DordoWUZMYdY/cyAk7w5TWkTRCNZYF7sX7n2wB7jyGAl74OxgwhPgKaqDMQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@vitest/utils": "3.2.4", + "pathe": "^2.0.3", + "strip-literal": "^3.0.0" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/@vitest/snapshot": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/@vitest/snapshot/-/snapshot-3.2.4.tgz", + "integrity": "sha512-dEYtS7qQP2CjU27QBC5oUOxLE/v5eLkGqPE0ZKEIDGMs4vKWe7IjgLOeauHsR0D5YuuycGRO5oSRXnwnmA78fQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@vitest/pretty-format": "3.2.4", + "magic-string": "^0.30.17", + "pathe": "^2.0.3" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/@vitest/snapshot/node_modules/magic-string": { + "version": "0.30.21", + "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.21.tgz", + "integrity": "sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/sourcemap-codec": "^1.5.5" + } + }, + "node_modules/@vitest/spy": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/@vitest/spy/-/spy-3.2.4.tgz", + "integrity": "sha512-vAfasCOe6AIK70iP5UD11Ac4siNUNJ9i/9PZ3NKx07sG6sUxeag1LWdNrMWeKKYBLlzuK+Gn65Yd5nyL6ds+nw==", + "dev": true, + "license": "MIT", + "dependencies": { + "tinyspy": "^4.0.3" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/@vitest/utils": { + 
"version": "3.2.4", + "resolved": "https://registry.npmjs.org/@vitest/utils/-/utils-3.2.4.tgz", + "integrity": "sha512-fB2V0JFrQSMsCo9HiSq3Ezpdv4iYaXRG1Sx8edX3MwxfyNn83mKiGzOcH+Fkxt4MHxr3y42fQi1oeAInqgX2QA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@vitest/pretty-format": "3.2.4", + "loupe": "^3.1.4", + "tinyrainbow": "^2.0.0" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/abbrev": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/abbrev/-/abbrev-1.1.1.tgz", + "integrity": "sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q==", + "license": "ISC", + "optional": true + }, + "node_modules/acorn": { + "version": "8.8.2", + "dev": true, + "license": "MIT", + "optional": true, + "peer": true, + "bin": { + "acorn": "bin/acorn" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/agent-base": { + "version": "7.1.4", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz", + "integrity": "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 14" + } + }, + "node_modules/ajv": { + "version": "8.12.0", + "license": "MIT", + "dependencies": { + "fast-deep-equal": "^3.1.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2", + "uri-js": "^4.2.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/ajv-formats": { + "version": "2.1.1", + "license": "MIT", + "dependencies": { + "ajv": "^8.0.0" + }, + "peerDependencies": { + "ajv": "^8.0.0" + }, + "peerDependenciesMeta": { + "ajv": { + "optional": true + } + } + }, + "node_modules/ansi-regex": { + "version": "5.0.1", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/ansi-styles": { + "version": "4.3.0", + "license": "MIT", + "dependencies": { + "color-convert": 
"^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/antd": { + "version": "5.13.2", + "license": "MIT", + "dependencies": { + "@ant-design/colors": "^7.0.2", + "@ant-design/cssinjs": "^1.18.2", + "@ant-design/icons": "^5.2.6", + "@ant-design/react-slick": "~1.0.2", + "@ctrl/tinycolor": "^3.6.1", + "@rc-component/color-picker": "~1.5.1", + "@rc-component/mutate-observer": "^1.1.0", + "@rc-component/tour": "~1.12.2", + "@rc-component/trigger": "^1.18.2", + "classnames": "^2.5.1", + "copy-to-clipboard": "^3.3.3", + "dayjs": "^1.11.10", + "qrcode.react": "^3.1.0", + "rc-cascader": "~3.21.0", + "rc-checkbox": "~3.1.0", + "rc-collapse": "~3.7.2", + "rc-dialog": "~9.3.4", + "rc-drawer": "~7.0.0", + "rc-dropdown": "~4.1.0", + "rc-field-form": "~1.41.0", + "rc-image": "~7.5.1", + "rc-input": "~1.4.3", + "rc-input-number": "~8.6.1", + "rc-mentions": "~2.10.1", + "rc-menu": "~9.12.4", + "rc-motion": "^2.9.0", + "rc-notification": "~5.3.0", + "rc-pagination": "~4.0.4", + "rc-picker": "~3.14.6", + "rc-progress": "~3.5.1", + "rc-rate": "~2.12.0", + "rc-resize-observer": "^1.4.0", + "rc-segmented": "~2.2.2", + "rc-select": "~14.11.0", + "rc-slider": "~10.5.0", + "rc-steps": "~6.0.1", + "rc-switch": "~4.1.0", + "rc-table": "~7.37.0", + "rc-tabs": "~14.0.0", + "rc-textarea": "~1.6.3", + "rc-tooltip": "~6.1.3", + "rc-tree": "~5.8.2", + "rc-tree-select": "~5.17.0", + "rc-upload": "~4.5.2", + "rc-util": "^5.38.1", + "scroll-into-view-if-needed": "^3.1.0", + "throttle-debounce": "^5.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/ant-design" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/antd/node_modules/rc-picker": { + "version": "3.14.6", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.10.1", + "@rc-component/trigger": "^1.5.0", + "classnames": "^2.2.1", + "rc-util": "^5.30.0" 
+ }, + "engines": { + "node": ">=8.x" + }, + "peerDependencies": { + "date-fns": ">= 2.x", + "dayjs": ">= 1.x", + "luxon": ">= 3.x", + "moment": ">= 2.x", + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + }, + "peerDependenciesMeta": { + "date-fns": { + "optional": true + }, + "dayjs": { + "optional": true + }, + "luxon": { + "optional": true + }, + "moment": { + "optional": true + } + } + }, + "node_modules/aproba": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/aproba/-/aproba-2.1.0.tgz", + "integrity": "sha512-tLIEcj5GuR2RSTnxNKdkK0dJ/GrC7P38sUkiDmDuHfsHmbagTFAxDVIBltoklXEVIQ/f14IL8IMJ5pn9Hez1Ew==", + "license": "ISC", + "optional": true + }, + "node_modules/are-we-there-yet": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/are-we-there-yet/-/are-we-there-yet-2.0.0.tgz", + "integrity": "sha512-Ci/qENmwHnsYo9xKIcUJN5LeDKdJ6R1Z1j9V/J5wyq8nh/mYPEpIKJbBZXtZjG04HiK7zV/p6Vs9952MrMeUIw==", + "deprecated": "This package is no longer supported.", + "license": "ISC", + "optional": true, + "dependencies": { + "delegates": "^1.0.0", + "readable-stream": "^3.6.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/argparse": { + "version": "2.0.1", + "license": "Python-2.0" + }, + "node_modules/aria-query": { + "version": "5.1.3", + "license": "Apache-2.0", + "dependencies": { + "deep-equal": "^2.0.5" + } + }, + "node_modules/array-buffer-byte-length": { + "version": "1.0.0", + "license": "MIT", + "dependencies": { + "call-bind": "^1.0.2", + "is-array-buffer": "^3.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/array-tree-filter": { + "version": "2.1.0", + "license": "MIT" + }, + "node_modules/assertion-error": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/assertion-error/-/assertion-error-2.0.1.tgz", + "integrity": "sha512-Izi8RQcffqCeNVgFigKli1ssklIbpHnCYc6AknXGYoB6grJqyeby7jv12JUQgmTAnIDnbck1uxksT4dzN3PWBA==", + "dev": true, + "license": "MIT", + "engines": 
{ + "node": ">=12" + } + }, + "node_modules/async-validator": { + "version": "4.2.5", + "license": "MIT" + }, + "node_modules/asynckit": { + "version": "0.4.0", + "license": "MIT" + }, + "node_modules/available-typed-arrays": { + "version": "1.0.5", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/axios": { + "version": "1.4.0", + "license": "MIT", + "dependencies": { + "follow-redirects": "^1.15.0", + "form-data": "^4.0.0", + "proxy-from-env": "^1.1.0" + } + }, + "node_modules/babel-plugin-macros": { + "version": "3.1.0", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.12.5", + "cosmiconfig": "^7.0.0", + "resolve": "^1.19.0" + }, + "engines": { + "node": ">=10", + "npm": ">=6" + } + }, + "node_modules/babel-plugin-macros/node_modules/resolve": { + "version": "1.22.2", + "license": "MIT", + "dependencies": { + "is-core-module": "^2.11.0", + "path-parse": "^1.0.7", + "supports-preserve-symlinks-flag": "^1.0.0" + }, + "bin": { + "resolve": "bin/resolve" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/bail": { + "version": "2.0.2", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "license": "MIT", + "optional": true + }, + "node_modules/base64-js": { + "version": "1.5.1", + "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", + "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": 
"https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "optional": true, + "peer": true + }, + "node_modules/baseline-browser-mapping": { + "version": "2.10.10", + "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.10.10.tgz", + "integrity": "sha512-sUoJ3IMxx4AyRqO4MLeHlnGDkyXRoUG0/AI9fjK+vS72ekpV0yWVY7O0BVjmBcRtkNcsAO2QDZ4tdKKGoI6YaQ==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "baseline-browser-mapping": "dist/cli.cjs" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/bidi-js": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/bidi-js/-/bidi-js-1.0.3.tgz", + "integrity": "sha512-RKshQI1R3YQ+n9YJz2QQ147P66ELpa1FQEg20Dk8oW9t2KgLbpDLLp9aGZ7y8WHSshDknG0bknqGw5/tyCs5tw==", + "dev": true, + "license": "MIT", + "dependencies": { + "require-from-string": "^2.0.2" + } + }, + "node_modules/bl": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz", + "integrity": "sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==", + "dev": true, + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "buffer": "^5.5.0", + "inherits": "^2.0.4", + "readable-stream": "^3.4.0" + } + }, + "node_modules/brace-expansion": { + "version": "1.1.12", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", + "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==", + "license": "MIT", + "optional": true, + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "node_modules/braces": { + "version": "3.0.2", + "license": "MIT", + "dependencies": { + "fill-range": "^7.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/browserslist": { + "version": "4.28.1", + "resolved": 
"https://registry.npmjs.org/browserslist/-/browserslist-4.28.1.tgz", + "integrity": "sha512-ZC5Bd0LgJXgwGqUknZY/vkUQ04r8NXnJZ3yYi4vDmSiZmC/pdSN0NbNRPxZpbtO4uAfDUAFffO8IZoM3Gj8IkA==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/browserslist" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "dependencies": { + "baseline-browser-mapping": "^2.9.0", + "caniuse-lite": "^1.0.30001759", + "electron-to-chromium": "^1.5.263", + "node-releases": "^2.0.27", + "update-browserslist-db": "^1.2.0" + }, + "bin": { + "browserslist": "cli.js" + }, + "engines": { + "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7" + } + }, + "node_modules/buffer": { + "version": "5.7.1", + "resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz", + "integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "base64-js": "^1.3.1", + "ieee754": "^1.1.13" + } + }, + "node_modules/buffer-from": { + "version": "1.1.2", + "dev": true, + "license": "MIT", + "optional": true, + "peer": true + }, + "node_modules/cac": { + "version": "6.7.14", + "resolved": "https://registry.npmjs.org/cac/-/cac-6.7.14.tgz", + "integrity": "sha512-b6Ilus+c3RrdDk+JhLKUAQfzzgLEPy6wcXqS7f/xe1EETvsDP6GORG7SFuOs6cID5YkqchW/LXZbX5bc8j7ZcQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/call-bind": { + "version": "1.0.2", + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.1", + 
"get-intrinsic": "^1.0.2" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/callsites": { + "version": "3.1.0", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/camelcase": { + "version": "6.3.0", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/caniuse-lite": { + "version": "1.0.30001781", + "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001781.tgz", + "integrity": "sha512-RdwNCyMsNBftLjW6w01z8bKEvT6e/5tpPVEgtn22TiLGlstHOVecsX2KHFkD5e/vRnIE4EGzpuIODb3mtswtkw==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/caniuse-lite" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "CC-BY-4.0" + }, + "node_modules/canvas": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/canvas/-/canvas-3.2.2.tgz", + "integrity": "sha512-duEt4h1HHu9sJZyVKfLRXR6tsKPY7cEELzxSRJkwddOXYvQT3P/+es98SV384JA0zMOZ5s+9gatnGfM6sL4Drg==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "node-addon-api": "^7.0.0", + "prebuild-install": "^7.1.3" + }, + "engines": { + "node": "^18.12.0 || >= 20.9.0" + } + }, + "node_modules/ccount": { + "version": "2.0.1", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/chai": { + "version": "5.3.3", + "resolved": "https://registry.npmjs.org/chai/-/chai-5.3.3.tgz", + "integrity": "sha512-4zNhdJD/iOjSH0A05ea+Ke6MU5mmpQcbQsSOkgdaUMJ9zTlDTD/GYlwohmIE2u0gaxHYiVHEn1Fw9mZ/ktJWgw==", + "dev": true, + "license": "MIT", + "dependencies": { + "assertion-error": "^2.0.1", + "check-error": "^2.1.1", + "deep-eql": "^5.0.1", + 
"loupe": "^3.1.0", + "pathval": "^2.0.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/chalk": { + "version": "4.1.2", + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } + }, + "node_modules/character-entities": { + "version": "2.0.2", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/character-entities-legacy": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/character-entities-legacy/-/character-entities-legacy-1.1.4.tgz", + "integrity": "sha512-3Xnr+7ZFS1uxeiUDvV02wQ+QDbc55o97tIV5zHScSPJpcLm/r0DFPcoY3tYRp+VZukxuMeKgXYmsXQHO05zQeA==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/character-reference-invalid": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/character-reference-invalid/-/character-reference-invalid-1.1.4.tgz", + "integrity": "sha512-mKKUkUbhPpQlCOfIuZkvSEgktjPFIsZKRRbC6KWVEMvlzblj3i3asQv5ODsrwt0N3pHAEvjP8KTQPHkp0+6jOg==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/check-error": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/check-error/-/check-error-2.1.3.tgz", + "integrity": "sha512-PAJdDJusoxnwm1VwW07VWwUN1sl7smmC3OKggvndJFadxxDRyFJBX/ggnu/KE4kQAB7a3Dp8f/YXC1FlUprWmA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 16" + } + }, + "node_modules/chownr": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz", + "integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==", + "dev": true, + "license": "ISC", + "optional": true, + "peer": true + }, + "node_modules/ci-info": { + "version": 
"3.8.0", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/sibiraj-s" + } + ], + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/classnames": { + "version": "2.5.1", + "license": "MIT" + }, + "node_modules/clone": { + "version": "2.1.2", + "license": "MIT", + "engines": { + "node": ">=0.8" + } + }, + "node_modules/clsx": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/clsx/-/clsx-2.1.1.tgz", + "integrity": "sha512-eYm0QWBtUrBWZWG0d386OGAw16Z995PiOVo2B7bjWSbHedGl5e0ZWaq65kOGgUSNesEIDkB9ISbTg/JK9dhCZA==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/color-convert": { + "version": "2.0.1", + "license": "MIT", + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/color-name": { + "version": "1.1.4", + "license": "MIT" + }, + "node_modules/color-support": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/color-support/-/color-support-1.1.3.tgz", + "integrity": "sha512-qiBjkpbMLO/HL68y+lh4q0/O1MZFj2RX6X/KmMa3+gJD3z+WwI1ZzDHysvqHGS3mP6mznPckpXmw1nI9cJjyRg==", + "license": "ISC", + "optional": true, + "bin": { + "color-support": "bin.js" + } + }, + "node_modules/combined-stream": { + "version": "1.0.8", + "license": "MIT", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/comma-separated-tokens": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/comma-separated-tokens/-/comma-separated-tokens-2.0.3.tgz", + "integrity": "sha512-Fu4hJdvzeylCfQPp9SGWidpzrMs7tTrlu6Vb8XGaRGck8QSNZJJp538Wrb60Lax4fPwR64ViY468OIUTbRlGZg==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/compute-gcd": { + "version": "1.2.1", + "dependencies": { + "validate.io-array": "^1.0.3", + "validate.io-function": "^1.0.2", + "validate.io-integer-array": "^1.0.0" + } + }, + 
"node_modules/compute-lcm": { + "version": "1.1.2", + "dependencies": { + "compute-gcd": "^1.2.1", + "validate.io-array": "^1.0.3", + "validate.io-function": "^1.0.2", + "validate.io-integer-array": "^1.0.0" + } + }, + "node_modules/compute-scroll-into-view": { + "version": "3.1.0", + "license": "MIT" + }, + "node_modules/concat-map": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", + "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==", + "license": "MIT", + "optional": true + }, + "node_modules/console-control-strings": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/console-control-strings/-/console-control-strings-1.1.0.tgz", + "integrity": "sha512-ty/fTekppD2fIwRvnZAVdeOiGd1c7YXEixbgJTNzqcxJWKQnjJ/V1bNEEE6hygpM3WjwHFUVK6HTjWSzV4a8sQ==", + "license": "ISC", + "optional": true + }, + "node_modules/convert-source-map": { + "version": "1.9.0", + "license": "MIT" + }, + "node_modules/copy-to-clipboard": { + "version": "3.3.3", + "license": "MIT", + "dependencies": { + "toggle-selection": "^1.0.6" + } + }, + "node_modules/cosmiconfig": { + "version": "7.1.0", + "license": "MIT", + "dependencies": { + "@types/parse-json": "^4.0.0", + "import-fresh": "^3.2.1", + "parse-json": "^5.0.0", + "path-type": "^4.0.0", + "yaml": "^1.10.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/cosmiconfig/node_modules/yaml": { + "version": "1.10.3", + "resolved": "https://registry.npmjs.org/yaml/-/yaml-1.10.3.tgz", + "integrity": "sha512-vIYeF1u3CjlhAFekPPAk2h/Kv4T3mAkMox5OymRiJQB0spDP10LHvt+K7G9Ny6NuuMAb25/6n1qyUjAcGNf/AA==", + "license": "ISC", + "engines": { + "node": ">= 6" + } + }, + "node_modules/cron-validator": { + "version": "1.3.1", + "license": "MIT" + }, + "node_modules/cronstrue": { + "version": "2.48.0", + "license": "MIT", + "bin": { + "cronstrue": "bin/cli.js" + } + }, + "node_modules/css-tree": { + "version": "3.2.1", + 
"resolved": "https://registry.npmjs.org/css-tree/-/css-tree-3.2.1.tgz", + "integrity": "sha512-X7sjQzceUhu1u7Y/ylrRZFU2FS6LRiFVp6rKLPg23y3x3c3DOKAwuXGDp+PAGjh6CSnCjYeAul8pcT8bAl+lSA==", + "dev": true, + "license": "MIT", + "dependencies": { + "mdn-data": "2.27.1", + "source-map-js": "^1.2.1" + }, + "engines": { + "node": "^10 || ^12.20.0 || ^14.13.0 || >=15.0.0" + } + }, + "node_modules/css.escape": { + "version": "1.5.1", + "license": "MIT" + }, + "node_modules/cssstyle": { + "version": "5.3.7", + "resolved": "https://registry.npmjs.org/cssstyle/-/cssstyle-5.3.7.tgz", + "integrity": "sha512-7D2EPVltRrsTkhpQmksIu+LxeWAIEk6wRDMJ1qljlv+CKHJM+cJLlfhWIzNA44eAsHXSNe3+vO6DW1yCYx8SuQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@asamuzakjp/css-color": "^4.1.1", + "@csstools/css-syntax-patches-for-csstree": "^1.0.21", + "css-tree": "^3.1.0", + "lru-cache": "^11.2.4" + }, + "engines": { + "node": ">=20" + } + }, + "node_modules/cssstyle/node_modules/lru-cache": { + "version": "11.2.7", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-11.2.7.tgz", + "integrity": "sha512-aY/R+aEsRelme17KGQa/1ZSIpLpNYYrhcrepKTZgE+W3WM16YMCaPwOHLHsmopZHELU0Ojin1lPVxKR0MihncA==", + "dev": true, + "license": "BlueOak-1.0.0", + "engines": { + "node": "20 || >=22" + } + }, + "node_modules/csstype": { + "version": "3.1.3", + "license": "MIT" + }, + "node_modules/d3-array": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/d3-array/-/d3-array-3.2.4.tgz", + "integrity": "sha512-tdQAmyA18i4J7wprpYq8ClcxZy3SC31QMeByyCFyRt7BVHdREQZ5lpzoe5mFEYZUWe+oq8HBvk9JjpibyEV4Jg==", + "license": "ISC", + "dependencies": { + "internmap": "1 - 2" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-color": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/d3-color/-/d3-color-3.1.0.tgz", + "integrity": "sha512-zg/chbXyeBtMQ1LbD/WSoW2DpC3I0mpmPdW+ynRTj/x2DAWYrIY7qeZIHidozwV24m4iavr15lNwIwLxRmOxhA==", + "license": "ISC", + "engines": { + "node": 
">=12" + } + }, + "node_modules/d3-ease": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/d3-ease/-/d3-ease-3.0.1.tgz", + "integrity": "sha512-wR/XK3D3XcLIZwpbvQwQ5fK+8Ykds1ip7A2Txe0yxncXSdq1L9skcG7blcedkOX+ZcgxGAmLX1FrRGbADwzi0w==", + "license": "BSD-3-Clause", + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-format": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/d3-format/-/d3-format-3.1.2.tgz", + "integrity": "sha512-AJDdYOdnyRDV5b6ArilzCPPwc1ejkHcoyFarqlPqT7zRYjhavcT3uSrqcMvsgh2CgoPbK3RCwyHaVyxYcP2Arg==", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-interpolate": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/d3-interpolate/-/d3-interpolate-3.0.1.tgz", + "integrity": "sha512-3bYs1rOD33uo8aqJfKP3JWPAibgw8Zm2+L9vBKEHJ2Rg+viTR7o5Mmv5mZcieN+FRYaAOWX5SJATX6k1PWz72g==", + "license": "ISC", + "dependencies": { + "d3-color": "1 - 3" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-path": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/d3-path/-/d3-path-3.1.0.tgz", + "integrity": "sha512-p3KP5HCf/bvjBSSKuXid6Zqijx7wIfNW+J/maPs+iwR35at5JCbLUT0LzF1cnjbCHWhqzQTIN2Jpe8pRebIEFQ==", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-scale": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/d3-scale/-/d3-scale-4.0.2.tgz", + "integrity": "sha512-GZW464g1SH7ag3Y7hXjf8RoUuAFIqklOAq3MRl4OaWabTFJY9PN/E1YklhXLh+OQ3fM9yS2nOkCoS+WLZ6kvxQ==", + "license": "ISC", + "dependencies": { + "d3-array": "2.10.0 - 3", + "d3-format": "1 - 3", + "d3-interpolate": "1.2.0 - 3", + "d3-time": "2.1.1 - 3", + "d3-time-format": "2 - 4" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-shape": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/d3-shape/-/d3-shape-3.2.0.tgz", + "integrity": "sha512-SaLBuwGm3MOViRq2ABk3eLoxwZELpH6zhl3FbAoJ7Vm1gofKx6El1Ib5z23NUEhF9AsGl7y+dzLe5Cw2AArGTA==", + 
"license": "ISC", + "dependencies": { + "d3-path": "^3.1.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-time": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/d3-time/-/d3-time-3.1.0.tgz", + "integrity": "sha512-VqKjzBLejbSMT4IgbmVgDjpkYrNWUYJnbCGo874u7MMKIWsILRX+OpX/gTk8MqjpT1A/c6HY2dCA77ZN0lkQ2Q==", + "license": "ISC", + "dependencies": { + "d3-array": "2 - 3" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-time-format": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/d3-time-format/-/d3-time-format-4.1.0.tgz", + "integrity": "sha512-dJxPBlzC7NugB2PDLwo9Q8JiTR3M3e4/XANkreKSUxF8vvXKqm1Yfq4Q5dl8budlunRVlUUaDUgFt7eA8D6NLg==", + "license": "ISC", + "dependencies": { + "d3-time": "1 - 3" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-timer": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/d3-timer/-/d3-timer-3.0.1.tgz", + "integrity": "sha512-ndfJ/JxxMd3nw31uyKoY2naivF+r29V+Lc0svZxe1JvvIRmi8hUsrMvdOwgS1o6uBHmiz91geQ0ylPP0aj1VUA==", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/data-urls": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/data-urls/-/data-urls-6.0.1.tgz", + "integrity": "sha512-euIQENZg6x8mj3fO6o9+fOW8MimUI4PpD/fZBhJfeioZVy9TUpM4UY7KjQNVZFlqwJ0UdzRDzkycB997HEq1BQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "whatwg-mimetype": "^5.0.0", + "whatwg-url": "^15.1.0" + }, + "engines": { + "node": ">=20" + } + }, + "node_modules/data-urls/node_modules/whatwg-mimetype": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-5.0.0.tgz", + "integrity": "sha512-sXcNcHOC51uPGF0P/D4NVtrkjSU2fNsm9iog4ZvZJsL3rjoDAzXZhkm2MWt1y+PUdggKAYVoMAIYcs78wJ51Cw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=20" + } + }, + "node_modules/date-fns": { + "version": "4.1.0", + "license": "MIT", + "funding": { + "type": "github", + "url": 
"https://github.com/sponsors/kossnocorp" + } + }, + "node_modules/dayjs": { + "version": "1.11.10", + "license": "MIT" + }, + "node_modules/debug": { + "version": "4.3.4", + "license": "MIT", + "dependencies": { + "ms": "2.1.2" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/decimal.js": { + "version": "10.6.0", + "resolved": "https://registry.npmjs.org/decimal.js/-/decimal.js-10.6.0.tgz", + "integrity": "sha512-YpgQiITW3JXGntzdUmyUR1V812Hn8T1YVXhCu+wO3OpS4eU9l4YdD3qjyiKdV6mvV29zapkMeD390UVEf2lkUg==", + "dev": true, + "license": "MIT" + }, + "node_modules/decimal.js-light": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/decimal.js-light/-/decimal.js-light-2.5.1.tgz", + "integrity": "sha512-qIMFpTMZmny+MMIitAB6D7iVPEorVw6YQRWkvarTkT4tBeSLLiHzcwj6q0MmYSFCiVpiqPJTJEYIrpcPzVEIvg==", + "license": "MIT" + }, + "node_modules/decode-named-character-reference": { + "version": "1.0.2", + "license": "MIT", + "dependencies": { + "character-entities": "^2.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/decompress-response": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz", + "integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==", + "dev": true, + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "mimic-response": "^3.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/deeks": { + "version": "3.1.0", + "license": "MIT", + "engines": { + "node": ">= 16" + } + }, + "node_modules/deep-eql": { + "version": "5.0.2", + "resolved": "https://registry.npmjs.org/deep-eql/-/deep-eql-5.0.2.tgz", + "integrity": 
"sha512-h5k/5U50IJJFpzfL6nO9jaaumfjO/f2NjK/oYB2Djzm4p9L+3T9qWpZqZ2hAbLPuuYq9wrU08WQyBTL5GbPk5Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/deep-equal": { + "version": "2.2.1", + "license": "MIT", + "dependencies": { + "array-buffer-byte-length": "^1.0.0", + "call-bind": "^1.0.2", + "es-get-iterator": "^1.1.3", + "get-intrinsic": "^1.2.0", + "is-arguments": "^1.1.1", + "is-array-buffer": "^3.0.2", + "is-date-object": "^1.0.5", + "is-regex": "^1.1.4", + "is-shared-array-buffer": "^1.0.2", + "isarray": "^2.0.5", + "object-is": "^1.1.5", + "object-keys": "^1.1.1", + "object.assign": "^4.1.4", + "regexp.prototype.flags": "^1.5.0", + "side-channel": "^1.0.4", + "which-boxed-primitive": "^1.0.2", + "which-collection": "^1.0.1", + "which-typed-array": "^1.1.9" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/deep-extend": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.6.0.tgz", + "integrity": "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==", + "dev": true, + "license": "MIT", + "optional": true, + "peer": true, + "engines": { + "node": ">=4.0.0" + } + }, + "node_modules/define-properties": { + "version": "1.2.0", + "license": "MIT", + "dependencies": { + "has-property-descriptors": "^1.0.0", + "object-keys": "^1.1.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "license": "MIT", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/delegates": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delegates/-/delegates-1.0.0.tgz", + "integrity": "sha512-bd2L678uiWATM6m5Z1VzNCErI3jiGzt6HGY8OVICs40JQq/HALfbyNJmp0UDakEY4pMMaN0Ly5om/B1VI/+xfQ==", + "license": "MIT", + "optional": true + }, + "node_modules/dequal": { + "version": "2.0.3", + 
"license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/detect-libc": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz", + "integrity": "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==", + "license": "Apache-2.0", + "optional": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/diff": { + "version": "5.1.0", + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.3.1" + } + }, + "node_modules/diff-sequences": { + "version": "29.4.3", + "license": "MIT", + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/dnd-core": { + "version": "16.0.1", + "license": "MIT", + "dependencies": { + "@react-dnd/asap": "^5.0.1", + "@react-dnd/invariant": "^4.0.1", + "redux": "^4.2.0" + } + }, + "node_modules/doc-path": { + "version": "4.1.1", + "license": "MIT", + "engines": { + "node": ">=16" + } + }, + "node_modules/dom-accessibility-api": { + "version": "0.5.16", + "license": "MIT" + }, + "node_modules/dom-align": { + "version": "1.12.4", + "license": "MIT" + }, + "node_modules/dot-case": { + "version": "3.0.4", + "dev": true, + "license": "MIT", + "dependencies": { + "no-case": "^3.0.4", + "tslib": "^2.0.3" + } + }, + "node_modules/electron-to-chromium": { + "version": "1.5.325", + "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.325.tgz", + "integrity": "sha512-PwfIw7WQSt3xX7yOf5OE/unLzsK9CaN2f/FvV3WjPR1Knoc1T9vePRVV4W1EM301JzzysK51K7FNKcusCr0zYA==", + "dev": true, + "license": "ISC" + }, + "node_modules/emoji-picker-react": { + "version": "4.8.0", + "license": "MIT", + "dependencies": { + "flairup": "0.0.38" + }, + "engines": { + "node": ">=10" + }, + "peerDependencies": { + "react": ">=16" + } + }, + "node_modules/emoji-regex": { + "version": "10.3.0", + "license": "MIT" + }, + "node_modules/end-of-stream": { + "version": "1.4.5", + "resolved": 
"https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.5.tgz", + "integrity": "sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==", + "dev": true, + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "once": "^1.4.0" + } + }, + "node_modules/engine.io-client": { + "version": "6.5.2", + "license": "MIT", + "dependencies": { + "@socket.io/component-emitter": "~3.1.0", + "debug": "~4.3.1", + "engine.io-parser": "~5.2.1", + "ws": "~8.11.0", + "xmlhttprequest-ssl": "~2.0.0" + } + }, + "node_modules/engine.io-client/node_modules/ws": { + "version": "8.11.0", + "license": "MIT", + "engines": { + "node": ">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": "^5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + }, + "node_modules/engine.io-parser": { + "version": "5.2.1", + "license": "MIT", + "engines": { + "node": ">=10.0.0" + } + }, + "node_modules/entities": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/entities/-/entities-6.0.1.tgz", + "integrity": "sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/error-ex": { + "version": "1.3.2", + "license": "MIT", + "dependencies": { + "is-arrayish": "^0.2.1" + } + }, + "node_modules/es-get-iterator": { + "version": "1.1.3", + "license": "MIT", + "dependencies": { + "call-bind": "^1.0.2", + "get-intrinsic": "^1.1.3", + "has-symbols": "^1.0.3", + "is-arguments": "^1.1.1", + "is-map": "^2.0.2", + "is-set": "^2.0.2", + "is-string": "^1.0.7", + "isarray": "^2.0.5", + "stop-iteration-iterator": "^1.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + 
"node_modules/es-module-lexer": { + "version": "1.7.0", + "resolved": "https://registry.npmjs.org/es-module-lexer/-/es-module-lexer-1.7.0.tgz", + "integrity": "sha512-jEQoCwk8hyb2AZziIOLhDqpm5+2ww5uIE6lkO/6jcOCusfk6LhMHpXXfBLXTZ7Ydyt0j4VoUQv6uGNYbdW+kBA==", + "dev": true, + "license": "MIT" + }, + "node_modules/es-toolkit": { + "version": "1.45.1", + "resolved": "https://registry.npmjs.org/es-toolkit/-/es-toolkit-1.45.1.tgz", + "integrity": "sha512-/jhoOj/Fx+A+IIyDNOvO3TItGmlMKhtX8ISAHKE90c4b/k1tqaqEZ+uUqfpU8DMnW5cgNJv606zS55jGvza0Xw==", + "license": "MIT", + "workspaces": [ + "docs", + "benchmarks" + ] + }, + "node_modules/esbuild": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.4.tgz", + "integrity": "sha512-Rq4vbHnYkK5fws5NF7MYTU68FPRE1ajX7heQ/8QXXWqNgqqJ/GkmmyxIzUnf2Sr/bakf8l54716CcMGHYhMrrQ==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "bin": { + "esbuild": "bin/esbuild" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.27.4", + "@esbuild/android-arm": "0.27.4", + "@esbuild/android-arm64": "0.27.4", + "@esbuild/android-x64": "0.27.4", + "@esbuild/darwin-arm64": "0.27.4", + "@esbuild/darwin-x64": "0.27.4", + "@esbuild/freebsd-arm64": "0.27.4", + "@esbuild/freebsd-x64": "0.27.4", + "@esbuild/linux-arm": "0.27.4", + "@esbuild/linux-arm64": "0.27.4", + "@esbuild/linux-ia32": "0.27.4", + "@esbuild/linux-loong64": "0.27.4", + "@esbuild/linux-mips64el": "0.27.4", + "@esbuild/linux-ppc64": "0.27.4", + "@esbuild/linux-riscv64": "0.27.4", + "@esbuild/linux-s390x": "0.27.4", + "@esbuild/linux-x64": "0.27.4", + "@esbuild/netbsd-arm64": "0.27.4", + "@esbuild/netbsd-x64": "0.27.4", + "@esbuild/openbsd-arm64": "0.27.4", + "@esbuild/openbsd-x64": "0.27.4", + "@esbuild/openharmony-arm64": "0.27.4", + "@esbuild/sunos-x64": "0.27.4", + "@esbuild/win32-arm64": "0.27.4", + "@esbuild/win32-ia32": "0.27.4", + "@esbuild/win32-x64": "0.27.4" + } + }, + 
"node_modules/escalade": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", + "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/escape-string-regexp": { + "version": "4.0.0", + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/eventemitter3": { + "version": "4.0.7", + "license": "MIT" + }, + "node_modules/expand-template": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/expand-template/-/expand-template-2.0.3.tgz", + "integrity": "sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==", + "dev": true, + "license": "(MIT OR WTFPL)", + "optional": true, + "peer": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/expect": { + "version": "29.5.0", + "license": "MIT", + "dependencies": { + "@jest/expect-utils": "^29.5.0", + "jest-get-type": "^29.4.3", + "jest-matcher-utils": "^29.5.0", + "jest-message-util": "^29.5.0", + "jest-util": "^29.5.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/expect-type": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/expect-type/-/expect-type-1.3.0.tgz", + "integrity": "sha512-knvyeauYhqjOYvQ66MznSMs83wmHrCycNEN6Ao+2AeYEfxUIkuiVxdEa1qlGEPK+We3n0THiDciYSsCcgW/DoA==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=12.0.0" + } + }, + "node_modules/extend": { + "version": "3.0.2", + "license": "MIT" + }, + "node_modules/fast-deep-equal": { + "version": "3.1.3", + "license": "MIT" + }, + "node_modules/fault": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/fault/-/fault-1.0.4.tgz", + "integrity": 
"sha512-CJ0HCB5tL5fYTEA7ToAq5+kTwd++Borf1/bifxd9iT70QcXr4MRrO3Llf8Ifs70q+SJcGHFtnIE/Nw6giCtECA==", + "license": "MIT", + "dependencies": { + "format": "^0.2.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/fflate": { + "version": "0.4.8", + "license": "MIT" + }, + "node_modules/file-saver": { + "version": "2.0.5", + "license": "MIT" + }, + "node_modules/fill-range": { + "version": "7.0.1", + "license": "MIT", + "dependencies": { + "to-regex-range": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/find-root": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/find-root/-/find-root-1.1.0.tgz", + "integrity": "sha512-NKfW6bec6GfKc0SGx1e07QZY9PE99u0Bft/0rzSD5k3sO/vwkVUpDUKVm5Gpp5Ue3YfShPFTX2070tDs5kB9Ng==", + "license": "MIT" + }, + "node_modules/flairup": { + "version": "0.0.38", + "license": "MIT" + }, + "node_modules/follow-redirects": { + "version": "1.15.2", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "license": "MIT", + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, + "node_modules/for-each": { + "version": "0.3.3", + "license": "MIT", + "dependencies": { + "is-callable": "^1.1.3" + } + }, + "node_modules/form-data": { + "version": "4.0.0", + "license": "MIT", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/format": { + "version": "0.2.2", + "resolved": "https://registry.npmjs.org/format/-/format-0.2.2.tgz", + "integrity": "sha512-wzsgA6WOq+09wrU1tsJ09udeR/YZRaeArL9e1wPbFg3GG2yDnC2ldKpxs4xunpFF9DgqCqOIra3bc1HWrJ37Ww==", + "engines": { + "node": ">=0.4.x" + } + }, + "node_modules/framer-motion": { + "version": "11.2.10", + "license": "MIT", + "dependencies": { + "tslib": "^2.4.0" + }, + "peerDependencies": { + 
"@emotion/is-prop-valid": "*", + "react": "^18.0.0", + "react-dom": "^18.0.0" + }, + "peerDependenciesMeta": { + "@emotion/is-prop-valid": { + "optional": true + }, + "react": { + "optional": true + }, + "react-dom": { + "optional": true + } + } + }, + "node_modules/fs-constants": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz", + "integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==", + "dev": true, + "license": "MIT", + "optional": true, + "peer": true + }, + "node_modules/fs-minipass": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/fs-minipass/-/fs-minipass-2.1.0.tgz", + "integrity": "sha512-V/JgOLFCS+R6Vcq0slCuaeWEdNC3ouDlJMNIsacH2VtALiu9mV4LPrHc5cDl8k5aw6J8jwgWWpiTo5RYhmIzvg==", + "license": "ISC", + "optional": true, + "dependencies": { + "minipass": "^3.0.0" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/fs-minipass/node_modules/minipass": { + "version": "3.3.6", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-3.3.6.tgz", + "integrity": "sha512-DxiNidxSEK+tHG6zOIklvNOwm3hvCrbUrdtzY74U6HKTJxvIDfOUL5W5P2Ghd3DTkhhKPYGqeNUIh5qcM4YBfw==", + "license": "ISC", + "optional": true, + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/fs-minipass/node_modules/yallist": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", + "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", + "license": "ISC", + "optional": true + }, + "node_modules/fs.realpath": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", + "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==", + "license": "ISC", + "optional": true + }, + "node_modules/fsevents": { + "version": "2.3.3", + 
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/function-bind": { + "version": "1.1.1", + "license": "MIT" + }, + "node_modules/functions-have-names": { + "version": "1.2.3", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/gauge": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/gauge/-/gauge-3.0.2.tgz", + "integrity": "sha512-+5J6MS/5XksCuXq++uFRsnUd7Ovu1XenbeuIuNRJxYWjgQbPuFhT14lAvsWfqfAmnwluf1OwMjz39HjfLPci0Q==", + "deprecated": "This package is no longer supported.", + "license": "ISC", + "optional": true, + "dependencies": { + "aproba": "^1.0.3 || ^2.0.0", + "color-support": "^1.1.2", + "console-control-strings": "^1.0.0", + "has-unicode": "^2.0.1", + "object-assign": "^4.1.1", + "signal-exit": "^3.0.0", + "string-width": "^4.2.3", + "strip-ansi": "^6.0.1", + "wide-align": "^1.1.2" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/gensync": { + "version": "1.0.0-beta.2", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/get-intrinsic": { + "version": "1.2.1", + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.1", + "has": "^1.0.3", + "has-proto": "^1.0.1", + "has-symbols": "^1.0.3" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/github-from-package": { + "version": "0.0.0", + "resolved": "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz", + "integrity": "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==", + "dev": true, + "license": "MIT", + "optional": true, + "peer": true + 
}, + "node_modules/glob": { + "version": "7.2.3", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", + "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", + "deprecated": "Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me", + "license": "ISC", + "optional": true, + "dependencies": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.1.1", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + }, + "engines": { + "node": "*" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/gopd": { + "version": "1.0.1", + "license": "MIT", + "dependencies": { + "get-intrinsic": "^1.1.3" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/graceful-fs": { + "version": "4.2.11", + "license": "ISC" + }, + "node_modules/handlebars": { + "version": "4.7.8", + "license": "MIT", + "dependencies": { + "minimist": "^1.2.5", + "neo-async": "^2.6.2", + "source-map": "^0.6.1", + "wordwrap": "^1.0.0" + }, + "bin": { + "handlebars": "bin/handlebars" + }, + "engines": { + "node": ">=0.4.7" + }, + "optionalDependencies": { + "uglify-js": "^3.1.4" + } + }, + "node_modules/handlebars/node_modules/source-map": { + "version": "0.6.1", + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/happy-dom": { + "version": "20.8.8", + "resolved": "https://registry.npmjs.org/happy-dom/-/happy-dom-20.8.8.tgz", + "integrity": "sha512-5/F8wxkNxYtsN0bXfMwIyNLZ9WYsoOYPbmoluqVJqv8KBUbcyKZawJ7uYK4WTX8IHBLYv+VXIwfeNDPy1oKMwQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": ">=20.0.0", + "@types/whatwg-mimetype": "^3.0.2", + "@types/ws": "^8.18.1", + "entities": "^7.0.1", + 
"whatwg-mimetype": "^3.0.0", + "ws": "^8.18.3" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/happy-dom/node_modules/entities": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/entities/-/entities-7.0.1.tgz", + "integrity": "sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA==", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/happy-dom/node_modules/whatwg-mimetype": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-3.0.0.tgz", + "integrity": "sha512-nt+N2dzIutVRxARx1nghPKGv1xHikU7HKdfafKkLNLindmPU/ch3U31NOCGGA/dmPcmb1VlofO0vnKAcsm0o/Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + } + }, + "node_modules/has": { + "version": "1.0.3", + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.1" + }, + "engines": { + "node": ">= 0.4.0" + } + }, + "node_modules/has-bigints": { + "version": "1.0.2", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-flag": { + "version": "4.0.0", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/has-property-descriptors": { + "version": "1.0.0", + "license": "MIT", + "dependencies": { + "get-intrinsic": "^1.1.1" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-proto": { + "version": "1.0.1", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-symbols": { + "version": "1.0.3", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-tostringtag": { + "version": "1.0.0", + "license": "MIT", + "dependencies": { + "has-symbols": "^1.0.2" + 
}, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-unicode": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/has-unicode/-/has-unicode-2.0.1.tgz", + "integrity": "sha512-8Rf9Y83NBReMnx0gFzA8JImQACstCYWUplepDa9xprwwtmgEZUF0h/i5xSA625zB/I37EtrswSST6OXxwaaIJQ==", + "license": "ISC", + "optional": true + }, + "node_modules/hast-util-parse-selector": { + "version": "2.2.5", + "resolved": "https://registry.npmjs.org/hast-util-parse-selector/-/hast-util-parse-selector-2.2.5.tgz", + "integrity": "sha512-7j6mrk/qqkSehsM92wQjdIgWM2/BW61u/53G6xmC8i1OmEdKLHbk419QKQUjz6LglWsfqoiHmyMRkP1BGjecNQ==", + "license": "MIT", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-whitespace": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/hast-util-whitespace/-/hast-util-whitespace-2.0.1.tgz", + "integrity": "sha512-nAxA0v8+vXSBDt3AnRUNjyRIQ0rD+ntpbAp4LnPkumc5M9yUbSMa4XDU9Q6etY4f1Wp4bNgvc1yjiZtsTTrSng==", + "license": "MIT", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hastscript": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/hastscript/-/hastscript-6.0.0.tgz", + "integrity": "sha512-nDM6bvd7lIqDUiYEiu5Sl/+6ReP0BMk/2f4U/Rooccxkj0P5nm+acM5PrGJ/t5I8qPGiqZSE6hVAwZEdZIvP4w==", + "license": "MIT", + "dependencies": { + "@types/hast": "^2.0.0", + "comma-separated-tokens": "^1.0.0", + "hast-util-parse-selector": "^2.0.0", + "property-information": "^5.0.0", + "space-separated-tokens": "^1.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hastscript/node_modules/comma-separated-tokens": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/comma-separated-tokens/-/comma-separated-tokens-1.0.8.tgz", + "integrity": 
"sha512-GHuDRO12Sypu2cV70d1dkA2EUmXHgntrzbpvOB+Qy+49ypNfGgFQIC2fhhXbnyrJRynDCAARsT7Ou0M6hirpfw==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/hastscript/node_modules/property-information": { + "version": "5.6.0", + "resolved": "https://registry.npmjs.org/property-information/-/property-information-5.6.0.tgz", + "integrity": "sha512-YUHSPk+A30YPv+0Qf8i9Mbfe/C0hdPXk1s1jPVToV8pk8BQtpw10ct89Eo7OWkutrwqvT0eicAxlOg3dOAu8JA==", + "license": "MIT", + "dependencies": { + "xtend": "^4.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/hastscript/node_modules/space-separated-tokens": { + "version": "1.1.5", + "resolved": "https://registry.npmjs.org/space-separated-tokens/-/space-separated-tokens-1.1.5.tgz", + "integrity": "sha512-q/JSVd1Lptzhf5bkYm4ob4iWPjx0KiRe3sRFBNrVqbJkFaBm5vbbowy1mymoPNLRa52+oadOhJ+K49wsSeSjTA==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/highlight.js": { + "version": "10.7.3", + "resolved": "https://registry.npmjs.org/highlight.js/-/highlight.js-10.7.3.tgz", + "integrity": "sha512-tzcUFauisWKNHaRkN4Wjl/ZA07gENAjFl3J/c480dprkGTg5EQstgaNFqBfUqCq54kZRIEcreTsAgF/m2quD7A==", + "license": "BSD-3-Clause", + "engines": { + "node": "*" + } + }, + "node_modules/highlightjs-vue": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/highlightjs-vue/-/highlightjs-vue-1.0.0.tgz", + "integrity": "sha512-PDEfEF102G23vHmPhLyPboFCD+BkMGu+GuJe2d9/eH4FsCwvgBpnc9n0pGE+ffKdph38s6foEZiEjdgHdzp+IA==", + "license": "CC0-1.0" + }, + "node_modules/hoist-non-react-statics": { + "version": "3.3.2", + "license": "BSD-3-Clause", + "dependencies": { + "react-is": "^16.7.0" + } + }, + "node_modules/hoist-non-react-statics/node_modules/react-is": { + "version": "16.13.1", + "license": "MIT" + }, + "node_modules/html-encoding-sniffer": { + 
"version": "4.0.0", + "resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-4.0.0.tgz", + "integrity": "sha512-Y22oTqIU4uuPgEemfz7NDJz6OeKf12Lsu+QC+s3BVpda64lTiMYCyGwg5ki4vFxkMwQdeZDl2adZoqUgdFuTgQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "whatwg-encoding": "^3.1.1" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/http-proxy": { + "version": "1.18.1", + "license": "MIT", + "dependencies": { + "eventemitter3": "^4.0.0", + "follow-redirects": "^1.0.0", + "requires-port": "^1.0.0" + }, + "engines": { + "node": ">=8.0.0" + } + }, + "node_modules/http-proxy-agent": { + "version": "7.0.2", + "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", + "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==", + "dev": true, + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.0", + "debug": "^4.3.4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/http-proxy-middleware": { + "version": "2.0.6", + "license": "MIT", + "dependencies": { + "@types/http-proxy": "^1.17.8", + "http-proxy": "^1.18.1", + "is-glob": "^4.0.1", + "is-plain-obj": "^3.0.0", + "micromatch": "^4.0.2" + }, + "engines": { + "node": ">=12.0.0" + }, + "peerDependencies": { + "@types/express": "^4.17.13" + }, + "peerDependenciesMeta": { + "@types/express": { + "optional": true + } + } + }, + "node_modules/https-proxy-agent": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", + "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", + "dev": true, + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/i18next": { + "version": "23.16.8", + "funding": [ + { + "type": "individual", + "url": "https://locize.com" + }, + { + "type": 
"individual", + "url": "https://locize.com/i18next.html" + }, + { + "type": "individual", + "url": "https://www.i18next.com/how-to/faq#i18next-is-awesome.-how-can-i-support-the-project" + } + ], + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.23.2" + } + }, + "node_modules/iconv-lite": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", + "dev": true, + "license": "MIT", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/ieee754": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", + "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "BSD-3-Clause", + "optional": true, + "peer": true + }, + "node_modules/immer": { + "version": "9.0.21", + "license": "MIT", + "optional": true, + "peer": true, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/immer" + } + }, + "node_modules/immutable": { + "version": "4.3.7", + "license": "MIT" + }, + "node_modules/import-fresh": { + "version": "3.3.0", + "license": "MIT", + "dependencies": { + "parent-module": "^1.0.0", + "resolve-from": "^4.0.0" + }, + "engines": { + "node": ">=6" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/indent-string": { + "version": "4.0.0", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/inflight": { + "version": "1.0.6", + "resolved": 
"https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", + "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==", + "deprecated": "This module is not supported, and leaks memory. Do not use it. Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful.", + "license": "ISC", + "optional": true, + "dependencies": { + "once": "^1.3.0", + "wrappy": "1" + } + }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "license": "ISC", + "optional": true + }, + "node_modules/ini": { + "version": "1.3.8", + "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz", + "integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==", + "dev": true, + "license": "ISC", + "optional": true, + "peer": true + }, + "node_modules/inline-style-parser": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/inline-style-parser/-/inline-style-parser-0.1.1.tgz", + "integrity": "sha512-7NXolsK4CAS5+xvdj5OMMbI962hU/wvwoxk+LWR9Ek9bVtyuuYScDN6eS0rUm6TxApFpw7CX1o4uJzcd4AyD3Q==", + "license": "MIT" + }, + "node_modules/internal-slot": { + "version": "1.0.5", + "license": "MIT", + "dependencies": { + "get-intrinsic": "^1.2.0", + "has": "^1.0.3", + "side-channel": "^1.0.4" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/internmap": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/internmap/-/internmap-2.0.3.tgz", + "integrity": "sha512-5Hh7Y1wQbvY5ooGgPbDaL5iYLAPzMTUrjMulskHLH6wnv/A+1q5rgEaiuqEjB+oxGXIVZs1FF+R/KPN3ZSQYYg==", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/invariant": { + "version": "2.2.4", + "license": "MIT", + "dependencies": { + 
"loose-envify": "^1.0.0" + } + }, + "node_modules/is-alphabetical": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/is-alphabetical/-/is-alphabetical-1.0.4.tgz", + "integrity": "sha512-DwzsA04LQ10FHTZuL0/grVDk4rFoVH1pjAToYwBrHSxcrBIGQuXrQMtD5U1b0U2XVgKZCTLLP8u2Qxqhy3l2Vg==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/is-alphanumerical": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/is-alphanumerical/-/is-alphanumerical-1.0.4.tgz", + "integrity": "sha512-UzoZUr+XfVz3t3v4KyGEniVL9BDRoQtY7tOyrRybkVNjDFWyo1yhXNGrrBTQxp3ib9BLAWs7k2YKBQsFRkZG9A==", + "license": "MIT", + "dependencies": { + "is-alphabetical": "^1.0.0", + "is-decimal": "^1.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/is-arguments": { + "version": "1.1.1", + "license": "MIT", + "dependencies": { + "call-bind": "^1.0.2", + "has-tostringtag": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-array-buffer": { + "version": "3.0.2", + "license": "MIT", + "dependencies": { + "call-bind": "^1.0.2", + "get-intrinsic": "^1.2.0", + "is-typed-array": "^1.1.10" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-arrayish": { + "version": "0.2.1", + "license": "MIT" + }, + "node_modules/is-bigint": { + "version": "1.0.4", + "license": "MIT", + "dependencies": { + "has-bigints": "^1.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-boolean-object": { + "version": "1.1.2", + "license": "MIT", + "dependencies": { + "call-bind": "^1.0.2", + "has-tostringtag": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-buffer": { + "version": "2.0.5", + "funding": [ + { + 
"type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "engines": { + "node": ">=4" + } + }, + "node_modules/is-callable": { + "version": "1.2.7", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-core-module": { + "version": "2.12.1", + "license": "MIT", + "dependencies": { + "has": "^1.0.3" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-date-object": { + "version": "1.0.5", + "license": "MIT", + "dependencies": { + "has-tostringtag": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-decimal": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/is-decimal/-/is-decimal-1.0.4.tgz", + "integrity": "sha512-RGdriMmQQvZ2aqaQq3awNA6dCGtKpiDFcOzrTWrDAT2MiWrKQVPmxLGHl7Y2nNu6led0kEyoX0enY0qXYsv9zw==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/is-extglob": { + "version": "2.1.1", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "license": "MIT", + "optional": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/is-glob": { + "version": "4.0.3", + "license": "MIT", + "dependencies": { + "is-extglob": "^2.1.1" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-hexadecimal": { + "version": "1.0.4", + "resolved": 
"https://registry.npmjs.org/is-hexadecimal/-/is-hexadecimal-1.0.4.tgz", + "integrity": "sha512-gyPJuv83bHMpocVYoqof5VDiZveEoGoFL8m3BXNb2VW8Xs+rz9kqO8LOQ5DH6EsuvilT1ApazU0pyl+ytbPtlw==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/is-map": { + "version": "2.0.2", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-number": { + "version": "7.0.0", + "license": "MIT", + "engines": { + "node": ">=0.12.0" + } + }, + "node_modules/is-number-object": { + "version": "1.0.7", + "license": "MIT", + "dependencies": { + "has-tostringtag": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-plain-obj": { + "version": "3.0.0", + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/is-potential-custom-element-name": { + "version": "1.0.1", + "dev": true, + "license": "MIT" + }, + "node_modules/is-regex": { + "version": "1.1.4", + "license": "MIT", + "dependencies": { + "call-bind": "^1.0.2", + "has-tostringtag": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-set": { + "version": "2.0.2", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-shared-array-buffer": { + "version": "1.0.2", + "license": "MIT", + "dependencies": { + "call-bind": "^1.0.2" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-string": { + "version": "1.0.7", + "license": "MIT", + "dependencies": { + "has-tostringtag": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-symbol": { + "version": "1.0.4", + 
"license": "MIT", + "dependencies": { + "has-symbols": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-typed-array": { + "version": "1.1.10", + "license": "MIT", + "dependencies": { + "available-typed-arrays": "^1.0.5", + "call-bind": "^1.0.2", + "for-each": "^0.3.3", + "gopd": "^1.0.1", + "has-tostringtag": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-weakmap": { + "version": "2.0.1", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-weakset": { + "version": "2.0.2", + "license": "MIT", + "dependencies": { + "call-bind": "^1.0.2", + "get-intrinsic": "^1.1.1" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/isarray": { + "version": "2.0.5", + "license": "MIT" + }, + "node_modules/jest-diff": { + "version": "29.5.0", + "license": "MIT", + "dependencies": { + "chalk": "^4.0.0", + "diff-sequences": "^29.4.3", + "jest-get-type": "^29.4.3", + "pretty-format": "^29.5.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-diff/node_modules/ansi-styles": { + "version": "5.2.0", + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/jest-diff/node_modules/pretty-format": { + "version": "29.5.0", + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.4.3", + "ansi-styles": "^5.0.0", + "react-is": "^18.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-get-type": { + "version": "29.4.3", + "license": "MIT", + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-matcher-utils": { + "version": "29.5.0", + "license": "MIT", + "dependencies": { + "chalk": 
"^4.0.0", + "jest-diff": "^29.5.0", + "jest-get-type": "^29.4.3", + "pretty-format": "^29.5.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-matcher-utils/node_modules/ansi-styles": { + "version": "5.2.0", + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/jest-matcher-utils/node_modules/pretty-format": { + "version": "29.5.0", + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.4.3", + "ansi-styles": "^5.0.0", + "react-is": "^18.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-message-util": { + "version": "29.5.0", + "license": "MIT", + "dependencies": { + "@babel/code-frame": "^7.12.13", + "@jest/types": "^29.5.0", + "@types/stack-utils": "^2.0.0", + "chalk": "^4.0.0", + "graceful-fs": "^4.2.9", + "micromatch": "^4.0.4", + "pretty-format": "^29.5.0", + "slash": "^3.0.0", + "stack-utils": "^2.0.3" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-message-util/node_modules/ansi-styles": { + "version": "5.2.0", + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/jest-message-util/node_modules/pretty-format": { + "version": "29.5.0", + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.4.3", + "ansi-styles": "^5.0.0", + "react-is": "^18.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-util": { + "version": "29.5.0", + "license": "MIT", + "dependencies": { + "@jest/types": "^29.5.0", + "@types/node": "*", + "chalk": "^4.0.0", + "ci-info": "^3.2.0", + "graceful-fs": "^4.2.9", + "picomatch": "^2.2.3" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jiti": { + "version": "2.6.1", + "resolved": 
"https://registry.npmjs.org/jiti/-/jiti-2.6.1.tgz", + "integrity": "sha512-ekilCSN1jwRvIbgeg/57YFh8qQDNbwDb9xT/qu2DAHbFFZUicIl4ygVaAvzveMhMVr3LnpSKTNnwt8PoOfmKhQ==", + "dev": true, + "license": "MIT", + "optional": true, + "peer": true, + "bin": { + "jiti": "lib/jiti-cli.mjs" + } + }, + "node_modules/js-cookie": { + "version": "3.0.5", + "license": "MIT", + "engines": { + "node": ">=14" + } + }, + "node_modules/js-tokens": { + "version": "4.0.0", + "license": "MIT" + }, + "node_modules/js-yaml": { + "version": "4.1.0", + "license": "MIT", + "dependencies": { + "argparse": "^2.0.1" + }, + "bin": { + "js-yaml": "bin/js-yaml.js" + } + }, + "node_modules/jsdom": { + "version": "27.0.1", + "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-27.0.1.tgz", + "integrity": "sha512-SNSQteBL1IlV2zqhwwolaG9CwhIhTvVHWg3kTss/cLE7H/X4644mtPQqYvCfsSrGQWt9hSZcgOXX8bOZaMN+kA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@asamuzakjp/dom-selector": "^6.7.2", + "cssstyle": "^5.3.1", + "data-urls": "^6.0.0", + "decimal.js": "^10.6.0", + "html-encoding-sniffer": "^4.0.0", + "http-proxy-agent": "^7.0.2", + "https-proxy-agent": "^7.0.6", + "is-potential-custom-element-name": "^1.0.1", + "parse5": "^8.0.0", + "rrweb-cssom": "^0.8.0", + "saxes": "^6.0.0", + "symbol-tree": "^3.2.4", + "tough-cookie": "^6.0.0", + "w3c-xmlserializer": "^5.0.0", + "webidl-conversions": "^8.0.0", + "whatwg-encoding": "^3.1.1", + "whatwg-mimetype": "^4.0.0", + "whatwg-url": "^15.1.0", + "ws": "^8.18.3", + "xml-name-validator": "^5.0.0" + }, + "engines": { + "node": ">=20" + }, + "peerDependencies": { + "canvas": "^3.0.0" + }, + "peerDependenciesMeta": { + "canvas": { + "optional": true + } + } + }, + "node_modules/jsesc": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/jsesc/-/jsesc-3.1.0.tgz", + "integrity": "sha512-/sM3dO2FOzXjKQhJuo0Q173wf2KOo8t4I8vHy6lF9poUp7bKT0/NHE8fPX23PwfhnykfqnC2xRxOnVw5XuGIaA==", + "license": "MIT", + "bin": { + "jsesc": "bin/jsesc" + }, + "engines": { 
+ "node": ">=6" + } + }, + "node_modules/json-2-csv": { + "version": "5.5.4", + "license": "MIT", + "dependencies": { + "deeks": "3.1.0", + "doc-path": "4.1.1" + }, + "engines": { + "node": ">= 16" + } + }, + "node_modules/json-logic-js": { + "version": "2.0.5", + "license": "MIT" + }, + "node_modules/json-parse-even-better-errors": { + "version": "2.3.1", + "license": "MIT" + }, + "node_modules/json-schema-compare": { + "version": "0.2.2", + "license": "MIT", + "dependencies": { + "lodash": "^4.17.4" + } + }, + "node_modules/json-schema-merge-allof": { + "version": "0.8.1", + "license": "MIT", + "dependencies": { + "compute-lcm": "^1.1.2", + "json-schema-compare": "^0.2.2", + "lodash": "^4.17.20" + }, + "engines": { + "node": ">=12.0.0" + } + }, + "node_modules/json-schema-traverse": { + "version": "1.0.0", + "license": "MIT" + }, + "node_modules/json2mq": { + "version": "0.2.0", + "license": "MIT", + "dependencies": { + "string-convert": "^0.2.0" + } + }, + "node_modules/json5": { + "version": "2.2.3", + "dev": true, + "license": "MIT", + "bin": { + "json5": "lib/cli.js" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/jsonpointer": { + "version": "5.0.1", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/lines-and-columns": { + "version": "1.2.4", + "license": "MIT" + }, + "node_modules/lodash": { + "version": "4.17.21", + "license": "MIT" + }, + "node_modules/lodash-es": { + "version": "4.17.21", + "license": "MIT" + }, + "node_modules/longest-streak": { + "version": "3.1.0", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/loose-envify": { + "version": "1.4.0", + "license": "MIT", + "dependencies": { + "js-tokens": "^3.0.0 || ^4.0.0" + }, + "bin": { + "loose-envify": "cli.js" + } + }, + "node_modules/loupe": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/loupe/-/loupe-3.2.1.tgz", + "integrity": 
"sha512-CdzqowRJCeLU72bHvWqwRBBlLcMEtIvGrlvef74kMnV2AolS9Y8xUv1I0U/MNAWMhBlKIoyuEgoJ0t/bbwHbLQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/lower-case": { + "version": "2.0.2", + "dev": true, + "license": "MIT", + "dependencies": { + "tslib": "^2.0.3" + } + }, + "node_modules/lowlight": { + "version": "1.20.0", + "resolved": "https://registry.npmjs.org/lowlight/-/lowlight-1.20.0.tgz", + "integrity": "sha512-8Ktj+prEb1RoCPkEOrPMYUN/nCggB7qAWe3a7OpMjWQkh3l2RD5wKRQ+o8Q8YuI9RG/xs95waaI/E6ym/7NsTw==", + "license": "MIT", + "dependencies": { + "fault": "^1.0.0", + "highlight.js": "~10.7.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/lru-cache": { + "version": "5.1.1", + "dev": true, + "license": "ISC", + "dependencies": { + "yallist": "^3.0.2" + } + }, + "node_modules/lz-string": { + "version": "1.5.0", + "license": "MIT", + "bin": { + "lz-string": "bin/bin.js" + } + }, + "node_modules/make-dir": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/make-dir/-/make-dir-3.1.0.tgz", + "integrity": "sha512-g3FeP20LNwhALb/6Cz6Dd4F2ngze0jz7tbzrD2wAV+o9FeNHe4rL+yK2md0J/fiSf1sa1ADhXqi5+oVwOM/eGw==", + "license": "MIT", + "optional": true, + "dependencies": { + "semver": "^6.0.0" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/markdown-table": { + "version": "3.0.3", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/markdown-to-jsx": { + "version": "7.4.0", + "license": "MIT", + "engines": { + "node": ">= 10" + }, + "peerDependencies": { + "react": ">= 0.14.0" + } + }, + "node_modules/mdast-util-definitions": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/mdast-util-definitions/-/mdast-util-definitions-5.1.2.tgz", + "integrity": 
"sha512-8SVPMuHqlPME/z3gqVwWY4zVXn8lqKv/pAhC57FuJ40ImXyBpmO5ukh98zB2v7Blql2FiHjHv9LVztSIqjY+MA==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^3.0.0", + "@types/unist": "^2.0.0", + "unist-util-visit": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-find-and-replace": { + "version": "2.2.2", + "license": "MIT", + "dependencies": { + "@types/mdast": "^3.0.0", + "escape-string-regexp": "^5.0.0", + "unist-util-is": "^5.0.0", + "unist-util-visit-parents": "^5.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-find-and-replace/node_modules/escape-string-regexp": { + "version": "5.0.0", + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/mdast-util-from-markdown": { + "version": "1.3.1", + "license": "MIT", + "dependencies": { + "@types/mdast": "^3.0.0", + "@types/unist": "^2.0.0", + "decode-named-character-reference": "^1.0.0", + "mdast-util-to-string": "^3.1.0", + "micromark": "^3.0.0", + "micromark-util-decode-numeric-character-reference": "^1.0.0", + "micromark-util-decode-string": "^1.0.0", + "micromark-util-normalize-identifier": "^1.0.0", + "micromark-util-symbol": "^1.0.0", + "micromark-util-types": "^1.0.0", + "unist-util-stringify-position": "^3.0.0", + "uvu": "^0.5.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-gfm": { + "version": "2.0.2", + "license": "MIT", + "dependencies": { + "mdast-util-from-markdown": "^1.0.0", + "mdast-util-gfm-autolink-literal": "^1.0.0", + "mdast-util-gfm-footnote": "^1.0.0", + "mdast-util-gfm-strikethrough": "^1.0.0", + "mdast-util-gfm-table": "^1.0.0", + "mdast-util-gfm-task-list-item": "^1.0.0", + "mdast-util-to-markdown": "^1.0.0" + }, + "funding": { + "type": 
"opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-gfm-autolink-literal": { + "version": "1.0.3", + "license": "MIT", + "dependencies": { + "@types/mdast": "^3.0.0", + "ccount": "^2.0.0", + "mdast-util-find-and-replace": "^2.0.0", + "micromark-util-character": "^1.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-gfm-footnote": { + "version": "1.0.2", + "license": "MIT", + "dependencies": { + "@types/mdast": "^3.0.0", + "mdast-util-to-markdown": "^1.3.0", + "micromark-util-normalize-identifier": "^1.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-gfm-strikethrough": { + "version": "1.0.3", + "license": "MIT", + "dependencies": { + "@types/mdast": "^3.0.0", + "mdast-util-to-markdown": "^1.3.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-gfm-table": { + "version": "1.0.7", + "license": "MIT", + "dependencies": { + "@types/mdast": "^3.0.0", + "markdown-table": "^3.0.0", + "mdast-util-from-markdown": "^1.0.0", + "mdast-util-to-markdown": "^1.3.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-gfm-task-list-item": { + "version": "1.0.2", + "license": "MIT", + "dependencies": { + "@types/mdast": "^3.0.0", + "mdast-util-to-markdown": "^1.3.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-phrasing": { + "version": "3.0.1", + "license": "MIT", + "dependencies": { + "@types/mdast": "^3.0.0", + "unist-util-is": "^5.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-to-hast": { + "version": "12.3.0", + "resolved": 
"https://registry.npmjs.org/mdast-util-to-hast/-/mdast-util-to-hast-12.3.0.tgz", + "integrity": "sha512-pits93r8PhnIoU4Vy9bjW39M2jJ6/tdHyja9rrot9uujkN7UTU9SDnE6WNJz/IGyQk3XHX6yNNtrBH6cQzm8Hw==", + "license": "MIT", + "dependencies": { + "@types/hast": "^2.0.0", + "@types/mdast": "^3.0.0", + "mdast-util-definitions": "^5.0.0", + "micromark-util-sanitize-uri": "^1.1.0", + "trim-lines": "^3.0.0", + "unist-util-generated": "^2.0.0", + "unist-util-position": "^4.0.0", + "unist-util-visit": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-to-markdown": { + "version": "1.5.0", + "license": "MIT", + "dependencies": { + "@types/mdast": "^3.0.0", + "@types/unist": "^2.0.0", + "longest-streak": "^3.0.0", + "mdast-util-phrasing": "^3.0.0", + "mdast-util-to-string": "^3.0.0", + "micromark-util-decode-string": "^1.0.0", + "unist-util-visit": "^4.0.0", + "zwitch": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-to-string": { + "version": "3.2.0", + "license": "MIT", + "dependencies": { + "@types/mdast": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdn-data": { + "version": "2.27.1", + "resolved": "https://registry.npmjs.org/mdn-data/-/mdn-data-2.27.1.tgz", + "integrity": "sha512-9Yubnt3e8A0OKwxYSXyhLymGW4sCufcLG6VdiDdUGVkPhpqLxlvP5vl1983gQjJl3tqbrM731mjaZaP68AgosQ==", + "dev": true, + "license": "CC0-1.0" + }, + "node_modules/memoize-one": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/memoize-one/-/memoize-one-6.0.0.tgz", + "integrity": "sha512-rkpe71W0N0c0Xz6QD0eJETuWAJGnJ9afsl1srmwPrI+yBCkge5EycXXbYRyvL29zZVUWQCY7InPRCv3GDXuZNw==", + "license": "MIT" + }, + "node_modules/micromark": { + "version": "3.2.0", + "funding": [ + { + "type": "GitHub Sponsors", + "url": 
"https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "@types/debug": "^4.0.0", + "debug": "^4.0.0", + "decode-named-character-reference": "^1.0.0", + "micromark-core-commonmark": "^1.0.1", + "micromark-factory-space": "^1.0.0", + "micromark-util-character": "^1.0.0", + "micromark-util-chunked": "^1.0.0", + "micromark-util-combine-extensions": "^1.0.0", + "micromark-util-decode-numeric-character-reference": "^1.0.0", + "micromark-util-encode": "^1.0.0", + "micromark-util-normalize-identifier": "^1.0.0", + "micromark-util-resolve-all": "^1.0.0", + "micromark-util-sanitize-uri": "^1.0.0", + "micromark-util-subtokenize": "^1.0.0", + "micromark-util-symbol": "^1.0.0", + "micromark-util-types": "^1.0.1", + "uvu": "^0.5.0" + } + }, + "node_modules/micromark-core-commonmark": { + "version": "1.1.0", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "decode-named-character-reference": "^1.0.0", + "micromark-factory-destination": "^1.0.0", + "micromark-factory-label": "^1.0.0", + "micromark-factory-space": "^1.0.0", + "micromark-factory-title": "^1.0.0", + "micromark-factory-whitespace": "^1.0.0", + "micromark-util-character": "^1.0.0", + "micromark-util-chunked": "^1.0.0", + "micromark-util-classify-character": "^1.0.0", + "micromark-util-html-tag-name": "^1.0.0", + "micromark-util-normalize-identifier": "^1.0.0", + "micromark-util-resolve-all": "^1.0.0", + "micromark-util-subtokenize": "^1.0.0", + "micromark-util-symbol": "^1.0.0", + "micromark-util-types": "^1.0.1", + "uvu": "^0.5.0" + } + }, + "node_modules/micromark-extension-gfm": { + "version": "2.0.3", + "license": "MIT", + "dependencies": { + "micromark-extension-gfm-autolink-literal": "^1.0.0", + 
"micromark-extension-gfm-footnote": "^1.0.0", + "micromark-extension-gfm-strikethrough": "^1.0.0", + "micromark-extension-gfm-table": "^1.0.0", + "micromark-extension-gfm-tagfilter": "^1.0.0", + "micromark-extension-gfm-task-list-item": "^1.0.0", + "micromark-util-combine-extensions": "^1.0.0", + "micromark-util-types": "^1.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-autolink-literal": { + "version": "1.0.5", + "license": "MIT", + "dependencies": { + "micromark-util-character": "^1.0.0", + "micromark-util-sanitize-uri": "^1.0.0", + "micromark-util-symbol": "^1.0.0", + "micromark-util-types": "^1.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-footnote": { + "version": "1.1.2", + "license": "MIT", + "dependencies": { + "micromark-core-commonmark": "^1.0.0", + "micromark-factory-space": "^1.0.0", + "micromark-util-character": "^1.0.0", + "micromark-util-normalize-identifier": "^1.0.0", + "micromark-util-sanitize-uri": "^1.0.0", + "micromark-util-symbol": "^1.0.0", + "micromark-util-types": "^1.0.0", + "uvu": "^0.5.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-strikethrough": { + "version": "1.0.7", + "license": "MIT", + "dependencies": { + "micromark-util-chunked": "^1.0.0", + "micromark-util-classify-character": "^1.0.0", + "micromark-util-resolve-all": "^1.0.0", + "micromark-util-symbol": "^1.0.0", + "micromark-util-types": "^1.0.0", + "uvu": "^0.5.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-table": { + "version": "1.0.7", + "license": "MIT", + "dependencies": { + "micromark-factory-space": "^1.0.0", + "micromark-util-character": "^1.0.0", + "micromark-util-symbol": 
"^1.0.0", + "micromark-util-types": "^1.0.0", + "uvu": "^0.5.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-tagfilter": { + "version": "1.0.2", + "license": "MIT", + "dependencies": { + "micromark-util-types": "^1.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-task-list-item": { + "version": "1.0.5", + "license": "MIT", + "dependencies": { + "micromark-factory-space": "^1.0.0", + "micromark-util-character": "^1.0.0", + "micromark-util-symbol": "^1.0.0", + "micromark-util-types": "^1.0.0", + "uvu": "^0.5.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-factory-destination": { + "version": "1.1.0", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-character": "^1.0.0", + "micromark-util-symbol": "^1.0.0", + "micromark-util-types": "^1.0.0" + } + }, + "node_modules/micromark-factory-label": { + "version": "1.1.0", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-character": "^1.0.0", + "micromark-util-symbol": "^1.0.0", + "micromark-util-types": "^1.0.0", + "uvu": "^0.5.0" + } + }, + "node_modules/micromark-factory-space": { + "version": "1.1.0", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-character": 
"^1.0.0", + "micromark-util-types": "^1.0.0" + } + }, + "node_modules/micromark-factory-title": { + "version": "1.1.0", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-factory-space": "^1.0.0", + "micromark-util-character": "^1.0.0", + "micromark-util-symbol": "^1.0.0", + "micromark-util-types": "^1.0.0" + } + }, + "node_modules/micromark-factory-whitespace": { + "version": "1.1.0", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-factory-space": "^1.0.0", + "micromark-util-character": "^1.0.0", + "micromark-util-symbol": "^1.0.0", + "micromark-util-types": "^1.0.0" + } + }, + "node_modules/micromark-util-character": { + "version": "1.2.0", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-symbol": "^1.0.0", + "micromark-util-types": "^1.0.0" + } + }, + "node_modules/micromark-util-chunked": { + "version": "1.1.0", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-symbol": "^1.0.0" + } + }, + "node_modules/micromark-util-classify-character": { + "version": "1.1.0", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + 
"micromark-util-character": "^1.0.0", + "micromark-util-symbol": "^1.0.0", + "micromark-util-types": "^1.0.0" + } + }, + "node_modules/micromark-util-combine-extensions": { + "version": "1.1.0", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-chunked": "^1.0.0", + "micromark-util-types": "^1.0.0" + } + }, + "node_modules/micromark-util-decode-numeric-character-reference": { + "version": "1.1.0", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-symbol": "^1.0.0" + } + }, + "node_modules/micromark-util-decode-string": { + "version": "1.1.0", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "decode-named-character-reference": "^1.0.0", + "micromark-util-character": "^1.0.0", + "micromark-util-decode-numeric-character-reference": "^1.0.0", + "micromark-util-symbol": "^1.0.0" + } + }, + "node_modules/micromark-util-encode": { + "version": "1.1.0", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT" + }, + "node_modules/micromark-util-html-tag-name": { + "version": "1.2.0", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT" + }, + "node_modules/micromark-util-normalize-identifier": { + "version": 
"1.1.0", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-symbol": "^1.0.0" + } + }, + "node_modules/micromark-util-resolve-all": { + "version": "1.1.0", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-types": "^1.0.0" + } + }, + "node_modules/micromark-util-sanitize-uri": { + "version": "1.2.0", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-character": "^1.0.0", + "micromark-util-encode": "^1.0.0", + "micromark-util-symbol": "^1.0.0" + } + }, + "node_modules/micromark-util-subtokenize": { + "version": "1.1.0", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-chunked": "^1.0.0", + "micromark-util-symbol": "^1.0.0", + "micromark-util-types": "^1.0.0", + "uvu": "^0.5.0" + } + }, + "node_modules/micromark-util-symbol": { + "version": "1.1.0", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT" + }, + "node_modules/micromark-util-types": { + "version": "1.1.0", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": 
"https://opencollective.com/unified" + } + ], + "license": "MIT" + }, + "node_modules/micromatch": { + "version": "4.0.5", + "license": "MIT", + "dependencies": { + "braces": "^3.0.2", + "picomatch": "^2.3.1" + }, + "engines": { + "node": ">=8.6" + } + }, + "node_modules/mime-db": { + "version": "1.52.0", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "license": "MIT", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mimic-response": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz", + "integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==", + "dev": true, + "license": "MIT", + "optional": true, + "peer": true, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/min-indent": { + "version": "1.0.1", + "license": "MIT", + "engines": { + "node": ">=4" + } + }, + "node_modules/minimatch": { + "version": "3.1.5", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.5.tgz", + "integrity": "sha512-VgjWUsnnT6n+NUk6eZq77zeFdpW2LWDzP6zFGrCbHXiYNul5Dzqk2HHQ5uFH2DNW5Xbp8+jVzaeNt94ssEEl4w==", + "license": "ISC", + "optional": true, + "dependencies": { + "brace-expansion": "^1.1.7" + }, + "engines": { + "node": "*" + } + }, + "node_modules/minimist": { + "version": "1.2.8", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/minipass": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-5.0.0.tgz", + "integrity": "sha512-3FnjYuehv9k6ovOEbyOswadCDPX1piCfhV8ncmYtHOjuPwylVWsghTLo7rabjC3Rx5xD4HDx8Wm1xnMF7S5qFQ==", + "license": "ISC", + "optional": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/minizlib": { + "version": "2.1.2", + "resolved": 
"https://registry.npmjs.org/minizlib/-/minizlib-2.1.2.tgz", + "integrity": "sha512-bAxsR8BVfj60DWXHE3u30oHzfl4G7khkSuPW+qvpd7jFRHm7dLxOjUk1EHACJ/hxLY8phGJ0YhYHZo7jil7Qdg==", + "license": "MIT", + "optional": true, + "dependencies": { + "minipass": "^3.0.0", + "yallist": "^4.0.0" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/minizlib/node_modules/minipass": { + "version": "3.3.6", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-3.3.6.tgz", + "integrity": "sha512-DxiNidxSEK+tHG6zOIklvNOwm3hvCrbUrdtzY74U6HKTJxvIDfOUL5W5P2Ghd3DTkhhKPYGqeNUIh5qcM4YBfw==", + "license": "ISC", + "optional": true, + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/minizlib/node_modules/yallist": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", + "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", + "license": "ISC", + "optional": true + }, + "node_modules/mkdirp": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-1.0.4.tgz", + "integrity": "sha512-vVqVZQyf3WLx2Shd0qJ9xuvqgAyKPLAiqITEtqW0oIUjzo3PePDd6fW9iFz30ef7Ysp/oiWqbhszeGWW2T6Gzw==", + "license": "MIT", + "optional": true, + "bin": { + "mkdirp": "bin/cmd.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/mkdirp-classic": { + "version": "0.5.3", + "resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz", + "integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==", + "dev": true, + "license": "MIT", + "optional": true, + "peer": true + }, + "node_modules/moment": { + "version": "2.30.1", + "license": "MIT", + "engines": { + "node": "*" + } + }, + "node_modules/moment-timezone": { + "version": "0.5.45", + "license": "MIT", + "dependencies": { + "moment": "^2.29.4" + }, + "engines": { + "node": "*" + } + }, + 
"node_modules/monaco-editor": { + "version": "0.52.2", + "license": "MIT", + "peer": true + }, + "node_modules/mri": { + "version": "1.2.0", + "license": "MIT", + "engines": { + "node": ">=4" + } + }, + "node_modules/ms": { + "version": "2.1.2", + "license": "MIT" + }, + "node_modules/nan": { + "version": "2.26.2", + "resolved": "https://registry.npmjs.org/nan/-/nan-2.26.2.tgz", + "integrity": "sha512-0tTvBTYkt3tdGw22nrAy50x7gpbGCCFH3AFcyS5WiUu7Eu4vWlri1woE6qHBSfy11vksDqkiwjOnlR7WV8G1Hw==", + "license": "MIT", + "optional": true + }, + "node_modules/nanoid": { + "version": "3.3.11", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz", + "integrity": "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "bin": { + "nanoid": "bin/nanoid.cjs" + }, + "engines": { + "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" + } + }, + "node_modules/napi-build-utils": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/napi-build-utils/-/napi-build-utils-2.0.0.tgz", + "integrity": "sha512-GEbrYkbfF7MoNaoh2iGG84Mnf/WZfB0GdGEsM8wz7Expx/LlWf5U8t9nvJKXSp3qr5IsEbK04cBGhol/KwOsWA==", + "dev": true, + "license": "MIT", + "optional": true, + "peer": true + }, + "node_modules/neo-async": { + "version": "2.6.2", + "license": "MIT" + }, + "node_modules/no-case": { + "version": "3.0.4", + "dev": true, + "license": "MIT", + "dependencies": { + "lower-case": "^2.0.2", + "tslib": "^2.0.3" + } + }, + "node_modules/node-abi": { + "version": "3.89.0", + "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.89.0.tgz", + "integrity": "sha512-6u9UwL0HlAl21+agMN3YAMXcKByMqwGx+pq+P76vii5f7hTPtKDp08/H9py6DY+cfDw7kQNTGEj/rly3IgbNQA==", + "dev": true, + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "semver": "^7.3.5" + }, + "engines": { + "node": ">=10" + } + }, + 
"node_modules/node-abi/node_modules/semver": { + "version": "7.7.4", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", + "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", + "dev": true, + "license": "ISC", + "optional": true, + "peer": true, + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/node-addon-api": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-7.1.1.tgz", + "integrity": "sha512-5m3bsyrjFWE1xf7nz7YXdN4udnVtXK6/Yfgn5qnahL6bCkf2yKt4k3nuTKAtT4r3IG8JNR2ncsIMdZuAzJjHQQ==", + "dev": true, + "license": "MIT", + "optional": true, + "peer": true + }, + "node_modules/node-fetch": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", + "license": "MIT", + "optional": true, + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, + "node_modules/node-fetch/node_modules/tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", + "license": "MIT", + "optional": true + }, + "node_modules/node-fetch/node_modules/webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", + "license": "BSD-2-Clause", + "optional": true + }, + "node_modules/node-fetch/node_modules/whatwg-url": { + "version": "5.0.0", + "resolved": 
"https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "license": "MIT", + "optional": true, + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, + "node_modules/node-releases": { + "version": "2.0.36", + "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.36.tgz", + "integrity": "sha512-TdC8FSgHz8Mwtw9g5L4gR/Sh9XhSP/0DEkQxfEFXOpiul5IiHgHan2VhYYb6agDSfp4KuvltmGApc8HMgUrIkA==", + "dev": true, + "license": "MIT" + }, + "node_modules/nopt": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/nopt/-/nopt-5.0.0.tgz", + "integrity": "sha512-Tbj67rffqceeLpcRXrT7vKAN8CwfPeIBgM7E6iBkmKLV7bEMwpGgYLGv0jACUsECaa/vuxP0IjEont6umdMgtQ==", + "license": "ISC", + "optional": true, + "dependencies": { + "abbrev": "1" + }, + "bin": { + "nopt": "bin/nopt.js" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/npmlog": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/npmlog/-/npmlog-5.0.1.tgz", + "integrity": "sha512-AqZtDUWOMKs1G/8lwylVjrdYgqA4d9nu8hc+0gzRxlDb1I10+FHBGMXs6aiQHFdCUUlqH99MUMuLfzWDNDtfxw==", + "deprecated": "This package is no longer supported.", + "license": "ISC", + "optional": true, + "dependencies": { + "are-we-there-yet": "^2.0.0", + "console-control-strings": "^1.1.0", + "gauge": "^3.0.0", + "set-blocking": "^2.0.0" + } + }, + "node_modules/object-assign": { + "version": "4.1.1", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/object-inspect": { + "version": "1.12.3", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/object-is": { + "version": "1.1.5", + "license": "MIT", + "dependencies": { + "call-bind": "^1.0.2", + "define-properties": "^1.1.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + 
"node_modules/object-keys": { + "version": "1.1.1", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/object.assign": { + "version": "4.1.4", + "license": "MIT", + "dependencies": { + "call-bind": "^1.0.2", + "define-properties": "^1.1.4", + "has-symbols": "^1.0.3", + "object-keys": "^1.1.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "license": "ISC", + "optional": true, + "dependencies": { + "wrappy": "1" + } + }, + "node_modules/parent-module": { + "version": "1.0.1", + "license": "MIT", + "dependencies": { + "callsites": "^3.0.0" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/parse-entities": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/parse-entities/-/parse-entities-2.0.0.tgz", + "integrity": "sha512-kkywGpCcRYhqQIchaWqZ875wzpS/bMKhz5HnN3p7wveJTkTtyAB/AlnS0f8DFSqYW1T82t6yEAkEcB+A1I3MbQ==", + "license": "MIT", + "dependencies": { + "character-entities": "^1.0.0", + "character-entities-legacy": "^1.0.0", + "character-reference-invalid": "^1.0.0", + "is-alphanumerical": "^1.0.0", + "is-decimal": "^1.0.0", + "is-hexadecimal": "^1.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/parse-entities/node_modules/character-entities": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/character-entities/-/character-entities-1.2.4.tgz", + "integrity": "sha512-iBMyeEHxfVnIakwOuDXpVkc54HijNgCyQB2w0VfGQThle6NXn50zU6V/u+LDhxHcDUPojn6Kpga3PTAD8W1bQw==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/parse-json": { + "version": "5.2.0", + "license": "MIT", + 
"dependencies": { + "@babel/code-frame": "^7.0.0", + "error-ex": "^1.3.1", + "json-parse-even-better-errors": "^2.3.0", + "lines-and-columns": "^1.1.6" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/parse5": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-8.0.0.tgz", + "integrity": "sha512-9m4m5GSgXjL4AjumKzq1Fgfp3Z8rsvjRNbnkVwfu2ImRqE5D0LnY2QfDen18FSY9C573YU5XxSapdHZTZ2WolA==", + "dev": true, + "license": "MIT", + "dependencies": { + "entities": "^6.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/path-is-absolute": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", + "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==", + "license": "MIT", + "optional": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/path-parse": { + "version": "1.0.7", + "license": "MIT" + }, + "node_modules/path-type": { + "version": "4.0.0", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/path2d-polyfill": { + "version": "2.0.1", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/pathe": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/pathe/-/pathe-2.0.3.tgz", + "integrity": "sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w==", + "dev": true, + "license": "MIT" + }, + "node_modules/pathval": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/pathval/-/pathval-2.0.1.tgz", + "integrity": "sha512-//nshmD55c46FuFw26xV/xFAaB5HF9Xdap7HJBBnrKdAd6/GxDBaNA1870O79+9ueg61cZLSVc+OaFlfmObYVQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 14.16" + } + }, + "node_modules/pdfjs-dist": { + "version": "3.4.120", + "license": "Apache-2.0", + 
"dependencies": { + "path2d-polyfill": "^2.0.1", + "web-streams-polyfill": "^3.2.1" + }, + "optionalDependencies": { + "canvas": "^2.11.0" + } + }, + "node_modules/pdfjs-dist/node_modules/canvas": { + "version": "2.11.2", + "resolved": "https://registry.npmjs.org/canvas/-/canvas-2.11.2.tgz", + "integrity": "sha512-ItanGBMrmRV7Py2Z+Xhs7cT+FNt5K0vPL4p9EZ/UX/Mu7hFbkxSjKF2KVtPwX7UYWp7dRKnrTvReflgrItJbdw==", + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "dependencies": { + "@mapbox/node-pre-gyp": "^1.0.0", + "nan": "^2.17.0", + "simple-get": "^3.0.3" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/pdfjs-dist/node_modules/decompress-response": { + "version": "4.2.1", + "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-4.2.1.tgz", + "integrity": "sha512-jOSne2qbyE+/r8G1VU+G/82LBs2Fs4LAsTiLSHOCOMZQl2OKZ6i8i4IyHemTe+/yIXOtTcRQMzPcgyhoFlqPkw==", + "license": "MIT", + "optional": true, + "dependencies": { + "mimic-response": "^2.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/pdfjs-dist/node_modules/mimic-response": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-2.1.0.tgz", + "integrity": "sha512-wXqjST+SLt7R009ySCglWBCFpjUygmCIfD790/kVbiGmUgfYGuB14PiTd5DwVxSV4NcYHjzMkoj5LjQZwTQLEA==", + "license": "MIT", + "optional": true, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/pdfjs-dist/node_modules/simple-get": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/simple-get/-/simple-get-3.1.1.tgz", + "integrity": "sha512-CQ5LTKGfCpvE1K0n2us+kuMPbk/q0EKl82s4aheV9oXjFEz6W/Y7oQFVJuU6QG77hRT4Ghb5RURteF5vnWjupA==", + "license": "MIT", + "optional": true, + "dependencies": { + "decompress-response": "^4.2.0", + "once": "^1.3.1", + "simple-concat": "^1.0.0" + } + }, + "node_modules/picocolors": { + "version": "1.1.1", + "resolved": 
"https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", + "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==", + "license": "ISC" + }, + "node_modules/picomatch": { + "version": "2.3.1", + "license": "MIT", + "engines": { + "node": ">=8.6" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/postcss": { + "version": "8.5.8", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.8.tgz", + "integrity": "sha512-OW/rX8O/jXnm82Ey1k44pObPtdblfiuWnrd8X7GJ7emImCOstunGbXUpp7HdBrFQX6rJzn3sPT397Wp5aCwCHg==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/postcss/" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/postcss" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "dependencies": { + "nanoid": "^3.3.11", + "picocolors": "^1.1.1", + "source-map-js": "^1.2.1" + }, + "engines": { + "node": "^10 || ^12 || >=14" + } + }, + "node_modules/posthog-js": { + "version": "1.130.2", + "license": "MIT", + "dependencies": { + "fflate": "^0.4.8", + "preact": "^10.19.3" + } + }, + "node_modules/preact": { + "version": "10.20.1", + "license": "MIT", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/preact" + } + }, + "node_modules/prebuild-install": { + "version": "7.1.3", + "resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.3.tgz", + "integrity": "sha512-8Mf2cbV7x1cXPUILADGI3wuhfqWvtiLA1iclTDbFRZkgRQS0NqsPZphna9V+HyTEadheuPmjaJMsbzKQFOzLug==", + "deprecated": "No longer maintained. 
Please contact the author of the relevant native addon; alternatives are available.", + "dev": true, + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "detect-libc": "^2.0.0", + "expand-template": "^2.0.3", + "github-from-package": "0.0.0", + "minimist": "^1.2.3", + "mkdirp-classic": "^0.5.3", + "napi-build-utils": "^2.0.0", + "node-abi": "^3.3.0", + "pump": "^3.0.0", + "rc": "^1.2.7", + "simple-get": "^4.0.0", + "tar-fs": "^2.0.0", + "tunnel-agent": "^0.6.0" + }, + "bin": { + "prebuild-install": "bin.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/pretty-format": { + "version": "27.5.1", + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1", + "ansi-styles": "^5.0.0", + "react-is": "^17.0.1" + }, + "engines": { + "node": "^10.13.0 || ^12.13.0 || ^14.15.0 || >=15.0.0" + } + }, + "node_modules/pretty-format/node_modules/ansi-styles": { + "version": "5.2.0", + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/pretty-format/node_modules/react-is": { + "version": "17.0.2", + "license": "MIT" + }, + "node_modules/prismjs": { + "version": "1.30.0", + "resolved": "https://registry.npmjs.org/prismjs/-/prismjs-1.30.0.tgz", + "integrity": "sha512-DEvV2ZF2r2/63V+tK8hQvrR2ZGn10srHbXviTlcv7Kpzw8jWiNTqbVgjO3IY8RxrrOUF8VPMQQFysYYYv0YZxw==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/product-fruits": { + "version": "1.0.25", + "license": "MIT" + }, + "node_modules/prop-types": { + "version": "15.8.1", + "license": "MIT", + "dependencies": { + "loose-envify": "^1.4.0", + "object-assign": "^4.1.1", + "react-is": "^16.13.1" + } + }, + "node_modules/prop-types/node_modules/react-is": { + "version": "16.13.1", + "license": "MIT" + }, + "node_modules/property-information": { + "version": "6.5.0", + "resolved": "https://registry.npmjs.org/property-information/-/property-information-6.5.0.tgz", + 
"integrity": "sha512-PgTgs/BlvHxOu8QuEN7wi5A0OmXaBcHpmCSTehcs6Uuu9IkDIEo13Hy7n898RHfrQ49vKCoGeWZSaAK01nwVig==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/proxy-from-env": { + "version": "1.1.0", + "license": "MIT" + }, + "node_modules/pump": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.4.tgz", + "integrity": "sha512-VS7sjc6KR7e1ukRFhQSY5LM2uBWAUPiOPa/A3mkKmiMwSmRFUITt0xuj+/lesgnCv+dPIEYlkzrcyXgquIHMcA==", + "dev": true, + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } + }, + "node_modules/punycode": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", + "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/qrcode.react": { + "version": "3.1.0", + "license": "ISC", + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0 || ^18.0.0" + } + }, + "node_modules/rc": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz", + "integrity": "sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==", + "dev": true, + "license": "(BSD-2-Clause OR MIT OR Apache-2.0)", + "optional": true, + "peer": true, + "dependencies": { + "deep-extend": "^0.6.0", + "ini": "~1.3.0", + "minimist": "^1.2.0", + "strip-json-comments": "~2.0.1" + }, + "bin": { + "rc": "cli.js" + } + }, + "node_modules/rc-align": { + "version": "4.0.15", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.10.1", + "classnames": "2.x", + "dom-align": "^1.7.0", + "rc-util": "^5.26.0", + "resize-observer-polyfill": "^1.5.1" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/rc-cascader": { + "version": "3.21.2", + 
"license": "MIT", + "dependencies": { + "@babel/runtime": "^7.12.5", + "array-tree-filter": "^2.1.0", + "classnames": "^2.3.1", + "rc-select": "~14.11.0", + "rc-tree": "~5.8.1", + "rc-util": "^5.37.0" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/rc-checkbox": { + "version": "3.1.0", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.10.1", + "classnames": "^2.3.2", + "rc-util": "^5.25.2" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/rc-collapse": { + "version": "3.7.2", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.10.1", + "classnames": "2.x", + "rc-motion": "^2.3.4", + "rc-util": "^5.27.0" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/rc-dialog": { + "version": "9.3.4", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.10.1", + "@rc-component/portal": "^1.0.0-8", + "classnames": "^2.2.6", + "rc-motion": "^2.3.0", + "rc-util": "^5.21.0" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/rc-drawer": { + "version": "7.0.0", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.10.1", + "@rc-component/portal": "^1.1.1", + "classnames": "^2.2.6", + "rc-motion": "^2.6.1", + "rc-util": "^5.36.0" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/rc-dropdown": { + "version": "4.1.0", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.18.3", + "@rc-component/trigger": "^1.7.0", + "classnames": "^2.2.6", + "rc-util": "^5.17.0" + }, + "peerDependencies": { + "react": ">=16.11.0", + "react-dom": ">=16.11.0" + } + }, + "node_modules/rc-field-form": { + "version": "1.41.0", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.18.0", + "async-validator": "^4.1.0", + "rc-util": "^5.32.2" + }, + "engines": { + "node": ">=8.x" + }, 
+ "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/rc-image": { + "version": "7.5.1", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.11.2", + "@rc-component/portal": "^1.0.2", + "classnames": "^2.2.6", + "rc-dialog": "~9.3.4", + "rc-motion": "^2.6.2", + "rc-util": "^5.34.1" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/rc-input": { + "version": "1.4.3", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.11.1", + "classnames": "^2.2.1", + "rc-util": "^5.18.1" + }, + "peerDependencies": { + "react": ">=16.0.0", + "react-dom": ">=16.0.0" + } + }, + "node_modules/rc-input-number": { + "version": "8.6.1", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.10.1", + "@rc-component/mini-decimal": "^1.0.1", + "classnames": "^2.2.5", + "rc-input": "~1.4.0", + "rc-util": "^5.28.0" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/rc-mentions": { + "version": "2.10.1", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.22.5", + "@rc-component/trigger": "^1.5.0", + "classnames": "^2.2.6", + "rc-input": "~1.4.0", + "rc-menu": "~9.12.0", + "rc-textarea": "~1.6.1", + "rc-util": "^5.34.1" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/rc-menu": { + "version": "9.12.4", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.10.1", + "@rc-component/trigger": "^1.17.0", + "classnames": "2.x", + "rc-motion": "^2.4.3", + "rc-overflow": "^1.3.1", + "rc-util": "^5.27.0" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/rc-motion": { + "version": "2.9.0", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.11.1", + "classnames": "^2.2.1", + "rc-util": "^5.21.0" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + 
"node_modules/rc-notification": { + "version": "5.3.0", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.10.1", + "classnames": "2.x", + "rc-motion": "^2.9.0", + "rc-util": "^5.20.1" + }, + "engines": { + "node": ">=8.x" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/rc-overflow": { + "version": "1.3.2", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.11.1", + "classnames": "^2.2.1", + "rc-resize-observer": "^1.0.0", + "rc-util": "^5.37.0" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/rc-pagination": { + "version": "4.0.4", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.10.1", + "classnames": "^2.3.2", + "rc-util": "^5.38.0" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/rc-picker": { + "version": "2.7.6", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.10.1", + "classnames": "^2.2.1", + "date-fns": "2.x", + "dayjs": "1.x", + "moment": "^2.24.0", + "rc-trigger": "^5.0.4", + "rc-util": "^5.37.0", + "shallowequal": "^1.1.0" + }, + "engines": { + "node": ">=8.x" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/rc-picker/node_modules/date-fns": { + "version": "2.30.0", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.21.0" + }, + "engines": { + "node": ">=0.11" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/date-fns" + } + }, + "node_modules/rc-progress": { + "version": "3.5.1", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.10.1", + "classnames": "^2.2.6", + "rc-util": "^5.16.1" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/rc-rate": { + "version": "2.12.0", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.10.1", + "classnames": "^2.2.5", + 
"rc-util": "^5.0.1" + }, + "engines": { + "node": ">=8.x" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/rc-resize-observer": { + "version": "1.4.0", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.20.7", + "classnames": "^2.2.1", + "rc-util": "^5.38.0", + "resize-observer-polyfill": "^1.5.1" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/rc-segmented": { + "version": "2.2.2", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.11.1", + "classnames": "^2.2.1", + "rc-motion": "^2.4.4", + "rc-util": "^5.17.0" + }, + "peerDependencies": { + "react": ">=16.0.0", + "react-dom": ">=16.0.0" + } + }, + "node_modules/rc-select": { + "version": "14.11.0", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.10.1", + "@rc-component/trigger": "^1.5.0", + "classnames": "2.x", + "rc-motion": "^2.0.1", + "rc-overflow": "^1.3.1", + "rc-util": "^5.16.1", + "rc-virtual-list": "^3.5.2" + }, + "engines": { + "node": ">=8.x" + }, + "peerDependencies": { + "react": "*", + "react-dom": "*" + } + }, + "node_modules/rc-slider": { + "version": "10.5.0", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.10.1", + "classnames": "^2.2.5", + "rc-util": "^5.27.0" + }, + "engines": { + "node": ">=8.x" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/rc-steps": { + "version": "6.0.1", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.16.7", + "classnames": "^2.2.3", + "rc-util": "^5.16.1" + }, + "engines": { + "node": ">=8.x" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/rc-switch": { + "version": "4.1.0", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.21.0", + "classnames": "^2.2.1", + "rc-util": "^5.30.0" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + 
"node_modules/rc-table": { + "version": "7.37.0", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.10.1", + "@rc-component/context": "^1.4.0", + "classnames": "^2.2.5", + "rc-resize-observer": "^1.1.0", + "rc-util": "^5.37.0", + "rc-virtual-list": "^3.11.1" + }, + "engines": { + "node": ">=8.x" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/rc-tabs": { + "version": "14.0.0", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.11.2", + "classnames": "2.x", + "rc-dropdown": "~4.1.0", + "rc-menu": "~9.12.0", + "rc-motion": "^2.6.2", + "rc-resize-observer": "^1.0.0", + "rc-util": "^5.34.1" + }, + "engines": { + "node": ">=8.x" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/rc-textarea": { + "version": "1.6.3", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.10.1", + "classnames": "^2.2.1", + "rc-input": "~1.4.0", + "rc-resize-observer": "^1.0.0", + "rc-util": "^5.27.0" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/rc-tooltip": { + "version": "6.1.3", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.11.2", + "@rc-component/trigger": "^1.18.0", + "classnames": "^2.3.1" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/rc-tree": { + "version": "5.8.2", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.10.1", + "classnames": "2.x", + "rc-motion": "^2.0.1", + "rc-util": "^5.16.1", + "rc-virtual-list": "^3.5.1" + }, + "engines": { + "node": ">=10.x" + }, + "peerDependencies": { + "react": "*", + "react-dom": "*" + } + }, + "node_modules/rc-tree-select": { + "version": "5.17.0", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.10.1", + "classnames": "2.x", + "rc-select": "~14.11.0-0", + "rc-tree": "~5.8.1", + "rc-util": "^5.16.1" + }, + "peerDependencies": { + "react": 
"*", + "react-dom": "*" + } + }, + "node_modules/rc-trigger": { + "version": "5.3.4", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.18.3", + "classnames": "^2.2.6", + "rc-align": "^4.0.0", + "rc-motion": "^2.0.0", + "rc-util": "^5.19.2" + }, + "engines": { + "node": ">=8.x" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/rc-upload": { + "version": "4.5.2", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.18.3", + "classnames": "^2.2.5", + "rc-util": "^5.2.0" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/rc-util": { + "version": "5.44.3", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.18.3", + "react-is": "^18.2.0" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/rc-virtual-list": { + "version": "3.11.3", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.20.0", + "classnames": "^2.2.6", + "rc-resize-observer": "^1.0.0", + "rc-util": "^5.36.0" + }, + "engines": { + "node": ">=8.x" + }, + "peerDependencies": { + "react": "*", + "react-dom": "*" + } + }, + "node_modules/react": { + "version": "18.2.0", + "license": "MIT", + "dependencies": { + "loose-envify": "^1.1.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/react-async-script": { + "version": "1.2.0", + "license": "MIT", + "dependencies": { + "hoist-non-react-statics": "^3.3.0", + "prop-types": "^15.5.0" + }, + "peerDependencies": { + "react": ">=16.4.1" + } + }, + "node_modules/react-diff-viewer-continued": { + "version": "3.4.0", + "resolved": "https://registry.npmjs.org/react-diff-viewer-continued/-/react-diff-viewer-continued-3.4.0.tgz", + "integrity": "sha512-kMZmUyb3Pv5L9vUtCfIGYsdOHs8mUojblGy1U1Sm0D7FhAOEsH9QhnngEIRo5hXWIPNGupNRJls1TJ6Eqx84eg==", + "license": "MIT", + "dependencies": { + "@emotion/css": "^11.11.2", + "classnames": "^2.3.2", + "diff": "^5.1.0", 
+ "memoize-one": "^6.0.0", + "prop-types": "^15.8.1" + }, + "engines": { + "node": ">= 8" + }, + "peerDependencies": { + "react": "^15.3.0 || ^16.0.0 || ^17.0.0 || ^18.0.0", + "react-dom": "^15.3.0 || ^16.0.0 || ^17.0.0 || ^18.0.0" + } + }, + "node_modules/react-dnd": { + "version": "16.0.1", + "license": "MIT", + "dependencies": { + "@react-dnd/invariant": "^4.0.1", + "@react-dnd/shallowequal": "^4.0.1", + "dnd-core": "^16.0.1", + "fast-deep-equal": "^3.1.3", + "hoist-non-react-statics": "^3.3.2" + }, + "peerDependencies": { + "@types/hoist-non-react-statics": ">= 3.3.1", + "@types/node": ">= 12", + "@types/react": ">= 16", + "react": ">= 16.14" + }, + "peerDependenciesMeta": { + "@types/hoist-non-react-statics": { + "optional": true + }, + "@types/node": { + "optional": true + }, + "@types/react": { + "optional": true + } + } + }, + "node_modules/react-dnd-html5-backend": { + "version": "16.0.1", + "license": "MIT", + "dependencies": { + "dnd-core": "^16.0.1" + } + }, + "node_modules/react-dom": { + "version": "18.2.0", + "license": "MIT", + "dependencies": { + "loose-envify": "^1.1.0", + "scheduler": "^0.23.0" + }, + "peerDependencies": { + "react": "^18.2.0" + } + }, + "node_modules/react-fast-compare": { + "version": "3.2.2", + "license": "MIT" + }, + "node_modules/react-google-recaptcha": { + "version": "3.1.0", + "license": "MIT", + "dependencies": { + "prop-types": "^15.5.0", + "react-async-script": "^1.2.0" + }, + "peerDependencies": { + "react": ">=16.4.1" + } + }, + "node_modules/react-gtm-module": { + "version": "2.0.11", + "license": "MIT" + }, + "node_modules/react-helmet-async": { + "version": "2.0.5", + "license": "Apache-2.0", + "dependencies": { + "invariant": "^2.2.4", + "react-fast-compare": "^3.2.2", + "shallowequal": "^1.1.0" + }, + "peerDependencies": { + "react": "^16.6.0 || ^17.0.0 || ^18.0.0" + } + }, + "node_modules/react-is": { + "version": "18.2.0", + "license": "MIT" + }, + "node_modules/react-js-cron": { + "version": "5.0.1", + 
"license": "MIT", + "peerDependencies": { + "antd": ">=5.8.0", + "react": ">=17.0.0", + "react-dom": ">=17.0.0" + } + }, + "node_modules/react-markdown": { + "version": "8.0.7", + "resolved": "https://registry.npmjs.org/react-markdown/-/react-markdown-8.0.7.tgz", + "integrity": "sha512-bvWbzG4MtOU62XqBx3Xx+zB2raaFFsq4mYiAzfjXJMEz2sixgeAfraA3tvzULF02ZdOMUOKTBFFaZJDDrq+BJQ==", + "license": "MIT", + "dependencies": { + "@types/hast": "^2.0.0", + "@types/prop-types": "^15.0.0", + "@types/unist": "^2.0.0", + "comma-separated-tokens": "^2.0.0", + "hast-util-whitespace": "^2.0.0", + "prop-types": "^15.0.0", + "property-information": "^6.0.0", + "react-is": "^18.0.0", + "remark-parse": "^10.0.0", + "remark-rehype": "^10.0.0", + "space-separated-tokens": "^2.0.0", + "style-to-object": "^0.4.0", + "unified": "^10.0.0", + "unist-util-visit": "^4.0.0", + "vfile": "^5.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + }, + "peerDependencies": { + "@types/react": ">=16", + "react": ">=16" + } + }, + "node_modules/react-product-fruits": { + "version": "2.2.6", + "license": "MIT", + "dependencies": { + "product-fruits": "^1.0.25" + }, + "peerDependencies": { + "react": ">= 17.0.0" + } + }, + "node_modules/react-redux": { + "version": "8.1.3", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.12.1", + "@types/hoist-non-react-statics": "^3.3.1", + "@types/use-sync-external-store": "^0.0.3", + "hoist-non-react-statics": "^3.3.2", + "react-is": "^18.0.0", + "use-sync-external-store": "^1.0.0" + }, + "peerDependencies": { + "@types/react": "^16.8 || ^17.0 || ^18.0", + "@types/react-dom": "^16.8 || ^17.0 || ^18.0", + "react": "^16.8 || ^17.0 || ^18.0", + "react-dom": "^16.8 || ^17.0 || ^18.0", + "react-native": ">=0.59", + "redux": "^4 || ^5.0.0-beta.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + }, + "react-dom": { + "optional": true + }, + 
"react-native": { + "optional": true + }, + "redux": { + "optional": true + } + } + }, + "node_modules/react-router": { + "version": "6.13.0", + "license": "MIT", + "dependencies": { + "@remix-run/router": "1.6.3" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "react": ">=16.8" + } + }, + "node_modules/react-router-dom": { + "version": "6.13.0", + "license": "MIT", + "dependencies": { + "@remix-run/router": "1.6.3", + "react-router": "6.13.0" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "react": ">=16.8", + "react-dom": ">=16.8" + } + }, + "node_modules/react-social-login-buttons": { + "version": "3.9.1", + "license": "ISC", + "peerDependencies": { + "react": "^15.0.0 || ^16.0.0 || ^17.x || ^18.x" + } + }, + "node_modules/react-syntax-highlighter": { + "version": "15.6.6", + "resolved": "https://registry.npmjs.org/react-syntax-highlighter/-/react-syntax-highlighter-15.6.6.tgz", + "integrity": "sha512-DgXrc+AZF47+HvAPEmn7Ua/1p10jNoVZVI/LoPiYdtY+OM+/nG5yefLHKJwdKqY1adMuHFbeyBaG9j64ML7vTw==", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.3.1", + "highlight.js": "^10.4.1", + "highlightjs-vue": "^1.0.0", + "lowlight": "^1.17.0", + "prismjs": "^1.30.0", + "refractor": "^3.6.0" + }, + "peerDependencies": { + "react": ">= 0.14.0" + } + }, + "node_modules/readable-stream": { + "version": "3.6.2", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", + "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", + "license": "MIT", + "optional": true, + "dependencies": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/recharts": { + "version": "3.8.1", + "resolved": "https://registry.npmjs.org/recharts/-/recharts-3.8.1.tgz", + "integrity": 
"sha512-mwzmO1s9sFL0TduUpwndxCUNoXsBw3u3E/0+A+cLcrSfQitSG62L32N69GhqUrrT5qKcAE3pCGVINC6pqkBBQg==", + "license": "MIT", + "workspaces": [ + "www" + ], + "dependencies": { + "@reduxjs/toolkit": "^1.9.0 || 2.x.x", + "clsx": "^2.1.1", + "decimal.js-light": "^2.5.1", + "es-toolkit": "^1.39.3", + "eventemitter3": "^5.0.1", + "immer": "^10.1.1", + "react-redux": "8.x.x || 9.x.x", + "reselect": "5.1.1", + "tiny-invariant": "^1.3.3", + "use-sync-external-store": "^1.2.2", + "victory-vendor": "^37.0.2" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0", + "react-dom": "^16.0.0 || ^17.0.0 || ^18.0.0 || ^19.0.0", + "react-is": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" + } + }, + "node_modules/recharts/node_modules/eventemitter3": { + "version": "5.0.4", + "resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-5.0.4.tgz", + "integrity": "sha512-mlsTRyGaPBjPedk6Bvw+aqbsXDtoAyAzm5MO7JgU+yVRyMQ5O8bD4Kcci7BS85f93veegeCPkL8R4GLClnjLFw==", + "license": "MIT" + }, + "node_modules/recharts/node_modules/immer": { + "version": "10.2.0", + "resolved": "https://registry.npmjs.org/immer/-/immer-10.2.0.tgz", + "integrity": "sha512-d/+XTN3zfODyjr89gM3mPq1WNX2B8pYsu7eORitdwyA2sBubnTl3laYlBk4sXY5FUa5qTZGBDPJICVbvqzjlbw==", + "license": "MIT", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/immer" + } + }, + "node_modules/recharts/node_modules/use-sync-external-store": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/use-sync-external-store/-/use-sync-external-store-1.6.0.tgz", + "integrity": "sha512-Pp6GSwGP/NrPIrxVFAIkOQeyw8lFenOHijQWkUTrDvrF4ALqylP2C/KCkeS9dpUM3KvYRQhna5vt7IL95+ZQ9w==", + "license": "MIT", + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" + } + }, + "node_modules/redent": { + "version": "3.0.0", + "license": "MIT", + "dependencies": { + "indent-string": "^4.0.0", + "strip-indent": "^3.0.0" + }, + "engines": { + 
"node": ">=8" + } + }, + "node_modules/redux": { + "version": "4.2.1", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.9.2" + } + }, + "node_modules/refractor": { + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/refractor/-/refractor-3.6.0.tgz", + "integrity": "sha512-MY9W41IOWxxk31o+YvFCNyNzdkc9M20NoZK5vq6jkv4I/uh2zkWcfudj0Q1fovjUQJrNewS9NMzeTtqPf+n5EA==", + "license": "MIT", + "dependencies": { + "hastscript": "^6.0.0", + "parse-entities": "^2.0.0", + "prismjs": "~1.27.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/refractor/node_modules/prismjs": { + "version": "1.27.0", + "resolved": "https://registry.npmjs.org/prismjs/-/prismjs-1.27.0.tgz", + "integrity": "sha512-t13BGPUlFDR7wRB5kQDG4jjl7XeuH6jbJGt11JHPL96qwsEHNX2+68tFXqc1/k+/jALsbSWJKUOT/hcYAZ5LkA==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/regexp.prototype.flags": { + "version": "1.5.0", + "license": "MIT", + "dependencies": { + "call-bind": "^1.0.2", + "define-properties": "^1.2.0", + "functions-have-names": "^1.2.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/remark-gfm": { + "version": "3.0.1", + "license": "MIT", + "dependencies": { + "@types/mdast": "^3.0.0", + "mdast-util-gfm": "^2.0.0", + "micromark-extension-gfm": "^2.0.0", + "unified": "^10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/remark-parse": { + "version": "10.0.2", + "resolved": "https://registry.npmjs.org/remark-parse/-/remark-parse-10.0.2.tgz", + "integrity": "sha512-3ydxgHa/ZQzG8LvC7jTXccARYDcRld3VfcgIIFs7bI6vbRSxJJmzgLEIIoYKyrfhaY+ujuWaf/PJiMZXoiCXgw==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^3.0.0", + "mdast-util-from-markdown": "^1.0.0", + "unified": "^10.0.0" + }, + "funding": { + "type": "opencollective", + "url": 
"https://opencollective.com/unified" + } + }, + "node_modules/remark-rehype": { + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/remark-rehype/-/remark-rehype-10.1.0.tgz", + "integrity": "sha512-EFmR5zppdBp0WQeDVZ/b66CWJipB2q2VLNFMabzDSGR66Z2fQii83G5gTBbgGEnEEA0QRussvrFHxk1HWGJskw==", + "license": "MIT", + "dependencies": { + "@types/hast": "^2.0.0", + "@types/mdast": "^3.0.0", + "mdast-util-to-hast": "^12.1.0", + "unified": "^10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/require-from-string": { + "version": "2.0.2", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/requires-port": { + "version": "1.0.0", + "license": "MIT" + }, + "node_modules/reselect": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/reselect/-/reselect-5.1.1.tgz", + "integrity": "sha512-K/BG6eIky/SBpzfHZv/dd+9JBFiS4SWV7FIujVyJRux6e45+73RaUHXLmIR1f7WOMaQ0U1km6qwklRQxpJJY0w==", + "license": "MIT" + }, + "node_modules/resize-observer-polyfill": { + "version": "1.5.1", + "license": "MIT" + }, + "node_modules/resolve-from": { + "version": "4.0.0", + "license": "MIT", + "engines": { + "node": ">=4" + } + }, + "node_modules/rimraf": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz", + "integrity": "sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==", + "deprecated": "Rimraf versions prior to v4 are no longer supported", + "license": "ISC", + "optional": true, + "dependencies": { + "glob": "^7.1.3" + }, + "bin": { + "rimraf": "bin.js" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/rollup": { + "version": "2.79.1", + "dev": true, + "license": "MIT", + "optional": true, + "peer": true, + "bin": { + "rollup": "dist/bin/rollup" + }, + "engines": { + "node": ">=10.0.0" + }, + "optionalDependencies": { + "fsevents": "~2.3.2" + } + }, + 
"node_modules/rrweb-cssom": { + "version": "0.8.0", + "resolved": "https://registry.npmjs.org/rrweb-cssom/-/rrweb-cssom-0.8.0.tgz", + "integrity": "sha512-guoltQEx+9aMf2gDZ0s62EcV8lsXR+0w8915TC3ITdn2YueuNjdAYh/levpU9nFaoChh9RUS5ZdQMrKfVEN9tw==", + "dev": true, + "license": "MIT" + }, + "node_modules/sade": { + "version": "1.8.1", + "license": "MIT", + "dependencies": { + "mri": "^1.1.0" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "optional": true + }, + "node_modules/safer-buffer": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", + "dev": true, + "license": "MIT" + }, + "node_modules/saxes": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/saxes/-/saxes-6.0.0.tgz", + "integrity": "sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA==", + "dev": true, + "license": "ISC", + "dependencies": { + "xmlchars": "^2.2.0" + }, + "engines": { + "node": ">=v12.22.7" + } + }, + "node_modules/scheduler": { + "version": "0.23.0", + "license": "MIT", + "dependencies": { + "loose-envify": "^1.1.0" + } + }, + "node_modules/scroll-into-view-if-needed": { + "version": "3.1.0", + "license": "MIT", + "dependencies": { + "compute-scroll-into-view": "^3.0.2" + } + }, + "node_modules/semver": { + "version": "6.3.1", + "resolved": 
"https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", + "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", + "devOptional": true, + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + } + }, + "node_modules/set-blocking": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/set-blocking/-/set-blocking-2.0.0.tgz", + "integrity": "sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw==", + "license": "ISC", + "optional": true + }, + "node_modules/shallowequal": { + "version": "1.1.0", + "license": "MIT" + }, + "node_modules/side-channel": { + "version": "1.0.4", + "license": "MIT", + "dependencies": { + "call-bind": "^1.0.0", + "get-intrinsic": "^1.0.2", + "object-inspect": "^1.9.0" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/siginfo": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/siginfo/-/siginfo-2.0.0.tgz", + "integrity": "sha512-ybx0WO1/8bSBLEWXZvEd7gMW3Sn3JFlW3TvX1nREbDLRNQNaeNN8WK0meBwPdAaOI7TtRRRJn/Es1zhrrCHu7g==", + "dev": true, + "license": "ISC" + }, + "node_modules/signal-exit": { + "version": "3.0.7", + "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz", + "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==", + "license": "ISC", + "optional": true + }, + "node_modules/simple-concat": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz", + "integrity": "sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "optional": true + }, + 
"node_modules/simple-get": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/simple-get/-/simple-get-4.0.1.tgz", + "integrity": "sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "decompress-response": "^6.0.0", + "once": "^1.3.1", + "simple-concat": "^1.0.0" + } + }, + "node_modules/slash": { + "version": "3.0.0", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/snake-case": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/snake-case/-/snake-case-3.0.4.tgz", + "integrity": "sha512-LAOh4z89bGQvl9pFfNF8V146i7o7/CqFPbqzYgP+yYzDIDeS9HaNFtXABamRW+AQzEVODcvE79ljJ+8a9YSdMg==", + "dev": true, + "license": "MIT", + "dependencies": { + "dot-case": "^3.0.4", + "tslib": "^2.0.3" + } + }, + "node_modules/socket.io-client": { + "version": "4.7.2", + "license": "MIT", + "dependencies": { + "@socket.io/component-emitter": "~3.1.0", + "debug": "~4.3.2", + "engine.io-client": "~6.5.2", + "socket.io-parser": "~4.2.4" + }, + "engines": { + "node": ">=10.0.0" + } + }, + "node_modules/socket.io-parser": { + "version": "4.2.4", + "license": "MIT", + "dependencies": { + "@socket.io/component-emitter": "~3.1.0", + "debug": "~4.3.1" + }, + "engines": { + "node": ">=10.0.0" + } + }, + "node_modules/source-map-js": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", + "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==", + "dev": true, + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/source-map-support": { + 
"version": "0.5.21", + "dev": true, + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "buffer-from": "^1.0.0", + "source-map": "^0.6.0" + } + }, + "node_modules/source-map-support/node_modules/source-map": { + "version": "0.6.1", + "dev": true, + "license": "BSD-3-Clause", + "optional": true, + "peer": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/space-separated-tokens": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/space-separated-tokens/-/space-separated-tokens-2.0.2.tgz", + "integrity": "sha512-PEGlAwrG8yXGXRjW32fGbg66JAlOAwbObuqVoJpv/mRgoWDQfgH1wDPvtzWyUSNAXBGSk8h755YDbbcEy3SH2Q==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/spel2js": { + "version": "0.2.8", + "engines": { + "node": ">=8" + } + }, + "node_modules/sqlstring": { + "version": "2.3.3", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/stack-utils": { + "version": "2.0.6", + "license": "MIT", + "dependencies": { + "escape-string-regexp": "^2.0.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/stack-utils/node_modules/escape-string-regexp": { + "version": "2.0.0", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/stackback": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/stackback/-/stackback-0.0.2.tgz", + "integrity": "sha512-1XMJE5fQo1jGH6Y/7ebnwPOBEkIEnT4QF32d5R1+VXdXveM0IBMJt8zfaxX1P3QhVwrYe+576+jkANtSS2mBbw==", + "dev": true, + "license": "MIT" + }, + "node_modules/state-local": { + "version": "1.0.7", + "license": "MIT" + }, + "node_modules/std-env": { + "version": "3.10.0", + "resolved": "https://registry.npmjs.org/std-env/-/std-env-3.10.0.tgz", + "integrity": "sha512-5GS12FdOZNliM5mAOxFRg7Ir0pWz8MdpYm6AY6VPkGpbA7ZzmbzNcBJQ0GPvvyWgcY7QAhCgf9Uy89I03faLkg==", + "dev": true, + "license": "MIT" + }, + "node_modules/stop-iteration-iterator": { + "version": 
"1.0.0", + "license": "MIT", + "dependencies": { + "internal-slot": "^1.0.4" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/string_decoder": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", + "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", + "license": "MIT", + "optional": true, + "dependencies": { + "safe-buffer": "~5.2.0" + } + }, + "node_modules/string-convert": { + "version": "0.2.1", + "license": "MIT" + }, + "node_modules/string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "license": "MIT", + "optional": true, + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/string-width/node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "license": "MIT", + "optional": true + }, + "node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "license": "MIT", + "optional": true, + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-indent": { + "version": "3.0.0", + "license": "MIT", + "dependencies": { + "min-indent": "^1.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-json-comments": { + "version": "2.0.1", + "resolved": 
"https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz", + "integrity": "sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==", + "dev": true, + "license": "MIT", + "optional": true, + "peer": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/strip-literal": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/strip-literal/-/strip-literal-3.1.0.tgz", + "integrity": "sha512-8r3mkIM/2+PpjHoOtiAW8Rg3jJLHaV7xPwG+YRGrv6FP0wwk/toTpATxWYOW0BKdWwl82VT2tFYi5DlROa0Mxg==", + "dev": true, + "license": "MIT", + "dependencies": { + "js-tokens": "^9.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/antfu" + } + }, + "node_modules/strip-literal/node_modules/js-tokens": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-9.0.1.tgz", + "integrity": "sha512-mxa9E9ITFOt0ban3j6L5MpjwegGz6lBQmM1IJkWeBZGcMxto50+eWdjC/52xDbS2vy0k7vIMK0Fe2wfL9OQSpQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/style-to-object": { + "version": "0.4.4", + "resolved": "https://registry.npmjs.org/style-to-object/-/style-to-object-0.4.4.tgz", + "integrity": "sha512-HYNoHZa2GorYNyqiCaBgsxvcJIn7OHq6inEga+E6Ke3m5JkoqpQbnFssk4jwe+K7AhGa2fcha4wSOf1Kn01dMg==", + "license": "MIT", + "dependencies": { + "inline-style-parser": "0.1.1" + } + }, + "node_modules/stylis": { + "version": "4.3.1", + "license": "MIT" + }, + "node_modules/supports-color": { + "version": "7.2.0", + "license": "MIT", + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/supports-preserve-symlinks-flag": { + "version": "1.0.0", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/svg-parser": { + "version": "2.0.4", + "dev": true, + "license": "MIT" + }, + "node_modules/symbol-tree": { + "version": "3.2.4", + "dev": true, + "license": "MIT" + }, + 
"node_modules/tar": { + "version": "6.2.1", + "resolved": "https://registry.npmjs.org/tar/-/tar-6.2.1.tgz", + "integrity": "sha512-DZ4yORTwrbTj/7MZYq2w+/ZFdI6OZ/f9SFHR+71gIVUZhOQPHzVCLpvRnPgyaMpfWxxk/4ONva3GQSyNIKRv6A==", + "deprecated": "Old versions of tar are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me", + "license": "ISC", + "optional": true, + "dependencies": { + "chownr": "^2.0.0", + "fs-minipass": "^2.0.0", + "minipass": "^5.0.0", + "minizlib": "^2.1.1", + "mkdirp": "^1.0.3", + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/tar-fs": { + "version": "2.1.4", + "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.4.tgz", + "integrity": "sha512-mDAjwmZdh7LTT6pNleZ05Yt65HC3E+NiQzl672vQG38jIrehtJk/J3mNwIg+vShQPcLF/LV7CMnDW6vjj6sfYQ==", + "dev": true, + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "chownr": "^1.1.1", + "mkdirp-classic": "^0.5.2", + "pump": "^3.0.0", + "tar-stream": "^2.1.4" + } + }, + "node_modules/tar-stream": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-2.2.0.tgz", + "integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==", + "dev": true, + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "bl": "^4.0.3", + "end-of-stream": "^1.4.1", + "fs-constants": "^1.0.0", + "inherits": "^2.0.3", + "readable-stream": "^3.1.1" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/tar/node_modules/chownr": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/chownr/-/chownr-2.0.0.tgz", + "integrity": "sha512-bIomtDF5KGpdogkLd9VspvFzk9KfpyyGlS8YFVZl7TGPBHL5snIOnxeshwVgPteQ9b4Eydl+pVbIyE1DcvCWgQ==", + "license": "ISC", + "optional": true, + "engines": { + "node": ">=10" + } + 
}, + "node_modules/tar/node_modules/yallist": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", + "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", + "license": "ISC", + "optional": true + }, + "node_modules/terser": { + "version": "5.18.0", + "dev": true, + "license": "BSD-2-Clause", + "optional": true, + "peer": true, + "dependencies": { + "@jridgewell/source-map": "^0.3.3", + "acorn": "^8.8.2", + "commander": "^2.20.0", + "source-map-support": "~0.5.20" + }, + "bin": { + "terser": "bin/terser" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/terser/node_modules/commander": { + "version": "2.20.3", + "dev": true, + "license": "MIT", + "optional": true, + "peer": true + }, + "node_modules/throttle-debounce": { + "version": "5.0.0", + "license": "MIT", + "engines": { + "node": ">=12.22" + } + }, + "node_modules/tiny-invariant": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.3.3.tgz", + "integrity": "sha512-+FbBPE1o9QAYvviau/qC5SE3caw21q3xkvWKBtja5vgqOWIHHJ3ioaq1VPfn/Szqctz2bU/oYeKd9/z5BL+PVg==", + "license": "MIT" + }, + "node_modules/tinybench": { + "version": "2.9.0", + "resolved": "https://registry.npmjs.org/tinybench/-/tinybench-2.9.0.tgz", + "integrity": "sha512-0+DUvqWMValLmha6lr4kD8iAMK1HzV0/aKnCtWb9v9641TnP/MFb7Pc2bxoxQjTXAErryXVgUOfv2YqNllqGeg==", + "dev": true, + "license": "MIT" + }, + "node_modules/tinyexec": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/tinyexec/-/tinyexec-0.3.2.tgz", + "integrity": "sha512-KQQR9yN7R5+OSwaK0XQoj22pwHoTlgYqmUscPYoknOoWCWfj/5/ABTMRi69FrKU5ffPVh5QcFikpWJI/P1ocHA==", + "dev": true, + "license": "MIT" + }, + "node_modules/tinyglobby": { + "version": "0.2.15", + "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz", + "integrity": 
"sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "fdir": "^6.5.0", + "picomatch": "^4.0.3" + }, + "engines": { + "node": ">=12.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/SuperchupuDev" + } + }, + "node_modules/tinyglobby/node_modules/fdir": { + "version": "6.5.0", + "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz", + "integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12.0.0" + }, + "peerDependencies": { + "picomatch": "^3 || ^4" + }, + "peerDependenciesMeta": { + "picomatch": { + "optional": true + } + } + }, + "node_modules/tinyglobby/node_modules/picomatch": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", + "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/tinypool": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/tinypool/-/tinypool-1.1.1.tgz", + "integrity": "sha512-Zba82s87IFq9A9XmjiX5uZA/ARWDrB03OHlq+Vw1fSdt0I+4/Kutwy8BP4Y/y/aORMo61FQ0vIb5j44vSo5Pkg==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^18.0.0 || >=20.0.0" + } + }, + "node_modules/tinyrainbow": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/tinyrainbow/-/tinyrainbow-2.0.0.tgz", + "integrity": "sha512-op4nsTR47R6p0vMUUoYl/a+ljLFVtlfaXkLQmqfLR1qHma1h/ysYk4hEXZ880bf2CYgTskvTa/e196Vd5dDQXw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/tinyspy": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/tinyspy/-/tinyspy-4.0.4.tgz", + "integrity": 
"sha512-azl+t0z7pw/z958Gy9svOTuzqIk6xq+NSheJzn5MMWtWTFywIacg2wUlzKFGtt3cthx0r2SxMK0yzJOR0IES7Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/tldts": { + "version": "7.0.27", + "resolved": "https://registry.npmjs.org/tldts/-/tldts-7.0.27.tgz", + "integrity": "sha512-I4FZcVFcqCRuT0ph6dCDpPuO4Xgzvh+spkcTr1gK7peIvxWauoloVO0vuy1FQnijT63ss6AsHB6+OIM4aXHbPg==", + "dev": true, + "license": "MIT", + "dependencies": { + "tldts-core": "^7.0.27" + }, + "bin": { + "tldts": "bin/cli.js" + } + }, + "node_modules/tldts-core": { + "version": "7.0.27", + "resolved": "https://registry.npmjs.org/tldts-core/-/tldts-core-7.0.27.tgz", + "integrity": "sha512-YQ7uPjgWUibIK6DW5lrKujGwUKhLevU4hcGbP5O6TcIUb+oTjJYJVWPS4nZsIHrEEEG6myk/oqAJUEQmpZrHsg==", + "dev": true, + "license": "MIT" + }, + "node_modules/to-regex-range": { + "version": "5.0.1", + "license": "MIT", + "dependencies": { + "is-number": "^7.0.0" + }, + "engines": { + "node": ">=8.0" + } + }, + "node_modules/toggle-selection": { + "version": "1.0.6", + "license": "MIT" + }, + "node_modules/tough-cookie": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-6.0.1.tgz", + "integrity": "sha512-LktZQb3IeoUWB9lqR5EWTHgW/VTITCXg4D21M+lvybRVdylLrRMnqaIONLVb5mav8vM19m44HIcGq4qASeu2Qw==", + "dev": true, + "license": "BSD-3-Clause", + "dependencies": { + "tldts": "^7.0.5" + }, + "engines": { + "node": ">=16" + } + }, + "node_modules/tr46": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-6.0.0.tgz", + "integrity": "sha512-bLVMLPtstlZ4iMQHpFHTR7GAGj2jxi8Dg0s2h2MafAE4uSWF98FC/3MomU51iQAMf8/qDUbKWf5GxuvvVcXEhw==", + "dev": true, + "license": "MIT", + "dependencies": { + "punycode": "^2.3.1" + }, + "engines": { + "node": ">=20" + } + }, + "node_modules/trim-lines": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/trim-lines/-/trim-lines-3.0.1.tgz", + "integrity": 
"sha512-kRj8B+YHZCc9kQYdWfJB2/oUl9rA99qbowYYBtr4ui4mZyAQ2JpvVBd/6U2YloATfqBhBTSMhTpgBHtU0Mf3Rg==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/trough": { + "version": "2.1.0", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/tslib": { + "version": "2.5.3", + "license": "0BSD" + }, + "node_modules/tunnel-agent": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz", + "integrity": "sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==", + "dev": true, + "license": "Apache-2.0", + "optional": true, + "peer": true, + "dependencies": { + "safe-buffer": "^5.0.1" + }, + "engines": { + "node": "*" + } + }, + "node_modules/typescript": { + "version": "4.9.5", + "dev": true, + "license": "Apache-2.0", + "optional": true, + "peer": true, + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=4.2.0" + } + }, + "node_modules/uglify-js": { + "version": "3.17.4", + "license": "BSD-2-Clause", + "optional": true, + "bin": { + "uglifyjs": "bin/uglifyjs" + }, + "engines": { + "node": ">=0.8.0" + } + }, + "node_modules/undici-types": { + "version": "7.18.2", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.18.2.tgz", + "integrity": "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==", + "license": "MIT" + }, + "node_modules/unified": { + "version": "10.1.2", + "license": "MIT", + "dependencies": { + "@types/unist": "^2.0.0", + "bail": "^2.0.0", + "extend": "^3.0.0", + "is-buffer": "^2.0.0", + "is-plain-obj": "^4.0.0", + "trough": "^2.0.0", + "vfile": "^5.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unified/node_modules/is-plain-obj": { + "version": 
"4.1.0", + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/unist-util-generated": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/unist-util-generated/-/unist-util-generated-2.0.1.tgz", + "integrity": "sha512-qF72kLmPxAw0oN2fwpWIqbXAVyEqUzDHMsbtPvOudIlUzXYFIeQIuxXQCRCFh22B7cixvU0MG7m3MW8FTq/S+A==", + "license": "MIT", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-is": { + "version": "5.2.1", + "license": "MIT", + "dependencies": { + "@types/unist": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-position": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/unist-util-position/-/unist-util-position-4.0.4.tgz", + "integrity": "sha512-kUBE91efOWfIVBo8xzh/uZQ7p9ffYRtUbMRZBNFYwf0RK8koUMx6dGUfwylLOKmaT2cs4wSW96QoYUSXAyEtpg==", + "license": "MIT", + "dependencies": { + "@types/unist": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-stringify-position": { + "version": "3.0.3", + "license": "MIT", + "dependencies": { + "@types/unist": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-visit": { + "version": "4.1.2", + "license": "MIT", + "dependencies": { + "@types/unist": "^2.0.0", + "unist-util-is": "^5.0.0", + "unist-util-visit-parents": "^5.1.1" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-visit-parents": { + "version": "5.1.3", + "license": "MIT", + "dependencies": { + "@types/unist": "^2.0.0", + "unist-util-is": "^5.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" 
+ } + }, + "node_modules/update-browserslist-db": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.2.3.tgz", + "integrity": "sha512-Js0m9cx+qOgDxo0eMiFGEueWztz+d4+M3rGlmKPT+T4IS/jP4ylw3Nwpu6cpTTP8R1MAC1kF4VbdLt3ARf209w==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/browserslist" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "dependencies": { + "escalade": "^3.2.0", + "picocolors": "^1.1.1" + }, + "bin": { + "update-browserslist-db": "cli.js" + }, + "peerDependencies": { + "browserslist": ">= 4.21.0" + } + }, + "node_modules/uri-js": { + "version": "4.4.1", + "license": "BSD-2-Clause", + "dependencies": { + "punycode": "^2.1.0" + } + }, + "node_modules/use-sync-external-store": { + "version": "1.2.0", + "license": "MIT", + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0 || ^18.0.0" + } + }, + "node_modules/util-deprecate": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", + "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", + "license": "MIT", + "optional": true + }, + "node_modules/uuid": { + "version": "9.0.1", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "license": "MIT", + "bin": { + "uuid": "dist/bin/uuid" + } + }, + "node_modules/uvu": { + "version": "0.5.6", + "license": "MIT", + "dependencies": { + "dequal": "^2.0.0", + "diff": "^5.0.0", + "kleur": "^4.0.3", + "sade": "^1.7.3" + }, + "bin": { + "uvu": "bin.js" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/uvu/node_modules/kleur": { + "version": "4.1.5", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + 
"node_modules/validate.io-array": { + "version": "1.0.6", + "license": "MIT" + }, + "node_modules/validate.io-function": { + "version": "1.0.2" + }, + "node_modules/validate.io-integer": { + "version": "1.0.5", + "dependencies": { + "validate.io-number": "^1.0.3" + } + }, + "node_modules/validate.io-integer-array": { + "version": "1.0.0", + "dependencies": { + "validate.io-array": "^1.0.3", + "validate.io-integer": "^1.0.4" + } + }, + "node_modules/validate.io-number": { + "version": "1.0.3" + }, + "node_modules/vfile": { + "version": "5.3.7", + "license": "MIT", + "dependencies": { + "@types/unist": "^2.0.0", + "is-buffer": "^2.0.0", + "unist-util-stringify-position": "^3.0.0", + "vfile-message": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/vfile-message": { + "version": "3.1.4", + "license": "MIT", + "dependencies": { + "@types/unist": "^2.0.0", + "unist-util-stringify-position": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/victory-vendor": { + "version": "37.3.6", + "resolved": "https://registry.npmjs.org/victory-vendor/-/victory-vendor-37.3.6.tgz", + "integrity": "sha512-SbPDPdDBYp+5MJHhBCAyI7wKM3d5ivekigc2Dk2s7pgbZ9wIgIBYGVw4zGHBml/qTFbexrofXW6Gu4noGxrOwQ==", + "license": "MIT AND ISC", + "dependencies": { + "@types/d3-array": "^3.0.3", + "@types/d3-ease": "^3.0.0", + "@types/d3-interpolate": "^3.0.1", + "@types/d3-scale": "^4.0.2", + "@types/d3-shape": "^3.1.0", + "@types/d3-time": "^3.0.0", + "@types/d3-timer": "^3.0.0", + "d3-array": "^3.1.6", + "d3-ease": "^3.0.1", + "d3-interpolate": "^3.0.1", + "d3-scale": "^4.0.2", + "d3-shape": "^3.1.0", + "d3-time": "^3.0.0", + "d3-timer": "^3.0.1" + } + }, + "node_modules/vite": { + "version": "7.3.1", + "resolved": "https://registry.npmjs.org/vite/-/vite-7.3.1.tgz", + "integrity": 
"sha512-w+N7Hifpc3gRjZ63vYBXA56dvvRlNWRczTdmCBBa+CotUzAPf5b7YMdMR/8CQoeYE5LX3W4wj6RYTgonm1b9DA==", + "dev": true, + "license": "MIT", + "dependencies": { + "esbuild": "^0.27.0", + "fdir": "^6.5.0", + "picomatch": "^4.0.3", + "postcss": "^8.5.6", + "rollup": "^4.43.0", + "tinyglobby": "^0.2.15" + }, + "bin": { + "vite": "bin/vite.js" + }, + "engines": { + "node": "^20.19.0 || >=22.12.0" + }, + "funding": { + "url": "https://github.com/vitejs/vite?sponsor=1" + }, + "optionalDependencies": { + "fsevents": "~2.3.3" + }, + "peerDependencies": { + "@types/node": "^20.19.0 || >=22.12.0", + "jiti": ">=1.21.0", + "less": "^4.0.0", + "lightningcss": "^1.21.0", + "sass": "^1.70.0", + "sass-embedded": "^1.70.0", + "stylus": ">=0.54.8", + "sugarss": "^5.0.0", + "terser": "^5.16.0", + "tsx": "^4.8.1", + "yaml": "^2.4.2" + }, + "peerDependenciesMeta": { + "@types/node": { + "optional": true + }, + "jiti": { + "optional": true + }, + "less": { + "optional": true + }, + "lightningcss": { + "optional": true + }, + "sass": { + "optional": true + }, + "sass-embedded": { + "optional": true + }, + "stylus": { + "optional": true + }, + "sugarss": { + "optional": true + }, + "terser": { + "optional": true + }, + "tsx": { + "optional": true + }, + "yaml": { + "optional": true + } + } + }, + "node_modules/vite-node": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/vite-node/-/vite-node-3.2.4.tgz", + "integrity": "sha512-EbKSKh+bh1E1IFxeO0pg1n4dvoOTt0UDiXMd/qn++r98+jPO1xtJilvXldeuQ8giIB5IkpjCgMleHMNEsGH6pg==", + "dev": true, + "license": "MIT", + "dependencies": { + "cac": "^6.7.14", + "debug": "^4.4.1", + "es-module-lexer": "^1.7.0", + "pathe": "^2.0.3", + "vite": "^5.0.0 || ^6.0.0 || ^7.0.0-0" + }, + "bin": { + "vite-node": "vite-node.mjs" + }, + "engines": { + "node": "^18.0.0 || ^20.0.0 || >=22.0.0" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/vite-node/node_modules/debug": { + "version": "4.4.3", + "resolved": 
"https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", + "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", + "dev": true, + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/vite-node/node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "dev": true, + "license": "MIT" + }, + "node_modules/vite-plugin-svgr": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/vite-plugin-svgr/-/vite-plugin-svgr-4.5.0.tgz", + "integrity": "sha512-W+uoSpmVkSmNOGPSsDCWVW/DDAyv+9fap9AZXBvWiQqrboJ08j2vh0tFxTD/LjwqwAd3yYSVJgm54S/1GhbdnA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@rollup/pluginutils": "^5.2.0", + "@svgr/core": "^8.1.0", + "@svgr/plugin-jsx": "^8.1.0" + }, + "peerDependencies": { + "vite": ">=2.6.0" + } + }, + "node_modules/vite-plugin-svgr/node_modules/@rollup/pluginutils": { + "version": "5.3.0", + "resolved": "https://registry.npmjs.org/@rollup/pluginutils/-/pluginutils-5.3.0.tgz", + "integrity": "sha512-5EdhGZtnu3V88ces7s53hhfK5KSASnJZv8Lulpc04cWO3REESroJXg73DFsOmgbU2BhwV0E20bu2IDZb3VKW4Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/estree": "^1.0.0", + "estree-walker": "^2.0.2", + "picomatch": "^4.0.2" + }, + "engines": { + "node": ">=14.0.0" + }, + "peerDependencies": { + "rollup": "^1.20.0||^2.0.0||^3.0.0||^4.0.0" + }, + "peerDependenciesMeta": { + "rollup": { + "optional": true + } + } + }, + "node_modules/vite-plugin-svgr/node_modules/@svgr/babel-plugin-add-jsx-attribute": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/@svgr/babel-plugin-add-jsx-attribute/-/babel-plugin-add-jsx-attribute-8.0.0.tgz", + "integrity": 
"sha512-b9MIk7yhdS1pMCZM8VeNfUlSKVRhsHZNMl5O9SfaX0l0t5wjdgu4IDzGB8bpnGBBOjGST3rRFVsaaEtI4W6f7g==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=14" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/gregberge" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/vite-plugin-svgr/node_modules/@svgr/babel-plugin-remove-jsx-attribute": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/@svgr/babel-plugin-remove-jsx-attribute/-/babel-plugin-remove-jsx-attribute-8.0.0.tgz", + "integrity": "sha512-BcCkm/STipKvbCl6b7QFrMh/vx00vIP63k2eM66MfHJzPr6O2U0jYEViXkHJWqXqQYjdeA9cuCl5KWmlwjDvbA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=14" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/gregberge" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/vite-plugin-svgr/node_modules/@svgr/babel-plugin-remove-jsx-empty-expression": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/@svgr/babel-plugin-remove-jsx-empty-expression/-/babel-plugin-remove-jsx-empty-expression-8.0.0.tgz", + "integrity": "sha512-5BcGCBfBxB5+XSDSWnhTThfI9jcO5f0Ai2V24gZpG+wXF14BzwxxdDb4g6trdOux0rhibGs385BeFMSmxtS3uA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=14" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/gregberge" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/vite-plugin-svgr/node_modules/@svgr/babel-plugin-replace-jsx-attribute-value": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/@svgr/babel-plugin-replace-jsx-attribute-value/-/babel-plugin-replace-jsx-attribute-value-8.0.0.tgz", + "integrity": "sha512-KVQ+PtIjb1BuYT3ht8M5KbzWBhdAjjUPdlMtpuw/VjT8coTrItWX6Qafl9+ji831JaJcu6PJNKCV0bp01lBNzQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=14" + }, + "funding": { + "type": "github", + "url": 
"https://github.com/sponsors/gregberge" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/vite-plugin-svgr/node_modules/@svgr/babel-plugin-svg-dynamic-title": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/@svgr/babel-plugin-svg-dynamic-title/-/babel-plugin-svg-dynamic-title-8.0.0.tgz", + "integrity": "sha512-omNiKqwjNmOQJ2v6ge4SErBbkooV2aAWwaPFs2vUY7p7GhVkzRkJ00kILXQvRhA6miHnNpXv7MRnnSjdRjK8og==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=14" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/gregberge" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/vite-plugin-svgr/node_modules/@svgr/babel-plugin-svg-em-dimensions": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/@svgr/babel-plugin-svg-em-dimensions/-/babel-plugin-svg-em-dimensions-8.0.0.tgz", + "integrity": "sha512-mURHYnu6Iw3UBTbhGwE/vsngtCIbHE43xCRK7kCw4t01xyGqb2Pd+WXekRRoFOBIY29ZoOhUCTEweDMdrjfi9g==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=14" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/gregberge" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/vite-plugin-svgr/node_modules/@svgr/babel-plugin-transform-react-native-svg": { + "version": "8.1.0", + "resolved": "https://registry.npmjs.org/@svgr/babel-plugin-transform-react-native-svg/-/babel-plugin-transform-react-native-svg-8.1.0.tgz", + "integrity": "sha512-Tx8T58CHo+7nwJ+EhUwx3LfdNSG9R2OKfaIXXs5soiy5HtgoAEkDay9LIimLOcG8dJQH1wPZp/cnAv6S9CrR1Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=14" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/gregberge" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/vite-plugin-svgr/node_modules/@svgr/babel-plugin-transform-svg-component": { + "version": "8.0.0", + "resolved": 
"https://registry.npmjs.org/@svgr/babel-plugin-transform-svg-component/-/babel-plugin-transform-svg-component-8.0.0.tgz", + "integrity": "sha512-DFx8xa3cZXTdb/k3kfPeaixecQLgKh5NVBMwD0AQxOzcZawK4oo1Jh9LbrcACUivsCA7TLG8eeWgrDXjTMhRmw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/gregberge" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/vite-plugin-svgr/node_modules/@svgr/babel-preset": { + "version": "8.1.0", + "resolved": "https://registry.npmjs.org/@svgr/babel-preset/-/babel-preset-8.1.0.tgz", + "integrity": "sha512-7EYDbHE7MxHpv4sxvnVPngw5fuR6pw79SkcrILHJ/iMpuKySNCl5W1qcwPEpU+LgyRXOaAFgH0KhwD18wwg6ug==", + "dev": true, + "license": "MIT", + "dependencies": { + "@svgr/babel-plugin-add-jsx-attribute": "8.0.0", + "@svgr/babel-plugin-remove-jsx-attribute": "8.0.0", + "@svgr/babel-plugin-remove-jsx-empty-expression": "8.0.0", + "@svgr/babel-plugin-replace-jsx-attribute-value": "8.0.0", + "@svgr/babel-plugin-svg-dynamic-title": "8.0.0", + "@svgr/babel-plugin-svg-em-dimensions": "8.0.0", + "@svgr/babel-plugin-transform-react-native-svg": "8.1.0", + "@svgr/babel-plugin-transform-svg-component": "8.0.0" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/gregberge" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/vite-plugin-svgr/node_modules/@svgr/core": { + "version": "8.1.0", + "resolved": "https://registry.npmjs.org/@svgr/core/-/core-8.1.0.tgz", + "integrity": "sha512-8QqtOQT5ACVlmsvKOJNEaWmRPmcojMOzCz4Hs2BGG/toAp/K38LcsMRyLp349glq5AzJbCEeimEoxaX6v/fLrA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/core": "^7.21.3", + "@svgr/babel-preset": "8.1.0", + "camelcase": "^6.2.0", + "cosmiconfig": "^8.1.3", + "snake-case": "^3.0.4" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "type": "github", + "url": 
"https://github.com/sponsors/gregberge" + } + }, + "node_modules/vite-plugin-svgr/node_modules/@svgr/hast-util-to-babel-ast": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/@svgr/hast-util-to-babel-ast/-/hast-util-to-babel-ast-8.0.0.tgz", + "integrity": "sha512-EbDKwO9GpfWP4jN9sGdYwPBU0kdomaPIL2Eu4YwmgP+sJeXT+L7bMwJUBnhzfH8Q2qMBqZ4fJwpCyYsAN3mt2Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/types": "^7.21.3", + "entities": "^4.4.0" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/gregberge" + } + }, + "node_modules/vite-plugin-svgr/node_modules/@svgr/plugin-jsx": { + "version": "8.1.0", + "resolved": "https://registry.npmjs.org/@svgr/plugin-jsx/-/plugin-jsx-8.1.0.tgz", + "integrity": "sha512-0xiIyBsLlr8quN+WyuxooNW9RJ0Dpr8uOnH/xrCVO8GLUcwHISwj1AG0k+LFzteTkAA0GbX0kj9q6Dk70PTiPA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/core": "^7.21.3", + "@svgr/babel-preset": "8.1.0", + "@svgr/hast-util-to-babel-ast": "8.0.0", + "svg-parser": "^2.0.4" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/gregberge" + }, + "peerDependencies": { + "@svgr/core": "*" + } + }, + "node_modules/vite-plugin-svgr/node_modules/cosmiconfig": { + "version": "8.3.6", + "resolved": "https://registry.npmjs.org/cosmiconfig/-/cosmiconfig-8.3.6.tgz", + "integrity": "sha512-kcZ6+W5QzcJ3P1Mt+83OUv/oHFqZHIx8DuxG6eZ5RGMERoLqp4BuGjhHLYGK+Kf5XVkQvqBSmAy/nGWN3qDgEA==", + "dev": true, + "license": "MIT", + "dependencies": { + "import-fresh": "^3.3.0", + "js-yaml": "^4.1.0", + "parse-json": "^5.2.0", + "path-type": "^4.0.0" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/d-fischer" + }, + "peerDependencies": { + "typescript": ">=4.9.5" + }, + "peerDependenciesMeta": { + "typescript": { + "optional": true + } + } + }, + 
"node_modules/vite-plugin-svgr/node_modules/entities": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/vite-plugin-svgr/node_modules/estree-walker": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-2.0.2.tgz", + "integrity": "sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w==", + "dev": true, + "license": "MIT" + }, + "node_modules/vite-plugin-svgr/node_modules/picomatch": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", + "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/vite/node_modules/fdir": { + "version": "6.5.0", + "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz", + "integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12.0.0" + }, + "peerDependencies": { + "picomatch": "^3 || ^4" + }, + "peerDependenciesMeta": { + "picomatch": { + "optional": true + } + } + }, + "node_modules/vite/node_modules/picomatch": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", + "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": 
"https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/vite/node_modules/rollup": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.60.0.tgz", + "integrity": "sha512-yqjxruMGBQJ2gG4HtjZtAfXArHomazDHoFwFFmZZl0r7Pdo7qCIXKqKHZc8yeoMgzJJ+pO6pEEHa+V7uzWlrAQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/estree": "1.0.8" + }, + "bin": { + "rollup": "dist/bin/rollup" + }, + "engines": { + "node": ">=18.0.0", + "npm": ">=8.0.0" + }, + "optionalDependencies": { + "@rollup/rollup-android-arm-eabi": "4.60.0", + "@rollup/rollup-android-arm64": "4.60.0", + "@rollup/rollup-darwin-arm64": "4.60.0", + "@rollup/rollup-darwin-x64": "4.60.0", + "@rollup/rollup-freebsd-arm64": "4.60.0", + "@rollup/rollup-freebsd-x64": "4.60.0", + "@rollup/rollup-linux-arm-gnueabihf": "4.60.0", + "@rollup/rollup-linux-arm-musleabihf": "4.60.0", + "@rollup/rollup-linux-arm64-gnu": "4.60.0", + "@rollup/rollup-linux-arm64-musl": "4.60.0", + "@rollup/rollup-linux-loong64-gnu": "4.60.0", + "@rollup/rollup-linux-loong64-musl": "4.60.0", + "@rollup/rollup-linux-ppc64-gnu": "4.60.0", + "@rollup/rollup-linux-ppc64-musl": "4.60.0", + "@rollup/rollup-linux-riscv64-gnu": "4.60.0", + "@rollup/rollup-linux-riscv64-musl": "4.60.0", + "@rollup/rollup-linux-s390x-gnu": "4.60.0", + "@rollup/rollup-linux-x64-gnu": "4.60.0", + "@rollup/rollup-linux-x64-musl": "4.60.0", + "@rollup/rollup-openbsd-x64": "4.60.0", + "@rollup/rollup-openharmony-arm64": "4.60.0", + "@rollup/rollup-win32-arm64-msvc": "4.60.0", + "@rollup/rollup-win32-ia32-msvc": "4.60.0", + "@rollup/rollup-win32-x64-gnu": "4.60.0", + "@rollup/rollup-win32-x64-msvc": "4.60.0", + "fsevents": "~2.3.2" + } + }, + "node_modules/vitest": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/vitest/-/vitest-3.2.4.tgz", + "integrity": "sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==", + "dev": true, + "license": "MIT", + "dependencies": { + 
"@types/chai": "^5.2.2", + "@vitest/expect": "3.2.4", + "@vitest/mocker": "3.2.4", + "@vitest/pretty-format": "^3.2.4", + "@vitest/runner": "3.2.4", + "@vitest/snapshot": "3.2.4", + "@vitest/spy": "3.2.4", + "@vitest/utils": "3.2.4", + "chai": "^5.2.0", + "debug": "^4.4.1", + "expect-type": "^1.2.1", + "magic-string": "^0.30.17", + "pathe": "^2.0.3", + "picomatch": "^4.0.2", + "std-env": "^3.9.0", + "tinybench": "^2.9.0", + "tinyexec": "^0.3.2", + "tinyglobby": "^0.2.14", + "tinypool": "^1.1.1", + "tinyrainbow": "^2.0.0", + "vite": "^5.0.0 || ^6.0.0 || ^7.0.0-0", + "vite-node": "3.2.4", + "why-is-node-running": "^2.3.0" + }, + "bin": { + "vitest": "vitest.mjs" + }, + "engines": { + "node": "^18.0.0 || ^20.0.0 || >=22.0.0" + }, + "funding": { + "url": "https://opencollective.com/vitest" + }, + "peerDependencies": { + "@edge-runtime/vm": "*", + "@types/debug": "^4.1.12", + "@types/node": "^18.0.0 || ^20.0.0 || >=22.0.0", + "@vitest/browser": "3.2.4", + "@vitest/ui": "3.2.4", + "happy-dom": "*", + "jsdom": "*" + }, + "peerDependenciesMeta": { + "@edge-runtime/vm": { + "optional": true + }, + "@types/debug": { + "optional": true + }, + "@types/node": { + "optional": true + }, + "@vitest/browser": { + "optional": true + }, + "@vitest/ui": { + "optional": true + }, + "happy-dom": { + "optional": true + }, + "jsdom": { + "optional": true + } + } + }, + "node_modules/vitest/node_modules/debug": { + "version": "4.4.3", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", + "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", + "dev": true, + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/vitest/node_modules/magic-string": { + "version": "0.30.21", + "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.21.tgz", + "integrity": 
"sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/sourcemap-codec": "^1.5.5" + } + }, + "node_modules/vitest/node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "dev": true, + "license": "MIT" + }, + "node_modules/vitest/node_modules/picomatch": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", + "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/w3c-xmlserializer": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/w3c-xmlserializer/-/w3c-xmlserializer-5.0.0.tgz", + "integrity": "sha512-o8qghlI8NZHU1lLPrpi2+Uq7abh4GGPpYANlalzWxyWteJOCsr/P+oPBA49TOLu5FTZO4d3F9MnWJfiMo4BkmA==", + "dev": true, + "license": "MIT", + "dependencies": { + "xml-name-validator": "^5.0.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/web-streams-polyfill": { + "version": "3.2.1", + "license": "MIT", + "engines": { + "node": ">= 8" + } + }, + "node_modules/webidl-conversions": { + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-8.0.1.tgz", + "integrity": "sha512-BMhLD/Sw+GbJC21C/UgyaZX41nPt8bUTg+jWyDeg7e7YN4xOM05YPSIXceACnXVtqyEw/LMClUQMtMZ+PGGpqQ==", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">=20" + } + }, + "node_modules/whatwg-encoding": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz", + "integrity": 
"sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==", + "deprecated": "Use @exodus/bytes instead for a more spec-conformant and faster implementation", + "dev": true, + "license": "MIT", + "dependencies": { + "iconv-lite": "0.6.3" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/whatwg-mimetype": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz", + "integrity": "sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + } + }, + "node_modules/whatwg-url": { + "version": "15.1.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-15.1.0.tgz", + "integrity": "sha512-2ytDk0kiEj/yu90JOAp44PVPUkO9+jVhyf+SybKlRHSDlvOOZhdPIrr7xTH64l4WixO2cP+wQIcgujkGBPPz6g==", + "dev": true, + "license": "MIT", + "dependencies": { + "tr46": "^6.0.0", + "webidl-conversions": "^8.0.0" + }, + "engines": { + "node": ">=20" + } + }, + "node_modules/which-boxed-primitive": { + "version": "1.0.2", + "license": "MIT", + "dependencies": { + "is-bigint": "^1.0.1", + "is-boolean-object": "^1.1.0", + "is-number-object": "^1.0.4", + "is-string": "^1.0.5", + "is-symbol": "^1.0.3" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/which-collection": { + "version": "1.0.1", + "license": "MIT", + "dependencies": { + "is-map": "^2.0.1", + "is-set": "^2.0.1", + "is-weakmap": "^2.0.1", + "is-weakset": "^2.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/which-typed-array": { + "version": "1.1.9", + "license": "MIT", + "dependencies": { + "available-typed-arrays": "^1.0.5", + "call-bind": "^1.0.2", + "for-each": "^0.3.3", + "gopd": "^1.0.1", + "has-tostringtag": "^1.0.0", + "is-typed-array": "^1.1.10" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": 
"https://github.com/sponsors/ljharb" + } + }, + "node_modules/why-is-node-running": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/why-is-node-running/-/why-is-node-running-2.3.0.tgz", + "integrity": "sha512-hUrmaWBdVDcxvYqnyh09zunKzROWjbZTiNy8dBEjkS7ehEDQibXJ7XvlmtbwuTclUiIyN+CyXQD4Vmko8fNm8w==", + "dev": true, + "license": "MIT", + "dependencies": { + "siginfo": "^2.0.0", + "stackback": "0.0.2" + }, + "bin": { + "why-is-node-running": "cli.js" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/wide-align": { + "version": "1.1.5", + "resolved": "https://registry.npmjs.org/wide-align/-/wide-align-1.1.5.tgz", + "integrity": "sha512-eDMORYaPNZ4sQIuuYPDHdQvf4gyCF9rEEV/yPxGfwPkRodwEgiMUUXTx/dex+Me0wxx53S+NgUHaP7y3MGlDmg==", + "license": "ISC", + "optional": true, + "dependencies": { + "string-width": "^1.0.2 || 2 || 3 || 4" + } + }, + "node_modules/wordwrap": { + "version": "1.0.0", + "license": "MIT" + }, + "node_modules/wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", + "license": "ISC", + "optional": true + }, + "node_modules/ws": { + "version": "8.20.0", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.20.0.tgz", + "integrity": "sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": ">=5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + }, + "node_modules/xml-name-validator": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/xml-name-validator/-/xml-name-validator-5.0.0.tgz", + "integrity": "sha512-EvGK8EJ3DhaHfbRlETOWAS5pO9MZITeauHKJyb8wyajUfQUenkIg2MvLDTZ4T/TgIcm3HU0TFBgWWboAZ30UHg==", 
+ "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=18" + } + }, + "node_modules/xmlchars": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/xmlchars/-/xmlchars-2.2.0.tgz", + "integrity": "sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==", + "dev": true, + "license": "MIT" + }, + "node_modules/xmlhttprequest-ssl": { + "version": "2.0.0", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/xtend": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.2.tgz", + "integrity": "sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==", + "license": "MIT", + "engines": { + "node": ">=0.4" + } + }, + "node_modules/yallist": { + "version": "3.1.1", + "dev": true, + "license": "ISC" + }, + "node_modules/yaml": { + "version": "2.8.3", + "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.3.tgz", + "integrity": "sha512-AvbaCLOO2Otw/lW5bmh9d/WEdcDFdQp2Z2ZUH3pX9U2ihyUY0nvLv7J6TrWowklRGPYbB/IuIMfYgxaCPg5Bpg==", + "dev": true, + "license": "ISC", + "optional": true, + "peer": true, + "bin": { + "yaml": "bin.mjs" + }, + "engines": { + "node": ">= 14.6" + }, + "funding": { + "url": "https://github.com/sponsors/eemeli" + } + }, + "node_modules/zustand": { + "version": "4.3.8", + "license": "MIT", + "dependencies": { + "use-sync-external-store": "1.2.0" + }, + "engines": { + "node": ">=12.7.0" + }, + "peerDependencies": { + "immer": ">=9.0", + "react": ">=16.8" + }, + "peerDependenciesMeta": { + "immer": { + "optional": true + }, + "react": { + "optional": true + } + } + }, + "node_modules/zwitch": { + "version": "2.0.4", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + } + } +} diff --git a/frontend/package.json b/frontend/package.json index fd14ebeda4..83da2a7923 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -51,7 +51,6 @@ "react-router-dom": 
"^6.11.2", "react-social-login-buttons": "^3.9.1", "react-syntax-highlighter": "^15.6.1", - "recharts": "^3.5.1", "remark-gfm": "^3.0.1", "socket.io-client": "^4.7.2", @@ -89,6 +88,8 @@ "@biomejs/biome": "^2.3.13", "@vitejs/plugin-react": "^4.4.0", "baseline-browser-mapping": "^2.9.19", + "happy-dom": "^20.8.8", + "jsdom": "^27.0.1", "vite": "^7.0.0", "vite-plugin-svgr": "^4.5.0", "vitest": "^3.2.0" diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx index f534442a8e..2a21fd639c 100644 --- a/frontend/src/App.jsx +++ b/frontend/src/App.jsx @@ -49,7 +49,9 @@ function App() { ); useEffect(() => { - if (!alertDetails?.content) return; + if (!alertDetails?.content) { + return; + } notificationAPI.open({ message: alertDetails?.title, diff --git a/frontend/src/assets/index.js b/frontend/src/assets/index.js index 5817b7faa7..16b7410192 100644 --- a/frontend/src/assets/index.js +++ b/frontend/src/assets/index.js @@ -37,45 +37,44 @@ import UnstractBlackLogo from "./UnstractLogoBlack.svg?react"; import Workflows from "./Workflows.svg?react"; import YellowGradCircle from "./yellow-grad-circle.svg?react"; +export { default as OrganizationIcon } from "./organization-icon.svg?react"; export { - SunIcon, - MoonIcon, - Logo64, - Logo24, - Document, - Folder, - BingAds, - ToolIcon, - InputPlaceholder, - OutputPlaceholder, - ToolIdeInputDocPlaceholder, - ToolIdePromptsPlaceholder, - UnstractLogo, - ListOfWfStepsPlaceholder, - ListOfToolsPlaceholder, ApiDeployments, - Workflows, - StepIcon, - EmptyPlaceholder, + BingAds, CombinedOutputIcon, + CustomToolIcon, Desktop, - ReachOut, - RequireDemoIcon, + Document, + EmptyPlaceholder, + ETLIcon, + ExportToolIcon, + Folder, + InputPlaceholder, LearnMore, - UnstractBlackLogo, - SquareBg, - TrialDoc, - TextExtractorIcon, + ListOfToolsPlaceholder, + ListOfWfStepsPlaceholder, + Logo24, + Logo64, + MoonIcon, OcrIcon, OrgAvatar, OrgSelection, - RedGradCircle, - YellowGradCircle, - ExportToolIcon, + OutputPlaceholder, PlaceholderImg, - 
CustomToolIcon, - ETLIcon, + ReachOut, + RedGradCircle, + RequireDemoIcon, + SquareBg, + StepIcon, + SunIcon, Task, + TextExtractorIcon, + ToolIcon, + ToolIdeInputDocPlaceholder, + ToolIdePromptsPlaceholder, + TrialDoc, + UnstractBlackLogo, + UnstractLogo, + Workflows, + YellowGradCircle, }; - -export { default as OrganizationIcon } from "./organization-icon.svg?react"; diff --git a/frontend/src/components/agency/agency/Agency.jsx b/frontend/src/components/agency/agency/Agency.jsx index 966b2e4fd0..de90e8d136 100644 --- a/frontend/src/components/agency/agency/Agency.jsx +++ b/frontend/src/components/agency/agency/Agency.jsx @@ -368,7 +368,7 @@ function Agency() { if (!signal?.aborted) { setDeploymentInfo(deploymentInfo); } - } catch (err) { + } catch (_err) { // Don't show alert for this as it's not critical // Also check if error is due to abort if (signal?.aborted) { @@ -433,7 +433,7 @@ function Agency() { info: `Clicked on 'Deploy as ${deployType}' button`, workflow_name: projectName, }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } }; @@ -869,7 +869,7 @@ function Agency() { info: "Clicked on 'Run Workflow' button (Normal Execution)", }); } - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } const workflowId = details?.id; @@ -893,7 +893,9 @@ function Agency() { body["execution_action"] = wfExecutionTypes[executionAction]; handleWfExecutionApi(body) - .then(() => {}) + .then(() => { + // Intentionally empty: fire-and-forget + }) .catch((err) => { setAlertDetails(handleException(err)); }); diff --git a/frontend/src/components/agency/cards-list/CardsList.jsx b/frontend/src/components/agency/cards-list/CardsList.jsx index 4940bc5b75..f0b7ab8e05 100644 --- a/frontend/src/components/agency/cards-list/CardsList.jsx +++ b/frontend/src/components/agency/cards-list/CardsList.jsx @@ -184,4 +184,5 @@ CardsList.propTypes = { 
activeTool: PropTypes.string.isRequired, moveItem: PropTypes.func.isRequired, }; + export { CardsList }; diff --git a/frontend/src/components/agency/configure-connector-modal/ConfigureConnectorModal.jsx b/frontend/src/components/agency/configure-connector-modal/ConfigureConnectorModal.jsx index c55f3cdf4e..026c9919cb 100644 --- a/frontend/src/components/agency/configure-connector-modal/ConfigureConnectorModal.jsx +++ b/frontend/src/components/agency/configure-connector-modal/ConfigureConnectorModal.jsx @@ -199,7 +199,7 @@ function ConfigureConnectorModal({ connector_name: selectedConnector.connector.connector_name, }, ); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } } @@ -232,7 +232,9 @@ function ConfigureConnectorModal({ }; const handleAddFolder = () => { - if (!selectedFolderPath) return; + if (!selectedFolderPath) { + return; + } // HACK: For GDrive connectors, strip the "root/" prefix to avoid duplication // since backend will add it back during execution. 
This helps avoid a migration @@ -482,7 +484,9 @@ function ConfigureConnectorModal({ // Helper function to render connector label const renderConnectorLabel = (connDetails, availableConnectors) => { - if (!connDetails?.id) return undefined; + if (!connDetails?.id) { + return undefined; + } const selectedConnector = availableConnectors.find( (conn) => conn.value === connDetails.id, @@ -736,7 +740,9 @@ function ConfigureConnectorModal({ connectorMode={connMode} addNewItem={handleConnectorCreated} editItemId={null} - setEditItemId={() => {}} + setEditItemId={() => { + // No-op: editing not supported from this modal + }} /> )} diff --git a/frontend/src/components/agency/markdown-renderer/MarkdownRenderer.jsx b/frontend/src/components/agency/markdown-renderer/MarkdownRenderer.jsx index 7a6bbee0bb..20655d3d75 100644 --- a/frontend/src/components/agency/markdown-renderer/MarkdownRenderer.jsx +++ b/frontend/src/components/agency/markdown-renderer/MarkdownRenderer.jsx @@ -4,7 +4,9 @@ import ReactMarkdown from "react-markdown"; import remarkGfm from "remark-gfm"; const MarkdownRenderer = memo(({ markdownText }) => { - if (!markdownText) return null; + if (!markdownText) { + return null; + } return ( {markdownText} diff --git a/frontend/src/components/common/PromptStudioModal.jsx b/frontend/src/components/common/PromptStudioModal.jsx index 606ac0b7bc..503399ec0d 100644 --- a/frontend/src/components/common/PromptStudioModal.jsx +++ b/frontend/src/components/common/PromptStudioModal.jsx @@ -10,7 +10,9 @@ export function PromptStudioModal({ onClose, showModal }) { const { sessionDetails } = useSessionStore(); const handleClose = () => { - if (onClose) onClose(); + if (onClose) { + onClose(); + } }; const handleCreateClick = () => { diff --git a/frontend/src/components/custom-tools/add-llm-profile/AddLlmProfile.jsx b/frontend/src/components/custom-tools/add-llm-profile/AddLlmProfile.jsx index 4cfe0c2aba..a020540b26 100644 --- 
a/frontend/src/components/custom-tools/add-llm-profile/AddLlmProfile.jsx +++ b/frontend/src/components/custom-tools/add-llm-profile/AddLlmProfile.jsx @@ -330,7 +330,7 @@ function AddLlmProfile({ setPostHogCustomEvent("intent_success_ps_new_llm_profile", { info: "Clicked on 'Add' button", }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } } diff --git a/frontend/src/components/custom-tools/combined-output/CombinedOutput.jsx b/frontend/src/components/custom-tools/combined-output/CombinedOutput.jsx index 2f4ea38825..3036e7b9d6 100644 --- a/frontend/src/components/custom-tools/combined-output/CombinedOutput.jsx +++ b/frontend/src/components/custom-tools/combined-output/CombinedOutput.jsx @@ -74,7 +74,9 @@ function CombinedOutput({ docId, setFilledFields, selectedPrompts }) { const handleException = useExceptionHandler(); useEffect(() => { - if (isSimplePromptStudio) return; + if (isSimplePromptStudio) { + return; + } const fetchAdapterInfo = async () => { let url = `/api/v1/unstract/${sessionDetails?.orgId}/adapter/?adapter_type=LLM`; @@ -101,7 +103,9 @@ function CombinedOutput({ docId, setFilledFields, selectedPrompts }) { }, [singlePassExtractMode]); useEffect(() => { - if (!docId || isSinglePassExtractLoading) return; + if (!docId || isSinglePassExtractLoading) { + return; + } const fetchCombinedOutput = async () => { setIsOutputLoading(true); diff --git a/frontend/src/components/custom-tools/custom-data-settings/CustomDataSettings.jsx b/frontend/src/components/custom-tools/custom-data-settings/CustomDataSettings.jsx index b0c9842a32..6550b98fe2 100644 --- a/frontend/src/components/custom-tools/custom-data-settings/CustomDataSettings.jsx +++ b/frontend/src/components/custom-tools/custom-data-settings/CustomDataSettings.jsx @@ -22,7 +22,9 @@ const CUSTOM_DATA_VARIABLE_REGEX = /\{\{custom_data\.([a-zA-Z0-9_.]+)\}\}/g; // Helper function to extract all custom_data variables from text const 
extractCustomDataVariables = (text) => { const variables = []; - if (!text) return variables; + if (!text) { + return variables; + } const matches = text.matchAll(CUSTOM_DATA_VARIABLE_REGEX); for (const match of matches) { @@ -87,7 +89,9 @@ function CustomDataSettings() { ) { const promptText = prompt?.prompt || ""; const foundVariables = extractCustomDataVariables(promptText); - foundVariables.forEach((v) => variables.add(v)); + foundVariables.forEach((v) => { + variables.add(v); + }); } }); diff --git a/frontend/src/components/custom-tools/document-parser/DocumentParser.jsx b/frontend/src/components/custom-tools/document-parser/DocumentParser.jsx index 3e633cba81..3d5c891e13 100644 --- a/frontend/src/components/custom-tools/document-parser/DocumentParser.jsx +++ b/frontend/src/components/custom-tools/document-parser/DocumentParser.jsx @@ -211,7 +211,9 @@ function DocumentParser({ const getPromptOutputs = (promptId) => { const keys = Object.keys(promptOutputs || {}); - if (!keys?.length) return {}; + if (!keys?.length) { + return {}; + } const outputs = {}; keys.forEach((key) => { diff --git a/frontend/src/components/custom-tools/header-title/HeaderTitle.jsx b/frontend/src/components/custom-tools/header-title/HeaderTitle.jsx index 03b84b4a7e..9b8dc710fe 100644 --- a/frontend/src/components/custom-tools/header-title/HeaderTitle.jsx +++ b/frontend/src/components/custom-tools/header-title/HeaderTitle.jsx @@ -31,4 +31,5 @@ function HeaderTitle() { ); } + export { HeaderTitle }; diff --git a/frontend/src/components/custom-tools/header/Header.jsx b/frontend/src/components/custom-tools/header/Header.jsx index 84718ee876..221b7f0fe5 100644 --- a/frontend/src/components/custom-tools/header/Header.jsx +++ b/frontend/src/components/custom-tools/header/Header.jsx @@ -133,7 +133,7 @@ function Header({ info: `Clicked on the 'Export' button`, tool_name: details?.tool_name, }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore 
it and continue } @@ -199,7 +199,7 @@ function Header({ tool_id: details?.tool_id, tool_name: details?.tool_name, }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } @@ -260,7 +260,7 @@ function Header({ tool_id: details?.tool_id, tool_name: details?.tool_name, }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } diff --git a/frontend/src/components/custom-tools/import-tool/ImportTool.jsx b/frontend/src/components/custom-tools/import-tool/ImportTool.jsx index 7773c00ea6..e0d972cd99 100644 --- a/frontend/src/components/custom-tools/import-tool/ImportTool.jsx +++ b/frontend/src/components/custom-tools/import-tool/ImportTool.jsx @@ -48,7 +48,7 @@ function ImportTool({ open, setOpen, onImport, loading }) { setProjectData(projectData); setShowAdapterSelection(true); setParseLoading(false); - } catch (error) { + } catch (_error) { message.error("Invalid JSON file"); setParseLoading(false); } diff --git a/frontend/src/components/custom-tools/list-of-tools/ListOfTools.jsx b/frontend/src/components/custom-tools/list-of-tools/ListOfTools.jsx index ae8164e379..ad41967e56 100644 --- a/frontend/src/components/custom-tools/list-of-tools/ListOfTools.jsx +++ b/frontend/src/components/custom-tools/list-of-tools/ListOfTools.jsx @@ -208,7 +208,7 @@ function ListOfTools() { setPostHogCustomEvent("intent_new_ps_project", { info: "Clicked on '+ New Project' button", }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } }; @@ -219,7 +219,7 @@ function ListOfTools() { info: "Importing project from projects list", file_name: file.name, }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } diff --git a/frontend/src/components/custom-tools/manage-docs-modal/ManageDocsModal.jsx 
b/frontend/src/components/custom-tools/manage-docs-modal/ManageDocsModal.jsx index efd5285107..0c640ebaa2 100644 --- a/frontend/src/components/custom-tools/manage-docs-modal/ManageDocsModal.jsx +++ b/frontend/src/components/custom-tools/manage-docs-modal/ManageDocsModal.jsx @@ -219,9 +219,13 @@ function ManageDocsModal({ newMessages = newMessages.slice(0, lastIndex); } - // Filter only INFO and ERROR logs + // Filter only INFO and ERROR logs that are NOT from answer_prompt. + // Answer prompt messages carry a prompt_key in their component; + // indexing messages do not. newMessages = newMessages.filter( - (item) => item?.level === "INFO" || item?.level === "ERROR", + (item) => + (item?.level === "INFO" || item?.level === "ERROR") && + !item?.component?.prompt_key, ); // If there are no new INFO or ERROR messages, return early @@ -821,4 +825,5 @@ ManageDocsModal.propTypes = { handleUpdateTool: PropTypes.func.isRequired, handleDocChange: PropTypes.func.isRequired, }; + export { ManageDocsModal }; diff --git a/frontend/src/components/custom-tools/manage-llm-profiles/ManageLlmProfiles.jsx b/frontend/src/components/custom-tools/manage-llm-profiles/ManageLlmProfiles.jsx index aab5e8558e..3bb80d6cb2 100644 --- a/frontend/src/components/custom-tools/manage-llm-profiles/ManageLlmProfiles.jsx +++ b/frontend/src/components/custom-tools/manage-llm-profiles/ManageLlmProfiles.jsx @@ -77,7 +77,7 @@ function ManageLlmProfiles() { setPostHogCustomEvent("ps_profile_changed_per_prompt", { info: "Selected default LLM profile", }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } @@ -172,7 +172,7 @@ function ManageLlmProfiles() { setPostHogCustomEvent("intent_ps_new_llm_profile", { info: "Clicked on 'Add New LLM Profile' button", }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } }; diff --git 
a/frontend/src/components/custom-tools/notes-card/NotesCard.jsx b/frontend/src/components/custom-tools/notes-card/NotesCard.jsx index 38a03f0a9c..e2ddaa4992 100644 --- a/frontend/src/components/custom-tools/notes-card/NotesCard.jsx +++ b/frontend/src/components/custom-tools/notes-card/NotesCard.jsx @@ -38,8 +38,9 @@ function NotesCard({ if ( isPromptDetailsStateUpdated || !Object.keys(promptDetails || {})?.length - ) + ) { return; + } setPromptDetailsState(promptDetails); setIsPromptDetailsStateUpdated(true); }, [promptDetails]); diff --git a/frontend/src/components/custom-tools/output-analyzer/OutputAnalyzer.jsx b/frontend/src/components/custom-tools/output-analyzer/OutputAnalyzer.jsx index 334391d2c3..7922a1a0fe 100644 --- a/frontend/src/components/custom-tools/output-analyzer/OutputAnalyzer.jsx +++ b/frontend/src/components/custom-tools/output-analyzer/OutputAnalyzer.jsx @@ -48,7 +48,9 @@ function OutputAnalyzer() { }, []); const currentDoc = useMemo(() => { - if (currentDocIndex === -1) return null; + if (currentDocIndex === -1) { + return null; + } return listOfDocs[currentDocIndex]; }, [listOfDocs, currentDocIndex]); diff --git a/frontend/src/components/custom-tools/output-analyzer/OutputAnalyzerCard.jsx b/frontend/src/components/custom-tools/output-analyzer/OutputAnalyzerCard.jsx index 727701139f..814a317b35 100644 --- a/frontend/src/components/custom-tools/output-analyzer/OutputAnalyzerCard.jsx +++ b/frontend/src/components/custom-tools/output-analyzer/OutputAnalyzerCard.jsx @@ -37,7 +37,9 @@ function OutputAnalyzerCard({ doc, selectedPrompts, totalFields }) { // Memoize the file URL endpoint to prevent unnecessary recalculations const fileUrlEndpoint = useMemo(() => { - if (!doc) return null; + if (!doc) { + return null; + } if (isPublicSource) { return publicDocumentApi?.(id, doc.document_id, null); @@ -72,7 +74,9 @@ function OutputAnalyzerCard({ doc, selectedPrompts, totalFields }) { // Calculate fill rate const fillRate = useMemo(() => { - if 
(totalFields === 0) return "0"; + if (totalFields === 0) { + return "0"; + } return ((filledFields / totalFields) * 100).toFixed(2); }, [filledFields, totalFields]); diff --git a/frontend/src/components/custom-tools/prompt-card/DisplayPromptResult.jsx b/frontend/src/components/custom-tools/prompt-card/DisplayPromptResult.jsx index dff233f5bc..4933eb40dd 100644 --- a/frontend/src/components/custom-tools/prompt-card/DisplayPromptResult.jsx +++ b/frontend/src/components/custom-tools/prompt-card/DisplayPromptResult.jsx @@ -24,7 +24,10 @@ function DisplayPromptResult({ confidenceData, wordConfidenceData, isTable = false, - setOpenExpandModal = () => {}, + setOpenExpandModal = () => { + // No-op default + }, + progressMsg, }) { const [isLoading, setIsLoading] = useState(false); const [parsedOutput, setParsedOutput] = useState(null); @@ -66,10 +69,22 @@ function DisplayPromptResult({ ]); if (isLoading) { - return } />; + return ( +
+ } /> + {progressMsg?.message && ( + + {progressMsg.message} + + )} +
+ ); } - if (output === undefined) { + if (output === undefined || output === null) { return ( @@ -82,7 +97,9 @@ function DisplayPromptResult({ // Extract confidence from 5th element of highlight data coordinate arrays const extractConfidenceFromHighlightData = (data) => { - if (!data) return null; + if (!data) { + return null; + } const confidenceValues = []; @@ -134,11 +151,15 @@ function DisplayPromptResult({ details?.enable_highlight && details?.enable_word_confidence; const getNestedValue = (obj, path) => { - if (!obj || !path) return undefined; + if (!obj || !path) { + return undefined; + } const normalized = path.replace(/\[(\d+)\]/g, ".$1"); const parts = normalized.split(".").filter((p) => p !== ""); return parts.reduce((acc, part) => { - if (acc === undefined || acc === null) return undefined; + if (acc === undefined || acc === null) { + return undefined; + } const maybeIndex = /^\d+$/.test(part) ? Number(part) : part; return acc[maybeIndex]; }, obj); @@ -427,6 +448,7 @@ DisplayPromptResult.propTypes = { wordConfidenceData: PropTypes.object, isTable: PropTypes.bool, setOpenExpandModal: PropTypes.func, + progressMsg: PropTypes.object, }; export { DisplayPromptResult }; diff --git a/frontend/src/components/custom-tools/prompt-card/DisplayPromptResult.test.jsx b/frontend/src/components/custom-tools/prompt-card/DisplayPromptResult.test.jsx new file mode 100644 index 0000000000..71c32644e7 --- /dev/null +++ b/frontend/src/components/custom-tools/prompt-card/DisplayPromptResult.test.jsx @@ -0,0 +1,67 @@ +import { render, screen } from "@testing-library/react"; +import React from "react"; +import { describe, expect, it, vi } from "vitest"; + +// Mock dependencies before importing the component +vi.mock("../../../store/custom-tool-store", () => ({ + useCustomToolStore: () => ({ + singlePassExtractMode: false, + isSinglePassExtractLoading: false, + details: {}, + selectedHighlight: null, + }), +})); + +vi.mock("../../../helpers/GetStaticData", () => ({ + 
displayPromptResult: (output) => output, + generateApiRunStatusId: () => "key", + generateUUID: () => "uuid-stub", + PROMPT_RUN_API_STATUSES: { RUNNING: "RUNNING" }, +})); + +vi.mock("./PromptCard.css", () => ({})); + +vi.mock("../../widgets/spinner-loader/SpinnerLoader", () => ({ + SpinnerLoader: () => React.createElement("div", { "data-testid": "spinner" }), +})); + +import { DisplayPromptResult } from "./DisplayPromptResult"; + +const baseProps = { + profileId: "profile-1", + docId: "doc-1", + promptRunStatus: {}, + handleSelectHighlight: vi.fn(), + highlightData: null, + promptDetails: { prompt_id: "p1" }, + confidenceData: null, + wordConfidenceData: null, + progressMsg: null, +}; + +describe("DisplayPromptResult null/undefined guard", () => { + it("shows 'Yet to run' when output is null", () => { + render(); + expect(screen.getByText(/Yet to run/)).toBeInTheDocument(); + }); + + it("shows 'Yet to run' when output is undefined", () => { + render(); + expect(screen.getByText(/Yet to run/)).toBeInTheDocument(); + }); + + it("does NOT show 'Yet to run' for a string output", () => { + render(); + expect(screen.queryByText(/Yet to run/)).not.toBeInTheDocument(); + }); + + it("does NOT show 'Yet to run' for an empty string output", () => { + render(); + expect(screen.queryByText(/Yet to run/)).not.toBeInTheDocument(); + }); + + it("does NOT show 'Yet to run' for numeric zero output", () => { + render(); + expect(screen.queryByText(/Yet to run/)).not.toBeInTheDocument(); + }); +}); diff --git a/frontend/src/components/custom-tools/prompt-card/Header.jsx b/frontend/src/components/custom-tools/prompt-card/Header.jsx index 45f444d729..2b6d6b99c1 100644 --- a/frontend/src/components/custom-tools/prompt-card/Header.jsx +++ b/frontend/src/components/custom-tools/prompt-card/Header.jsx @@ -398,7 +398,9 @@ function Header({ {}} + handleGetOutput={() => { + // No-op: output fetching handled elsewhere + }} promptDetails={promptDetails} /> )} diff --git 
a/frontend/src/components/custom-tools/prompt-card/OutputForIndex.jsx b/frontend/src/components/custom-tools/prompt-card/OutputForIndex.jsx index a1d59fa114..0a3dbc891f 100644 --- a/frontend/src/components/custom-tools/prompt-card/OutputForIndex.jsx +++ b/frontend/src/components/custom-tools/prompt-card/OutputForIndex.jsx @@ -83,14 +83,18 @@ function OutputForIndex({ chunkData, setIsIndexOpen, isIndexOpen }) { }; const renderHighlightedLine = (line, lineIndex, chunkIndex) => { - if (!searchTerm) return line; + if (!searchTerm) { + return line; + } const matchesInLine = highlightedChunks.filter( (chunk) => chunk.lineIndex === lineIndex && chunk.chunkIndex === chunkIndex, ); - if (!matchesInLine?.length) return line; + if (!matchesInLine?.length) { + return line; + } const parts = []; let lastIndex = 0; diff --git a/frontend/src/components/custom-tools/prompt-card/PromptCard.css b/frontend/src/components/custom-tools/prompt-card/PromptCard.css index f5b4a66a04..72ba8bcb70 100644 --- a/frontend/src/components/custom-tools/prompt-card/PromptCard.css +++ b/frontend/src/components/custom-tools/prompt-card/PromptCard.css @@ -325,3 +325,17 @@ .prompt-output-result { font-size: 12px; } + +.prompt-loading-container { + display: flex; + align-items: center; + gap: 8px; +} + +.prompt-progress-msg { + font-size: 12px; + max-width: 300px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} diff --git a/frontend/src/components/custom-tools/prompt-card/PromptCard.jsx b/frontend/src/components/custom-tools/prompt-card/PromptCard.jsx index a4188ee4c0..2e579108c2 100644 --- a/frontend/src/components/custom-tools/prompt-card/PromptCard.jsx +++ b/frontend/src/components/custom-tools/prompt-card/PromptCard.jsx @@ -61,12 +61,28 @@ const PromptCard = memo( if ( isPromptDetailsStateUpdated || !Object.keys(promptDetails || {})?.length - ) + ) { return; + } setPromptDetailsState(promptDetails); setIsPromptDetailsStateUpdated(true); }, [promptDetails]); + // Initialize 
promptKey from props so the message filter can match + // per-prompt PROGRESS messages (executor sends component.prompt_key). + useEffect(() => { + if (promptDetailsState?.prompt_key && !promptKey) { + setPromptKey(promptDetailsState.prompt_key); + } + }, [promptDetailsState?.prompt_key]); + + // Clear stale progress text when execution finishes. + useEffect(() => { + if (!isCoverageLoading) { + setProgressMsg({}); + } + }, [isCoverageLoading]); + useEffect(() => { // Find the latest message that matches the criteria const msg = [...messages] @@ -74,7 +90,8 @@ const PromptCard = memo( .find( (item) => (item?.component?.prompt_id === promptDetailsState?.prompt_id || - item?.component?.prompt_key === promptKey) && + item?.component?.prompt_key === promptKey || + item?.component?.tool_id === details?.tool_id) && (item?.level === "INFO" || item?.level === "ERROR"), ); @@ -88,7 +105,7 @@ const PromptCard = memo( message: msg?.message || "", level: msg?.level || "INFO", }); - }, [messages]); + }, [messages, promptDetailsState?.prompt_id, promptKey, details?.tool_id]); useEffect(() => { setSelectedLlmProfileId( @@ -179,17 +196,17 @@ const PromptCard = memo( const processNestedArray = (nestedValue, flattened) => { if (Array.isArray(nestedValue)) { - nestedValue.forEach((coords) => - addCoordsToFlattened(coords, flattened), - ); + nestedValue.forEach((coords) => { + addCoordsToFlattened(coords, flattened); + }); } }; const processObjectValues = (item, flattened) => { if (typeof item === "object" && !Array.isArray(item)) { - Object.values(item).forEach((value) => - processNestedArray(value, flattened), - ); + Object.values(item).forEach((value) => { + processNestedArray(value, flattened); + }); } }; @@ -202,12 +219,16 @@ const PromptCard = memo( }; const flattenHighlightData = (data) => { - if (!data || typeof data !== "object") return data; + if (!data || typeof data !== "object") { + return data; + } const flattened = []; Object.values(data).forEach((value) => { if 
(Array.isArray(value)) { - value.forEach((item) => processArrayItem(item, flattened)); + value.forEach((item) => { + processArrayItem(item, flattened); + }); } }); return flattened; @@ -255,8 +276,8 @@ const PromptCard = memo( setPostHogCustomEvent("ps_prompt_run", { info: "Click on 'Run Prompt' button (Multi Pass)", }); - } catch (err) { - // If an error occurs while setting custom posthog event, ignore it and continue + } catch { + // Analytics failure should not block prompt execution } const validateInputs = () => { diff --git a/frontend/src/components/custom-tools/prompt-card/PromptCardItems.jsx b/frontend/src/components/custom-tools/prompt-card/PromptCardItems.jsx index 67a19905bd..90f4936a3a 100644 --- a/frontend/src/components/custom-tools/prompt-card/PromptCardItems.jsx +++ b/frontend/src/components/custom-tools/prompt-card/PromptCardItems.jsx @@ -113,7 +113,9 @@ function PromptCardItems({ if (adapter) { result.conf[key.label] = adapter?.model || adapter?.adapter_id?.split("|")[0]; - if (adapter?.adapter_type === "LLM") result.icon = adapter?.icon; + if (adapter?.adapter_type === "LLM") { + result.icon = adapter?.icon; + } result.conf["Profile Name"] = profile?.profile_name; } }); @@ -163,8 +165,12 @@ function PromptCardItems({ isDefault: profile?.profile_id === selectedLlmProfileId, })) .sort((a, b) => { - if (a?.isDefault) return -1; // Default profile comes first - if (b?.isDefault) return 1; + if (a?.isDefault) { + return -1; // Default profile comes first + } + if (b?.isDefault) { + return 1; + } return 0; }), ); @@ -317,6 +323,7 @@ function PromptCardItems({ promptRunStatus={promptRunStatus} isChallenge={isChallenge} handleSelectHighlight={handleSelectHighlight} + progressMsg={progressMsg} /> diff --git a/frontend/src/components/custom-tools/prompt-card/PromptOutput.jsx b/frontend/src/components/custom-tools/prompt-card/PromptOutput.jsx index 0e3e350b46..88df329744 100644 --- a/frontend/src/components/custom-tools/prompt-card/PromptOutput.jsx +++ 
b/frontend/src/components/custom-tools/prompt-card/PromptOutput.jsx @@ -66,6 +66,7 @@ function PromptOutput({ promptRunStatus, isChallenge, handleSelectHighlight, + progressMsg, }) { const [openExpandModal, setOpenExpandModal] = useState(false); const { width: windowWidth } = useWindowDimensions(); @@ -94,9 +95,7 @@ function PromptOutput({ ); const handleTable = (profileId, promptOutputData) => { - if (tableSettings?.document_type !== "rent_rolls") - return ; - else + if (tableSettings?.document_type === "rent_rolls") { return ( <>
); + } + return ; }; const getColSpan = () => (componentWidth < 1200 ? 24 : 6); @@ -203,6 +205,7 @@ function PromptOutput({ wordConfidenceData={ promptOutputData?.wordConfidenceData?.[promptDetails.prompt_key] } + progressMsg={progressMsg} />
state.setPromptRunQueue, ); const { runPrompt, syncPromptRunApisAndStatus } = usePromptRun(); + usePromptStudioSocket(); const promptRunStatus = usePromptRunStatusStore( (state) => state.promptRunStatus, ); @@ -37,7 +39,9 @@ function PromptRun() { // Setup the beforeunload event handler to store queue in cookies const handleBeforeUnload = () => { - if (!PROMPT_RUN_STATE_PERSISTENCE) return; + if (!PROMPT_RUN_STATE_PERSISTENCE) { + return; + } const { queue } = usePromptRunQueueStore.getState(); // Get the latest state dynamically if (queue?.length) { Cookies.set("promptRunQueue", JSON.stringify(queue), { @@ -54,7 +58,9 @@ function PromptRun() { }, [syncPromptRunApisAndStatus]); useEffect(() => { - if (!queue?.length || activeApis >= MAX_ACTIVE_APIS) return; + if (!queue?.length || activeApis >= MAX_ACTIVE_APIS) { + return; + } const canRunApis = MAX_ACTIVE_APIS - activeApis; const apisToRun = queue.slice(0, canRunApis); diff --git a/frontend/src/components/custom-tools/prompt-card/constants.js b/frontend/src/components/custom-tools/prompt-card/constants.js index 63b7f0db50..2ee8163c5f 100644 --- a/frontend/src/components/custom-tools/prompt-card/constants.js +++ b/frontend/src/components/custom-tools/prompt-card/constants.js @@ -11,4 +11,4 @@ const handleUpdateStatus = (isUpdate, promptId, value, setUpdateStatus) => { }); }; -export { handleUpdateStatus, TABLE, LINE_ITEM_ENFORCE_TYPE }; +export { handleUpdateStatus, LINE_ITEM_ENFORCE_TYPE, TABLE }; diff --git a/frontend/src/components/custom-tools/prompts-reorder/DraggablePrompt.jsx b/frontend/src/components/custom-tools/prompts-reorder/DraggablePrompt.jsx index c845217c08..e0a8ddea0d 100644 --- a/frontend/src/components/custom-tools/prompts-reorder/DraggablePrompt.jsx +++ b/frontend/src/components/custom-tools/prompts-reorder/DraggablePrompt.jsx @@ -14,12 +14,16 @@ function DraggablePrompt({ prompt, index, movePrompt, onDrop, cancelDrag }) { const [{ handlerId }, drop] = useDrop({ accept: ItemTypes.PROMPT, hover: 
(item, monitor) => { - if (!ref.current) return; + if (!ref.current) { + return; + } const dragIndex = item.index; const hoverIndex = index; - if (dragIndex === hoverIndex) return; + if (dragIndex === hoverIndex) { + return; + } // Move the item visually during drag movePrompt(dragIndex, hoverIndex); diff --git a/frontend/src/components/custom-tools/prompts-reorder/PromptsReorder.jsx b/frontend/src/components/custom-tools/prompts-reorder/PromptsReorder.jsx index d2e2d24a73..5947453ce2 100644 --- a/frontend/src/components/custom-tools/prompts-reorder/PromptsReorder.jsx +++ b/frontend/src/components/custom-tools/prompts-reorder/PromptsReorder.jsx @@ -39,7 +39,9 @@ function PromptsReorder({ isOpen, updateReorderedStatus }) { const movePrompt = useCallback( (fromIndex, toIndex) => { - if (fromIndex === toIndex) return; + if (fromIndex === toIndex) { + return; + } // Store the previous state if not already stored if (!previousListOfPrompts.current?.length) { @@ -92,7 +94,9 @@ function PromptsReorder({ isOpen, updateReorderedStatus }) { const onDrop = useCallback( async (fromIndex, toIndex) => { - if (fromIndex === toIndex) return; + if (fromIndex === toIndex) { + return; + } updateReorderedStatus(true); diff --git a/frontend/src/components/custom-tools/retrieval-strategy-modal/RetrievalStrategyModal.jsx b/frontend/src/components/custom-tools/retrieval-strategy-modal/RetrievalStrategyModal.jsx index 3652b1ece1..1549b5c13c 100644 --- a/frontend/src/components/custom-tools/retrieval-strategy-modal/RetrievalStrategyModal.jsx +++ b/frontend/src/components/custom-tools/retrieval-strategy-modal/RetrievalStrategyModal.jsx @@ -87,26 +87,34 @@ const RetrievalStrategyModal = ({ }; const getTokenUsageClassName = (usage) => { - if (usage.includes("Low")) + if (usage.includes("Low")) { return "retrieval-strategy-modal__token-usage-low"; - if (usage.includes("Medium")) + } + if (usage.includes("Medium")) { return "retrieval-strategy-modal__token-usage-medium"; - if 
(usage.includes("Very High")) + } + if (usage.includes("Very High")) { return "retrieval-strategy-modal__token-usage-high"; - if (usage.includes("High")) + } + if (usage.includes("High")) { return "retrieval-strategy-modal__token-usage-high"; + } return ""; }; const getCostImpactClassName = (impact) => { - if (impact.includes("Low")) + if (impact.includes("Low")) { return "retrieval-strategy-modal__cost-impact-low"; - if (impact.includes("Medium")) + } + if (impact.includes("Medium")) { return "retrieval-strategy-modal__cost-impact-medium"; - if (impact.includes("Very High")) + } + if (impact.includes("Very High")) { return "retrieval-strategy-modal__cost-impact-high"; - if (impact.includes("High")) + } + if (impact.includes("High")) { return "retrieval-strategy-modal__cost-impact-high"; + } return ""; }; diff --git a/frontend/src/components/custom-tools/tool-ide/ToolIde.jsx b/frontend/src/components/custom-tools/tool-ide/ToolIde.jsx index 3b7fa94291..db17199928 100644 --- a/frontend/src/components/custom-tools/tool-ide/ToolIde.jsx +++ b/frontend/src/components/custom-tools/tool-ide/ToolIde.jsx @@ -221,7 +221,7 @@ function ToolIde() { info: "Exported from reminder bar", tool_name: details?.tool_name, }); - } catch (err) { + } catch (_err) { // Ignore posthog errors } } catch (err) { @@ -265,29 +265,14 @@ function ToolIde() { }; pushIndexDoc(docId); - return axiosPrivate(requestOptions) - .then(() => { - setAlertDetails({ - type: "success", - content: `${doc?.document_name} - Indexed successfully`, - }); - - try { - setPostHogCustomEvent("intent_success_ps_indexed_file", { - info: "Indexing completed", - }); - } catch (err) { - // If an error occurs while setting custom posthog event, ignore it and continue - } - }) - .catch((err) => { - setAlertDetails( - handleException(err, `${doc?.document_name} - Failed to index`), - ); - }) - .finally(() => { - deleteIndexDoc(docId); - }); + return axiosPrivate(requestOptions).catch((err) => { + // Only clear spinner on POST 
network failure (not 2xx). + // On success the spinner stays until a socket event arrives. + deleteIndexDoc(docId); + setAlertDetails( + handleException(err, `${doc?.document_name} - Failed to index`), + ); + }); }; const handleUpdateTool = async (body) => { diff --git a/frontend/src/components/custom-tools/tools-main/ToolsMain.jsx b/frontend/src/components/custom-tools/tools-main/ToolsMain.jsx index c9bab1b24f..dd8d75bd90 100644 --- a/frontend/src/components/custom-tools/tools-main/ToolsMain.jsx +++ b/frontend/src/components/custom-tools/tools-main/ToolsMain.jsx @@ -61,6 +61,10 @@ function ToolsMain() { ]; useEffect(() => { + const { isSinglePassExtractLoading } = useCustomToolStore.getState(); + if (isSinglePassExtractLoading) { + return; + } promptOutputApi( details?.tool_id, selectedDoc?.document_id, @@ -122,7 +126,8 @@ function ToolsMain() { info: `Clicked on + ${type} button`, }); } catch (err) { - // If an error occurs while setting custom posthog event, ignore it and continue + // PostHog analytics failure should not block the user action + console.error("PostHog event failed", err); } let body = {}; diff --git a/frontend/src/components/custom-tools/tools-main/ToolsMain.test.jsx b/frontend/src/components/custom-tools/tools-main/ToolsMain.test.jsx new file mode 100644 index 0000000000..14f7009ce6 --- /dev/null +++ b/frontend/src/components/custom-tools/tools-main/ToolsMain.test.jsx @@ -0,0 +1,20 @@ +import fs from "node:fs"; +import path from "node:path"; +import { describe, expect, it } from "vitest"; + +const source = fs.readFileSync( + path.resolve(__dirname, "ToolsMain.jsx"), + "utf-8", +); + +describe("ToolsMain source-text checks", () => { + it("reads isSinglePassExtractLoading via getState()", () => { + expect(source).toContain("useCustomToolStore.getState()"); + }); + + it("guards promptOutputApi behind isSinglePassExtractLoading check", () => { + expect(source).toContain( + "if (isSinglePassExtractLoading) {\n return;\n }", + ); + }); +}); diff 
--git a/frontend/src/components/custom-tools/tools-main/ToolsMainActionBtns.jsx b/frontend/src/components/custom-tools/tools-main/ToolsMainActionBtns.jsx index dd9cc1020a..c7053ac921 100644 --- a/frontend/src/components/custom-tools/tools-main/ToolsMainActionBtns.jsx +++ b/frontend/src/components/custom-tools/tools-main/ToolsMainActionBtns.jsx @@ -77,7 +77,7 @@ function ToolsMainActionBtns() { setPostHogCustomEvent("ps_output_analyser_seen", { info: "Clicked on 'Output Analyzer' button", }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom PostHog event, ignore it and continue } }, [navigate, setPostHogCustomEvent]); diff --git a/frontend/src/components/deployments/create-api-deployment-from-prompt-studio/CreateApiDeploymentFromPromptStudio.jsx b/frontend/src/components/deployments/create-api-deployment-from-prompt-studio/CreateApiDeploymentFromPromptStudio.jsx index 7ff2018cf6..9ce4dbc05b 100644 --- a/frontend/src/components/deployments/create-api-deployment-from-prompt-studio/CreateApiDeploymentFromPromptStudio.jsx +++ b/frontend/src/components/deployments/create-api-deployment-from-prompt-studio/CreateApiDeploymentFromPromptStudio.jsx @@ -95,7 +95,9 @@ const CreateApiDeploymentFromPromptStudio = ({ }, [open, toolDetails, form]); const fetchToolFunctionName = async () => { - if (!toolDetails?.tool_id) return; + if (!toolDetails?.tool_id) { + return; + } try { // Fetch tool list to find the function name for this tool_id @@ -311,16 +313,12 @@ const CreateApiDeploymentFromPromptStudio = ({ return; } - try { - setPostHogCustomEvent("intent_create_api_deployment_from_prompt_studio", { - info: "Creating API deployment from prompt studio", - tool_id: toolDetails?.tool_id, - tool_name: toolDetails?.tool_name, - deployment_name: deploymentDetails.api_name, - }); - } catch (err) { - // If an error occurs while setting custom posthog event, ignore it and continue - } + setPostHogCustomEvent("intent_create_api_deployment_from_prompt_studio", { 
+ info: "Creating API deployment from prompt studio", + tool_id: toolDetails?.tool_id, + tool_name: toolDetails?.tool_name, + deployment_name: deploymentDetails.api_name, + }); setIsLoading(true); setBackendErrors(null); // Clear any previous errors @@ -563,7 +561,9 @@ const CreateApiDeploymentFromPromptStudio = ({ formData={toolSettings} setFormData={handleToolSettingsChange} isLoading={false} - validateAndSubmit={() => {}} + validateAndSubmit={() => { + // No-op: validation not required for read-only display + }} isStateUpdateRequired={true} /> ); diff --git a/frontend/src/components/helpers/auth/RequireAuth.js b/frontend/src/components/helpers/auth/RequireAuth.js index 22244f147f..a856c2e03d 100644 --- a/frontend/src/components/helpers/auth/RequireAuth.js +++ b/frontend/src/components/helpers/auth/RequireAuth.js @@ -42,7 +42,7 @@ const RequireAuth = () => { selectedProductStore.useSelectedProductStore( (state) => state?.selectedProduct, ) === "llm-whisperer"; - } catch (error) { + } catch (_error) { // Do nothing } try { @@ -50,7 +50,7 @@ const RequireAuth = () => { selectedProductStore.useSelectedProductStore( (state) => state?.selectedProduct, ) === "verticals"; - } catch (error) { + } catch (_error) { // Do nothing } diff --git a/frontend/src/components/helpers/auth/RequireGuest.js b/frontend/src/components/helpers/auth/RequireGuest.js index 7668d5a006..f0cec7ef7c 100644 --- a/frontend/src/components/helpers/auth/RequireGuest.js +++ b/frontend/src/components/helpers/auth/RequireGuest.js @@ -28,7 +28,7 @@ const RequireGuest = () => { selectedProductStore.useSelectedProductStore( (state) => state?.selectedProduct, ) === "llm-whisperer"; - } catch (error) { + } catch (_error) { // Do nothing } try { @@ -36,7 +36,7 @@ const RequireGuest = () => { selectedProductStore.useSelectedProductStore( (state) => state?.selectedProduct, ) === "verticals"; - } catch (error) { + } catch (_error) { // Do nothing } diff --git 
a/frontend/src/components/helpers/socket-messages/SocketMessages.js b/frontend/src/components/helpers/socket-messages/SocketMessages.js index 352eebd01e..bbaadbcbed 100644 --- a/frontend/src/components/helpers/socket-messages/SocketMessages.js +++ b/frontend/src/components/helpers/socket-messages/SocketMessages.js @@ -12,6 +12,7 @@ import { SocketContext } from "../../../helpers/SocketContext"; import { useExceptionHandler } from "../../../hooks/useExceptionHandler"; import { useAlertStore } from "../../../store/alert-store"; import { useSessionStore } from "../../../store/session-store"; +import { useSocketCustomToolStore } from "../../../store/socket-custom-tool"; import { useSocketLogsStore } from "../../../store/socket-logs-store"; import { useSocketMessagesStore } from "../../../store/socket-messages-store"; import { useUsageStore } from "../../../store/usage-store"; @@ -28,6 +29,7 @@ function SocketMessages() { setPointer, } = useSocketMessagesStore(); const { pushLogMessages } = useSocketLogsStore(); + const { updateCusToolMessages } = useSocketCustomToolStore(); const { sessionDetails } = useSessionStore(); const socket = useContext(SocketContext); const { setAlertDetails } = useAlertStore(); @@ -45,7 +47,9 @@ function SocketMessages() { const logMessagesThrottledUpdate = useMemo( () => throttle((logsBatch) => { - if (!logsBatch.length) return; + if (!logsBatch.length) { + return; + } pushLogMessages(logsBatch); logBufferRef.current = []; }, THROTTLE_DELAY), @@ -89,6 +93,8 @@ function SocketMessages() { pushStagedMessage(msg); } else if (msg?.type === "LOG" && msg?.service === "prompt") { handleLogMessages(msg); + } else if (msg?.type === "PROGRESS") { + updateCusToolMessages([msg]); } if (msg?.type === "LOG" && msg?.service === "usage") { @@ -102,12 +108,14 @@ function SocketMessages() { ); } }, - [handleLogMessages, pushStagedMessage], + [handleLogMessages, pushStagedMessage, updateCusToolMessages], ); // Subscribe/unsubscribe to the socket channel 
useEffect(() => { - if (!logId) return; + if (!logId) { + return; + } const channel = `logs:${logId}`; socket?.on(channel, onMessage); @@ -118,7 +126,9 @@ function SocketMessages() { // Process staged messages sequentially useEffect(() => { - if (pointer > stagedMessages?.length - 1) return; + if (pointer > stagedMessages?.length - 1) { + return; + } const stagedMsg = stagedMessages[pointer]; const timer = setTimeout(() => { diff --git a/frontend/src/components/input-output/add-source/AddSource.jsx b/frontend/src/components/input-output/add-source/AddSource.jsx index 8f0f695c86..9cdd757b8b 100644 --- a/frontend/src/components/input-output/add-source/AddSource.jsx +++ b/frontend/src/components/input-output/add-source/AddSource.jsx @@ -87,7 +87,9 @@ function AddSource({ ]); useEffect(() => { - if (!isLLMWPaidSchema || !transformLlmWhispererFormData) return; + if (!isLLMWPaidSchema || !transformLlmWhispererFormData) { + return; + } const modifiedFormData = transformLlmWhispererFormData(formData); diff --git a/frontend/src/components/input-output/configure-ds/ConfigureDs.jsx b/frontend/src/components/input-output/configure-ds/ConfigureDs.jsx index d6953d859e..74b304d6bf 100644 --- a/frontend/src/components/input-output/configure-ds/ConfigureDs.jsx +++ b/frontend/src/components/input-output/configure-ds/ConfigureDs.jsx @@ -60,7 +60,9 @@ function ConfigureDs({ // Determine if OAuth authentication method is selected const isOAuthMethodSelected = () => { - if (!oAuthProvider?.length) return false; + if (!oAuthProvider?.length) { + return false; + } // Check if auth_type is set to a non-OAuth value const data = formData || {}; // If auth_type exists and is not "oauth", then OAuth is not selected @@ -116,7 +118,10 @@ function ConfigureDs({ }, [formData]); useEffect(() => { - if (!metadata) return; + if (!metadata) { + setFormData({}); + return; + } setFormData(metadata); }, [selectedSourceId, metadata, setFormData]); @@ -199,13 +204,9 @@ function ConfigureDs({ }; url = 
getUrl("test_adapters/"); - try { - setPostHogCustomEvent(posthogTcEventText[type], { - info: `Test connection was triggered: ${selectedSourceName}`, - }); - } catch (err) { - // If an error occurs while setting custom posthog event, ignore it and continue - } + setPostHogCustomEvent(posthogTcEventText[type], { + info: `Test connection was triggered: ${selectedSourceName}`, + }); } if (oAuthProvider?.length > 0 && isOAuthMethodSelected()) { @@ -278,16 +279,12 @@ function ConfigureDs({ url = getUrl("connector/"); - try { - const eventKey = `${type.toUpperCase()}`; - if (posthogConnectorAddedEventText[eventKey]) { - setPostHogCustomEvent(posthogConnectorAddedEventText[eventKey], { - info: `Clicked on 'Submit' button`, - connector_name: selectedSourceName, - }); - } - } catch (err) { - // If an error occurs while setting custom posthog event, ignore it and continue + const eventKey = `${type.toUpperCase()}`; + if (posthogConnectorAddedEventText[eventKey]) { + setPostHogCustomEvent(posthogConnectorAddedEventText[eventKey], { + info: `Clicked on 'Submit' button`, + connector_name: selectedSourceName, + }); } } else { const adapterMetadata = { ...formData }; @@ -301,14 +298,10 @@ function ConfigureDs({ }; url = getUrl("adapter/"); - try { - setPostHogCustomEvent(posthogSubmitEventText[type], { - info: "Clicked on 'Submit' button", - adpater_name: selectedSourceName, - }); - } catch (err) { - // If an error occurs while setting custom posthog event, ignore it and continue - } + setPostHogCustomEvent(posthogSubmitEventText[type], { + info: "Clicked on 'Submit' button", + adpater_name: selectedSourceName, + }); } let method = "POST"; diff --git a/frontend/src/components/input-output/data-source-card/DataSourceCard.jsx b/frontend/src/components/input-output/data-source-card/DataSourceCard.jsx index 899d5eb187..7452ec694b 100644 --- a/frontend/src/components/input-output/data-source-card/DataSourceCard.jsx +++ 
b/frontend/src/components/input-output/data-source-card/DataSourceCard.jsx @@ -19,7 +19,7 @@ function DataSourceCard({ srcDetails, setSelectedSourceId, type }) { info: "Clicked on the adapters card", adapter_name: srcDetails?.name, }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } }; diff --git a/frontend/src/components/input-output/file-system/FileSystem.jsx b/frontend/src/components/input-output/file-system/FileSystem.jsx index 13e72686ce..2ce90ef485 100644 --- a/frontend/src/components/input-output/file-system/FileSystem.jsx +++ b/frontend/src/components/input-output/file-system/FileSystem.jsx @@ -64,7 +64,9 @@ function FileExplorer({ .then(() => { resolve(); }) - .catch(() => {}); + .catch(() => { + // Intentionally empty: errors handled upstream + }); }); } diff --git a/frontend/src/components/input-output/list-of-sources/ListOfSources.jsx b/frontend/src/components/input-output/list-of-sources/ListOfSources.jsx index 888e716698..c4bee8252d 100644 --- a/frontend/src/components/input-output/list-of-sources/ListOfSources.jsx +++ b/frontend/src/components/input-output/list-of-sources/ListOfSources.jsx @@ -47,7 +47,9 @@ function ListOfSources({ }, 300); const renderModeFilters = () => { - if (!isConnector || connectorMode) return null; + if (!isConnector || connectorMode) { + return null; + } return ( { setFiles([]); setError(""); - if (!selectedConnector) return; + if (!selectedConnector) { + return; + } setLoadingData(true); let cancelled = false; inpService .getFileList(selectedConnector) .then((res) => { - if (cancelled) return; + if (cancelled) { + return; + } setFiles(res.data); setError(""); }) .catch((err) => { - if (cancelled) return; + if (cancelled) { + return; + } const errorDetails = handleException(err, "Error loading files"); setError(errorDetails.content); }) .finally(() => { - if (cancelled) return; + if (cancelled) { + return; + } setLoadingData(false); }); return () => { 
diff --git a/frontend/src/components/logging/detailed-logs/DetailedLogs.jsx b/frontend/src/components/logging/detailed-logs/DetailedLogs.jsx index 00723b0ddd..b7870942d7 100644 --- a/frontend/src/components/logging/detailed-logs/DetailedLogs.jsx +++ b/frontend/src/components/logging/detailed-logs/DetailedLogs.jsx @@ -599,4 +599,5 @@ const DetailedLogs = () => {
); }; + export { DetailedLogs }; diff --git a/frontend/src/components/logs-and-notifications/DisplayLogsAndNotifications.jsx b/frontend/src/components/logs-and-notifications/DisplayLogsAndNotifications.jsx index ffdd75fa63..81fe1e85c0 100644 --- a/frontend/src/components/logs-and-notifications/DisplayLogsAndNotifications.jsx +++ b/frontend/src/components/logs-and-notifications/DisplayLogsAndNotifications.jsx @@ -108,7 +108,9 @@ export function DisplayLogsAndNotifications() { const onMouseMove = useCallback( (e) => { - if (!draggingRef.current) return; + if (!draggingRef.current) { + return; + } const diff = startYRef.current - e.clientY; const newHeight = startHeightRef.current + diff; const parentHeight = getParentHeight(); diff --git a/frontend/src/components/metrics-dashboard/MetricsChart.jsx b/frontend/src/components/metrics-dashboard/MetricsChart.jsx index 80dad597e6..8da8953261 100644 --- a/frontend/src/components/metrics-dashboard/MetricsChart.jsx +++ b/frontend/src/components/metrics-dashboard/MetricsChart.jsx @@ -532,4 +532,4 @@ HITLChart.propTypes = { loading: PropTypes.bool, }; -export { PagesChart, TrendAnalysisChart, HITLChart }; +export { HITLChart, PagesChart, TrendAnalysisChart }; diff --git a/frontend/src/components/metrics-dashboard/RecentActivity.jsx b/frontend/src/components/metrics-dashboard/RecentActivity.jsx index f8bbb3864b..f6f1ad5104 100644 --- a/frontend/src/components/metrics-dashboard/RecentActivity.jsx +++ b/frontend/src/components/metrics-dashboard/RecentActivity.jsx @@ -79,7 +79,9 @@ function RecentActivity({ data, loading }) { const orgName = sessionDetails?.orgName; const handleActivityClick = (item) => { - if (!item.execution_id || !orgName) return; + if (!item.execution_id || !orgName) { + return; + } const typeConfig = TYPE_CONFIG[item.type] || TYPE_CONFIG.workflow; navigate(`/${orgName}/logs/${typeConfig.logType}/${item.execution_id}`, { state: { from: "dashboard" }, diff --git 
a/frontend/src/components/pipelines-or-deployments/file-history-modal/FileHistoryModal.jsx b/frontend/src/components/pipelines-or-deployments/file-history-modal/FileHistoryModal.jsx index 54bdf355aa..6dfbde0d21 100644 --- a/frontend/src/components/pipelines-or-deployments/file-history-modal/FileHistoryModal.jsx +++ b/frontend/src/components/pipelines-or-deployments/file-history-modal/FileHistoryModal.jsx @@ -245,7 +245,9 @@ const FileHistoryModal = ({ open, setOpen, workflowId, workflowName }) => { // Delete selected file histories (bulk delete by IDs) const handleDeleteSelected = async () => { - if (selectedRowKeys.length === 0) return; + if (selectedRowKeys.length === 0) { + return; + } if (selectedRowKeys.length > MAX_BULK_DELETE) { setAlertDetails({ @@ -439,7 +441,9 @@ const FileHistoryModal = ({ open, setOpen, workflowId, workflowName }) => { width: "12%", responsive: ["md"], render: (date) => { - if (!date) return "N/A"; + if (!date) { + return "N/A"; + } return new Date(date).toLocaleString(); }, }, diff --git a/frontend/src/components/set-org/SetOrg.jsx b/frontend/src/components/set-org/SetOrg.jsx index e612d94788..bb2c00943d 100644 --- a/frontend/src/components/set-org/SetOrg.jsx +++ b/frontend/src/components/set-org/SetOrg.jsx @@ -33,7 +33,7 @@ function SetOrg() { if (state === null || signedInOrgId) { navigate("/"); } - } catch (error) { + } catch (_error) { navigate("/"); } finally { setLoading(false); diff --git a/frontend/src/components/settings/default-triad/DefaultTriad.jsx b/frontend/src/components/settings/default-triad/DefaultTriad.jsx index 2bd1b77a08..31cb045df6 100644 --- a/frontend/src/components/settings/default-triad/DefaultTriad.jsx +++ b/frontend/src/components/settings/default-triad/DefaultTriad.jsx @@ -128,7 +128,7 @@ function DefaultTriad() { info: "Selected default triad", adapter_name: adapterType, }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } }; diff --git 
a/frontend/src/components/settings/invite/InviteEditUser.jsx b/frontend/src/components/settings/invite/InviteEditUser.jsx index d10c341e17..334426e47a 100644 --- a/frontend/src/components/settings/invite/InviteEditUser.jsx +++ b/frontend/src/components/settings/invite/InviteEditUser.jsx @@ -135,7 +135,7 @@ function InviteEditUser() { ? "Clicked on 'Invite' button" : "Clicked on 'Update' button"; setPostHogCustomEvent("intent_success_add_user", { info }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } }; diff --git a/frontend/src/components/settings/platform-api-keys/PlatformApiKeys.jsx b/frontend/src/components/settings/platform-api-keys/PlatformApiKeys.jsx index 6c9b8d07f1..0e245fb8ff 100644 --- a/frontend/src/components/settings/platform-api-keys/PlatformApiKeys.jsx +++ b/frontend/src/components/settings/platform-api-keys/PlatformApiKeys.jsx @@ -57,7 +57,9 @@ function PlatformApiKeys() { const basePath = `/api/v1/unstract/${sessionDetails?.orgId}/platform-api`; const fetchKeys = useCallback(() => { - if (!sessionDetails?.orgId) return; + if (!sessionDetails?.orgId) { + return; + } setIsLoading(true); axiosPrivate({ method: "GET", diff --git a/frontend/src/components/settings/platform/PlatformSettings.jsx b/frontend/src/components/settings/platform/PlatformSettings.jsx index bdc996a6ae..f255da6a73 100644 --- a/frontend/src/components/settings/platform/PlatformSettings.jsx +++ b/frontend/src/components/settings/platform/PlatformSettings.jsx @@ -126,7 +126,7 @@ function PlatformSettings() { info: "API Key has been generated", }); } - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } diff --git a/frontend/src/components/settings/users/Users.jsx b/frontend/src/components/settings/users/Users.jsx index 17d971ebe9..55efbd5873 100644 --- a/frontend/src/components/settings/users/Users.jsx +++ 
b/frontend/src/components/settings/users/Users.jsx @@ -176,7 +176,7 @@ function Users() { setPostHogCustomEvent("intent_add_user", { info: "Clicked on '+ Invite User' button", }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } }; diff --git a/frontend/src/components/tool-settings/tool-settings/ToolSettings.jsx b/frontend/src/components/tool-settings/tool-settings/ToolSettings.jsx index 9bf7c0f2c0..cd084c944b 100644 --- a/frontend/src/components/tool-settings/tool-settings/ToolSettings.jsx +++ b/frontend/src/components/tool-settings/tool-settings/ToolSettings.jsx @@ -211,7 +211,7 @@ function ToolSettings({ type }) { setPostHogCustomEvent(posthogEventText[type], { info: `Clicked on '+ ${btnText[type]}' button`, }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } }; diff --git a/frontend/src/components/widgets/card-grid-view/CardFieldComponents.jsx b/frontend/src/components/widgets/card-grid-view/CardFieldComponents.jsx index dbc2f7a94f..15ca6873cc 100644 --- a/frontend/src/components/widgets/card-grid-view/CardFieldComponents.jsx +++ b/frontend/src/components/widgets/card-grid-view/CardFieldComponents.jsx @@ -363,11 +363,11 @@ CardHeaderRow.propTypes = { }; export { + ApiEndpointSection, CardActionBox, - OwnerFieldRow, - LastRunFieldRow, + CardHeaderRow, Last5RunsFieldRow, + LastRunFieldRow, + OwnerFieldRow, WorkflowFieldRow, - ApiEndpointSection, - CardHeaderRow, }; diff --git a/frontend/src/components/widgets/card-grid-view/CardItem.jsx b/frontend/src/components/widgets/card-grid-view/CardItem.jsx index d8a2b83fb6..e62fe26c6d 100644 --- a/frontend/src/components/widgets/card-grid-view/CardItem.jsx +++ b/frontend/src/components/widgets/card-grid-view/CardItem.jsx @@ -370,4 +370,5 @@ CardItem.propTypes = { // Wrap with memo to prevent unnecessary re-renders when parent array reference changes const MemoizedCardItem = 
memo(CardItem); + export { MemoizedCardItem as CardItem }; diff --git a/frontend/src/components/widgets/error-boundary/ErrorBoundary.jsx b/frontend/src/components/widgets/error-boundary/ErrorBoundary.jsx index 4402a8cb1f..72e00db6af 100644 --- a/frontend/src/components/widgets/error-boundary/ErrorBoundary.jsx +++ b/frontend/src/components/widgets/error-boundary/ErrorBoundary.jsx @@ -40,7 +40,9 @@ ErrorBoundary.propTypes = { }; ErrorBoundary.defaultProps = { - onError: () => {}, + onError: () => { + // No-op default error handler + }, fallbackComponent: There was an error, }; diff --git a/frontend/src/components/workflows/new-workflow/NewWorkflow.jsx b/frontend/src/components/workflows/new-workflow/NewWorkflow.jsx index 4d06e2c917..fc5e225129 100644 --- a/frontend/src/components/workflows/new-workflow/NewWorkflow.jsx +++ b/frontend/src/components/workflows/new-workflow/NewWorkflow.jsx @@ -8,10 +8,16 @@ const { TextArea } = Input; function NewWorkflow({ name = "", description = "", - onDone = () => {}, - onClose = () => {}, + onDone = () => { + // No-op default + }, + onClose = () => { + // No-op default + }, loading = {}, - toggleModal = () => {}, + toggleModal = () => { + // No-op default + }, openModal = {}, backendErrors, setBackendErrors, diff --git a/frontend/src/components/workflows/workflow/Workflows.jsx b/frontend/src/components/workflows/workflow/Workflows.jsx index 73da9ec981..9b9bcede50 100644 --- a/frontend/src/components/workflows/workflow/Workflows.jsx +++ b/frontend/src/components/workflows/workflow/Workflows.jsx @@ -12,6 +12,7 @@ import { useWorkflowStore } from "../../../store/workflow-store"; import { CustomButton } from "../../widgets/custom-button/CustomButton.jsx"; import { EmptyState } from "../../widgets/empty-state/EmptyState.jsx"; import { LazyLoader } from "../../widgets/lazy-loader/LazyLoader.jsx"; +import { SharePermission } from "../../widgets/share-permission/SharePermission.jsx"; import { SpinnerLoader } from 
"../../widgets/spinner-loader/SpinnerLoader.jsx"; import "./Workflows.css"; import { useExceptionHandler } from "../../../hooks/useExceptionHandler.jsx"; @@ -384,11 +385,7 @@ function Workflows() { /> )} {shareOpen && selectedWorkflow && ( - - import("../../widgets/share-permission/SharePermission.jsx") - } - componentName={"SharePermission"} + { const [socket, setSocket] = useState(null); useEffect(() => { - let baseUrl = ""; - const body = { + // Always connect to the same origin as the page. + // - Dev: CRA proxy (ws: true in setupProxy.js) forwards to the backend. + // - Prod: Traefik routes /api/v1/socket to the backend. + // This ensures session cookies are sent (same-origin) and avoids + // cross-origin WebSocket issues. + const newSocket = io(getBaseUrl(), { transports: ["websocket"], path: "/api/v1/socket", - }; - if (!import.meta.env.MODE || import.meta.env.MODE === "development") { - baseUrl = import.meta.env.VITE_BACKEND_URL; - } else { - baseUrl = getBaseUrl(); - } - const newSocket = io(baseUrl, body); + }); setSocket(newSocket); // Clean up the socket connection on browser unload window.onbeforeunload = () => { diff --git a/frontend/src/helpers/dateFormatter.js b/frontend/src/helpers/dateFormatter.js index 237479cf35..fd8a11d2e7 100644 --- a/frontend/src/helpers/dateFormatter.js +++ b/frontend/src/helpers/dateFormatter.js @@ -147,4 +147,4 @@ function checkIsToday(isoDate) { } } -export { formatCompactDate, formatSmartDate, formatFullDate, checkIsToday }; +export { checkIsToday, formatCompactDate, formatFullDate, formatSmartDate }; diff --git a/frontend/src/helpers/metricsCache.js b/frontend/src/helpers/metricsCache.js index 2acf1ff564..0525906a74 100644 --- a/frontend/src/helpers/metricsCache.js +++ b/frontend/src/helpers/metricsCache.js @@ -166,10 +166,10 @@ function getCacheInfo(endpoint, params = {}) { } export { - getCached, - setCache, + CACHE_TTL, clearMetricsCache, evictExpiredCache, + getCached, getCacheInfo, - CACHE_TTL, + setCache, }; diff 
--git a/frontend/src/hooks/usePromptOutput.js b/frontend/src/hooks/usePromptOutput.js index dd5d02e40e..530cdf2bc7 100644 --- a/frontend/src/hooks/usePromptOutput.js +++ b/frontend/src/hooks/usePromptOutput.js @@ -27,8 +27,7 @@ const usePromptOutput = () => { const { sessionDetails } = useSessionStore(); const { setTokenUsage, updateTokenUsage } = useTokenUsageStore(); const { setPromptOutput, updatePromptOutput } = usePromptOutputStore(); - const { isSimplePromptStudio, isPublicSource, selectedDoc } = - useCustomToolStore(); + const { isSimplePromptStudio, isPublicSource } = useCustomToolStore(); const axiosPrivate = useAxiosPrivate(); const { id } = useParams(); @@ -126,8 +125,9 @@ const usePromptOutput = () => { wordConfidenceData: item?.word_confidence_data, }; - if (item?.is_single_pass_extract && isTokenUsageForSinglePassAdded) + if (item?.is_single_pass_extract && isTokenUsageForSinglePassAdded) { return; + } if (item?.is_single_pass_extract) { const tokenUsageId = generatePromptOutputKeyForSinglePass( @@ -165,13 +165,14 @@ const usePromptOutput = () => { }; const updateCoverage = (promptOutputs, outputs) => { + const currentSelectedDoc = useCustomToolStore.getState().selectedDoc; let updatedPromptOutputs = promptOutputs; Object.keys(outputs).forEach((key) => { const [keyPromptId, keyDoctId, , keyIsSinglePass] = key.split("__"); // only add output of selected document - if (keyDoctId === selectedDoc?.document_id) { + if (keyDoctId === currentSelectedDoc?.document_id) { const currentOutput = { [key]: outputs[key] }; - updatedPromptOutputs = { ...promptOutputs, ...currentOutput }; + updatedPromptOutputs = { ...updatedPromptOutputs, ...currentOutput }; } Object.keys(updatedPromptOutputs).forEach((innerKey) => { const [existingPromptId, , , existingIsSinglePass] = diff --git a/frontend/src/hooks/usePromptOutput.test.js b/frontend/src/hooks/usePromptOutput.test.js new file mode 100644 index 0000000000..c245d4654c --- /dev/null +++ 
b/frontend/src/hooks/usePromptOutput.test.js @@ -0,0 +1,18 @@ +import fs from "node:fs"; +import path from "node:path"; +import { describe, expect, it } from "vitest"; + +const source = fs.readFileSync( + path.resolve(__dirname, "usePromptOutput.js"), + "utf-8", +); + +describe("usePromptOutput source-text checks", () => { + it("updateCoverage uses getState() instead of stale closure", () => { + expect(source).toContain("useCustomToolStore.getState()"); + }); + + it("updateCoverage does NOT compare via stale selectedDoc closure", () => { + expect(source).not.toContain("=== selectedDoc?.document_id"); + }); +}); diff --git a/frontend/src/hooks/usePromptRun.js b/frontend/src/hooks/usePromptRun.js index 753c128a8e..44df3811c5 100644 --- a/frontend/src/hooks/usePromptRun.js +++ b/frontend/src/hooks/usePromptRun.js @@ -3,7 +3,6 @@ import { generateUUID, PROMPT_RUN_API_STATUSES, PROMPT_RUN_TYPES, - pollForCompletion, } from "../helpers/GetStaticData"; import { useAlertStore } from "../store/alert-store"; import { useCustomToolStore } from "../store/custom-tool-store"; @@ -14,13 +13,16 @@ import { useAxiosPrivate } from "./useAxiosPrivate"; import { useExceptionHandler } from "./useExceptionHandler"; import usePromptOutput from "./usePromptOutput"; +// Tracks the latest run nonce per (promptId, statusKey) so stale timeouts +// from a previous run don't falsely cancel a newer run of the same combo. 
+const runNonceMap = new Map(); +const SOCKET_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes + const usePromptRun = () => { const { pushPromptRunApi, freeActiveApi } = usePromptRunQueueStore(); - const { generatePromptOutputKey, updatePromptOutputState } = - usePromptOutput(); + const { generatePromptOutputKey } = usePromptOutput(); const { addPromptStatus, removePromptStatus } = usePromptRunStatusStore(); - const { details, llmProfiles, listOfDocs, selectedDoc } = - useCustomToolStore(); + const { details, llmProfiles, listOfDocs } = useCustomToolStore(); const { sessionDetails } = useSessionStore(); const axiosPrivate = useAxiosPrivate(); const { setAlertDetails } = useAlertStore(); @@ -49,37 +51,137 @@ const usePromptRun = () => { data: body, }; - const startTime = Date.now(); - const maxWaitTime = 30 * 1000; // 30 seconds - const pollingInterval = 5000; // 5 seconds - - pollForCompletion( - startTime, - requestOptions, - maxWaitTime, - pollingInterval, - makeApiRequest, - ) + // Fire-and-forget: POST dispatches the Celery task, socket delivers result. + const statusKey = generateApiRunStatusId(docId, profileId); + const nonceKey = `${promptId}__${statusKey}`; + const nonce = generateUUID(); + runNonceMap.set(nonceKey, nonce); + + makeApiRequest(requestOptions) .then((res) => { - if (docId !== selectedDoc?.document_id) return; - const data = res?.data || []; - const timeTakenInSeconds = Math.floor((Date.now() - startTime) / 1000); - updatePromptOutputState(data, false, timeTakenInSeconds); + // Handle pending-indexing response: clear running status immediately + if (res?.data?.status === "pending") { + removePromptStatus(promptId, statusKey); + setAlertDetails({ + type: "info", + content: + res?.data?.message || "Document is being indexed. Please wait.", + }); + return; + } + + // Timeout safety net: clear stale status if socket event never arrives. + // Only clears if this is still the latest run for this combo. 
+ setTimeout(() => { + if (runNonceMap.get(nonceKey) !== nonce) { + return; + } + const current = usePromptRunStatusStore.getState().promptRunStatus; + if ( + current?.[promptId]?.[statusKey] === PROMPT_RUN_API_STATUSES.RUNNING + ) { + removePromptStatus(promptId, statusKey); + setAlertDetails({ + type: "warning", + content: "Prompt execution timed out. Please try again.", + }); + } + runNonceMap.delete(nonceKey); + }, SOCKET_TIMEOUT_MS); }) .catch((err) => { setAlertDetails( handleException(err, "Failed to generate prompt output"), ); + removePromptStatus(promptId, statusKey); + runNonceMap.delete(nonceKey); + }) + .finally(() => { + freeActiveApi(); + }); + }; + + const runBulkPromptApi = (promptIds, docId, profileId) => { + const runId = generateUUID(); + const body = { + prompt_ids: promptIds, + document_id: docId, + profile_manager: profileId, + run_id: runId, + }; + + const requestOptions = { + method: "POST", + url: `/api/v1/unstract/${sessionDetails?.orgId}/prompt-studio/bulk_fetch_response/${details?.tool_id}`, + headers: { + "X-CSRFToken": sessionDetails?.csrfToken, + "Content-Type": "application/json", + }, + data: body, + }; + + const statusKey = generateApiRunStatusId(docId, profileId); + const nonces = {}; + promptIds.forEach((promptId) => { + const nonceKey = `${promptId}__${statusKey}`; + const nonce = generateUUID(); + nonces[promptId] = nonce; + runNonceMap.set(nonceKey, nonce); + }); + + // Timeout safety net: clear stale status if socket event never arrives. + // Only clears if this is still the latest run for each prompt combo. 
+ const clearStaleStatuses = () => { + promptIds.forEach((promptId) => { + const nonceKey = `${promptId}__${statusKey}`; + if (runNonceMap.get(nonceKey) !== nonces[promptId]) { + return; + } + const current = usePromptRunStatusStore.getState().promptRunStatus; + if ( + current?.[promptId]?.[statusKey] === PROMPT_RUN_API_STATUSES.RUNNING + ) { + removePromptStatus(promptId, statusKey); + } + runNonceMap.delete(nonceKey); + }); + }; + + makeApiRequest(requestOptions) + .then((res) => { + if (res?.data?.status === "pending") { + promptIds.forEach((promptId) => { + removePromptStatus(promptId, statusKey); + }); + setAlertDetails({ + type: "info", + content: + res?.data?.message || "Document is being indexed. Please wait.", + }); + return; + } + + setTimeout(clearStaleStatuses, SOCKET_TIMEOUT_MS); + }) + .catch((err) => { + setAlertDetails( + handleException(err, "Failed to generate prompt output"), + ); + promptIds.forEach((promptId) => { + const nonceKey = `${promptId}__${statusKey}`; + removePromptStatus(promptId, statusKey); + runNonceMap.delete(nonceKey); + }); }) .finally(() => { freeActiveApi(); - const statusKey = generateApiRunStatusId(docId, profileId); - removePromptStatus(promptId, statusKey); }); }; const runPrompt = (listOfApis) => { - if (!listOfApis?.length) return; + if (!listOfApis?.length) { + return; + } listOfApis.forEach(runPromptApi); }; @@ -190,14 +292,18 @@ const usePromptRun = () => { }; const params = paramsMap[promptRunType]; - if (!params) return; + if (!params) { + return; + } const paramValues = { promptId, profileId, docId }; const missingParams = params.requiredParams.filter( (param) => !paramValues[param], ); - if (missingParams.length > 0) return; + if (missingParams.length > 0) { + return; + } ({ apiRequestsToQueue, promptRunApiStatus } = prepareApiRequests( params.prompts, @@ -206,7 +312,19 @@ const usePromptRun = () => { )); addPromptStatus(promptRunApiStatus); - pushPromptRunApi(apiRequestsToQueue); + + // Use bulk API when 
multiple prompts target the same (profile, doc) to + // prevent the "Document being indexed" race condition. + const isBulk = params.prompts.length > 1; + if (isBulk) { + for (const pId of params.profiles) { + for (const dId of params.docs) { + runBulkPromptApi(params.prompts, dId, pId); + } + } + } else { + pushPromptRunApi(apiRequestsToQueue); + } }; return { diff --git a/frontend/src/hooks/usePromptStudioSocket.js b/frontend/src/hooks/usePromptStudioSocket.js new file mode 100644 index 0000000000..1f93cd8e32 --- /dev/null +++ b/frontend/src/hooks/usePromptStudioSocket.js @@ -0,0 +1,173 @@ +import { useCallback, useContext, useEffect } from "react"; +import { generateApiRunStatusId } from "../helpers/GetStaticData"; +import { SocketContext } from "../helpers/SocketContext"; +import { useAlertStore } from "../store/alert-store"; +import { useCustomToolStore } from "../store/custom-tool-store"; +import { usePromptRunStatusStore } from "../store/prompt-run-status-store"; +import { useExceptionHandler } from "./useExceptionHandler"; +import usePromptOutput from "./usePromptOutput"; + +const PROMPT_STUDIO_RESULT_EVENT = "prompt_studio_result"; + +/** + * Hook that listens for `prompt_studio_result` Socket.IO events emitted by + * backend Celery tasks (fetch_response, single_pass_extraction, index_document). + * + * On completion it feeds the result into the prompt-output store and clears + * the corresponding run-status entries so the UI stops showing spinners. 
+ */ +const usePromptStudioSocket = () => { + const socket = useContext(SocketContext); + const { removePromptStatus, clearPromptStatusById } = + usePromptRunStatusStore(); + const { updateCustomTool, deleteIndexDoc, selectedDoc, details } = + useCustomToolStore(); + const { setAlertDetails } = useAlertStore(); + const handleException = useExceptionHandler(); + const { updatePromptOutputState } = usePromptOutput(); + + const clearPromptStatuses = useCallback( + (promptIds, docId, profileId) => { + if (!docId || !profileId) { + return; + } + const statusKey = generateApiRunStatusId(docId, profileId); + (promptIds || []).forEach((promptId) => { + removePromptStatus(promptId, statusKey); + }); + }, + [removePromptStatus], + ); + + const handleCompleted = useCallback( + (operation, result, extra) => { + if (operation === "fetch_response") { + const data = Array.isArray(result) ? result : []; + updatePromptOutputState(data, false, extra?.elapsed || 0); + clearPromptStatuses( + extra?.prompt_ids, + extra?.document_id, + extra?.profile_manager_id, + ); + setAlertDetails({ + type: "success", + content: "Prompt execution completed successfully.", + }); + } else if (operation === "single_pass_extraction") { + const data = Array.isArray(result) ? 
result : []; + updatePromptOutputState(data, false, extra?.elapsed || 0); + updateCustomTool({ isSinglePassExtractLoading: false }); + clearPromptStatuses( + extra?.prompt_ids, + extra?.document_id, + extra?.profile_manager_id, + ); + setAlertDetails({ + type: "success", + content: "Single pass extraction completed successfully.", + }); + } else if (operation === "index_document") { + const docId = result?.document_id; + if (docId) { + deleteIndexDoc(docId); + } + setAlertDetails({ + type: "success", + content: `${selectedDoc?.document_name || "Document"} - Indexed successfully`, + }); + } + }, + [ + updatePromptOutputState, + clearPromptStatuses, + updateCustomTool, + setAlertDetails, + deleteIndexDoc, + selectedDoc, + ], + ); + + const handleFailed = useCallback( + (operation, error, extra) => { + setAlertDetails({ + type: "error", + content: error || `${operation} failed`, + }); + if (operation === "single_pass_extraction") { + updateCustomTool({ isSinglePassExtractLoading: false }); + } else if (operation === "index_document") { + const docId = extra?.document_id; + if (docId) { + deleteIndexDoc(docId); + } + } + + // Clear spinner for prompt operations so buttons re-enable + if ( + operation === "fetch_response" || + operation === "single_pass_extraction" + ) { + const promptIds = extra?.prompt_ids || []; + const docId = extra?.document_id; + const profileId = extra?.profile_manager_id; + if (docId && profileId) { + // Specific clearing (ideal path) + const statusKey = generateApiRunStatusId(docId, profileId); + promptIds.forEach((promptId) => { + removePromptStatus(promptId, statusKey); + }); + } else { + // Fallback: clear ALL statuses for these prompts + promptIds.forEach((promptId) => { + clearPromptStatusById(promptId); + }); + } + } + }, + [ + setAlertDetails, + updateCustomTool, + deleteIndexDoc, + removePromptStatus, + clearPromptStatusById, + ], + ); + + const onResult = useCallback( + (payload) => { + try { + const msg = payload?.data || payload; + 
const { status, operation, result, error, tool_id, ...extra } = msg; + + // Ignore events belonging to a different tool (multi-tab safety) + if (tool_id && details?.tool_id && tool_id !== details.tool_id) { + return; + } + + if (status === "completed") { + handleCompleted(operation, result, extra); + } else if (status === "failed") { + handleFailed(operation, error, extra); + } + } catch (err) { + setAlertDetails( + handleException(err, "Failed to process prompt studio result"), + ); + } + }, + [handleCompleted, handleFailed, setAlertDetails, handleException, details], + ); + + useEffect(() => { + if (!socket) { + return; + } + + socket.on(PROMPT_STUDIO_RESULT_EVENT, onResult); + return () => { + socket.off(PROMPT_STUDIO_RESULT_EVENT, onResult); + }; + }, [socket, onResult]); +}; + +export default usePromptStudioSocket; diff --git a/frontend/src/hooks/useRequestUrl.js b/frontend/src/hooks/useRequestUrl.js index 4fd39164f3..c96d32ed97 100644 --- a/frontend/src/hooks/useRequestUrl.js +++ b/frontend/src/hooks/useRequestUrl.js @@ -4,7 +4,9 @@ const useRequestUrl = () => { const { sessionDetails } = useSessionStore(); const getUrl = (url) => { - if (!url) return null; + if (!url) { + return null; + } const baseUrl = `/api/v1/unstract/${sessionDetails?.orgId}/`; return baseUrl + url.replace(/^\//, ""); diff --git a/frontend/src/hooks/useUserSession.js b/frontend/src/hooks/useUserSession.js index 66a237dd47..06958373e0 100644 --- a/frontend/src/hooks/useUserSession.js +++ b/frontend/src/hooks/useUserSession.js @@ -33,4 +33,5 @@ const useUserSession = () => { } }; }; + export { useUserSession }; diff --git a/frontend/src/layouts/rjsf-form-layout/CustomObjectFieldTemplate.jsx b/frontend/src/layouts/rjsf-form-layout/CustomObjectFieldTemplate.jsx index 150fef56f2..87f07bbc93 100644 --- a/frontend/src/layouts/rjsf-form-layout/CustomObjectFieldTemplate.jsx +++ b/frontend/src/layouts/rjsf-form-layout/CustomObjectFieldTemplate.jsx @@ -39,7 +39,9 @@ const CustomObjectFieldTemplate 
= (props) => { {fieldOrder.map((fieldName) => { const property = propertyMap[fieldName]; - if (!property) return null; + if (!property) { + return null; + } // Skip conditional fields when they shouldn't be shown if ( diff --git a/frontend/src/store/alert-store.js b/frontend/src/store/alert-store.js index b77c6600ad..24549e09ce 100644 --- a/frontend/src/store/alert-store.js +++ b/frontend/src/store/alert-store.js @@ -19,7 +19,9 @@ const STORE_VARIABLES = { const useAlertStore = create((setState) => ({ ...STORE_VARIABLES, setAlertDetails: (details) => { - if (!details) return STORE_VARIABLES; + if (!details) { + return STORE_VARIABLES; + } const isErrorType = details?.type === "error"; const isSuccessType = details?.type === "success"; diff --git a/frontend/src/store/prompt-run-queue-store.js b/frontend/src/store/prompt-run-queue-store.js index 35e888e1dc..267a8f2b26 100644 --- a/frontend/src/store/prompt-run-queue-store.js +++ b/frontend/src/store/prompt-run-queue-store.js @@ -24,7 +24,9 @@ const usePromptRunQueueStore = create((setState, getState) => ({ const existingState = { ...getState() }; const newActiveApis = existingState?.activeApis - numOfApis; - if (newActiveApis < 0) return; + if (newActiveApis < 0) { + return; + } setState({ ...existingState, ...{ activeApis: newActiveApis } }); }, diff --git a/frontend/src/store/prompt-run-status-store.js b/frontend/src/store/prompt-run-status-store.js index dcc852a502..8c55e27ac9 100644 --- a/frontend/src/store/prompt-run-status-store.js +++ b/frontend/src/store/prompt-run-status-store.js @@ -26,6 +26,13 @@ const usePromptRunStatusStore = create((setState, getState) => ({ return { promptRunStatus: newStatus }; }); }, + clearPromptStatusById: (promptId) => { + setState((state) => { + const newStatus = { ...state.promptRunStatus }; + delete newStatus[promptId]; + return { promptRunStatus: newStatus }; + }); + }, removePromptStatus: (promptId, key) => { setState((state) => { const currentStatus = state.promptRunStatus 
|| {}; diff --git a/frontend/src/store/prompt-studio-store.js b/frontend/src/store/prompt-studio-store.js index 2195586fa6..b5fe7acd0c 100644 --- a/frontend/src/store/prompt-studio-store.js +++ b/frontend/src/store/prompt-studio-store.js @@ -11,7 +11,9 @@ const usePromptStudioStore = create((set, get) => { ...STORE_VARIABLES, fetchCount: async (getPromptStudioCount) => { // Prevent duplicate calls if already loading or if we already have a count - if (get().isLoading || (get().count > 0 && !get().error)) return; + if (get().isLoading || (get().count > 0 && !get().error)) { + return; + } set({ isLoading: true }); try { diff --git a/frontend/src/store/retrieval-strategies-store.js b/frontend/src/store/retrieval-strategies-store.js index 46645b751d..357608ece8 100644 --- a/frontend/src/store/retrieval-strategies-store.js +++ b/frontend/src/store/retrieval-strategies-store.js @@ -31,7 +31,9 @@ const useRetrievalStrategiesStore = create((set, get) => ({ // Check if strategies need to be fetched (cache for 1 hour) shouldFetch: () => { const { strategies, lastFetched } = get(); - if (!strategies) return true; + if (!strategies) { + return true; + } const oneHour = 60 * 60 * 1000; // 1 hour in milliseconds const now = Date.now(); diff --git a/frontend/src/store/socket-logs-store.js b/frontend/src/store/socket-logs-store.js index 9eef2c5539..c12433c0e8 100644 --- a/frontend/src/store/socket-logs-store.js +++ b/frontend/src/store/socket-logs-store.js @@ -46,7 +46,9 @@ const useSocketLogsStore = create((setState, getState) => ({ }, data: { log: JSON.stringify(newLog) }, }; - axios(requestOptions).catch((err) => {}); + axios(requestOptions).catch(() => { + // Intentionally empty: best-effort log persistence + }); } }); diff --git a/frontend/src/store/workflow-store.js b/frontend/src/store/workflow-store.js index 0627b0b7d7..c68b1b97df 100644 --- a/frontend/src/store/workflow-store.js +++ b/frontend/src/store/workflow-store.js @@ -60,7 +60,7 @@ const useWorkflowStore = 
create((setState, getState) => ({ setState(() => { return { ...getState(), ...{ existingState } }; }); - } catch (err) { + } catch (_err) { return; } }, diff --git a/frontend/vite.config.js b/frontend/vite.config.js index d3310e847c..397f001312 100644 --- a/frontend/vite.config.js +++ b/frontend/vite.config.js @@ -123,7 +123,7 @@ export default defineConfig(({ mode }) => { port: Number(env.PORT) || 3000, clientPort: env.WDS_SOCKET_PORT ? Number(env.WDS_SOCKET_PORT) - : (Number(env.PORT) || 3000), + : Number(env.PORT) || 3000, }, // Proxy configuration (similar to setupProxy.js in CRA) proxy: diff --git a/frontend/vitest.config.mjs b/frontend/vitest.config.mjs new file mode 100644 index 0000000000..b315886669 --- /dev/null +++ b/frontend/vitest.config.mjs @@ -0,0 +1,19 @@ +import react from "@vitejs/plugin-react"; +import { defineConfig } from "vitest/config"; + +export default defineConfig({ + plugins: [ + react({ + include: "**/*.{jsx,js}", + }), + ], + esbuild: { + loader: "jsx", + include: /src\/.*\.jsx?$/, + }, + test: { + globals: true, + environment: "happy-dom", + setupFiles: "./src/setupTests.js", + }, +}); diff --git a/run-platform.sh b/run-platform.sh index bcacfa82e8..a2b793a131 100755 --- a/run-platform.sh +++ b/run-platform.sh @@ -294,6 +294,7 @@ run_services() { python3 "$script_dir/docker/scripts/release-notes/print_release_notes.py" "$current_version" "$target_branch" fi echo -e "\nOnce the services are up, visit ""$blue_text""http://frontend.unstract.localhost""$default_text"" in your browser." + echo -e "The async executor worker is included — Prompt Studio IDE runs are non-blocking." echo -e "\nSee logs with:" echo -e " ""$blue_text""$docker_compose_cmd -f docker/docker-compose.yaml logs -f""$default_text" echo -e "Configure services by updating corresponding ""$yellow_text""/.env""$default_text"" files." 
diff --git a/unstract/core/src/unstract/core/pubsub_helper.py b/unstract/core/src/unstract/core/pubsub_helper.py index 5f89f5c7c4..3bec8475e0 100644 --- a/unstract/core/src/unstract/core/pubsub_helper.py +++ b/unstract/core/src/unstract/core/pubsub_helper.py @@ -16,8 +16,8 @@ class LogPublisher: broker_url = str( httpx.URL(os.getenv("CELERY_BROKER_BASE_URL", "amqp://")).copy_with( - username=os.getenv("CELERY_BROKER_USER"), - password=os.getenv("CELERY_BROKER_PASS"), + username=os.getenv("CELERY_BROKER_USER") or None, + password=os.getenv("CELERY_BROKER_PASS") or None, ) ) kombu_conn = Connection(broker_url) @@ -92,6 +92,29 @@ def log_workflow_update( "message": message, } + @staticmethod + def log_progress( + component: dict[str, str], + level: str, + state: str, + message: str, + ) -> dict[str, str]: + """Build a progress log message for streaming to the frontend. + + Same structure as ``log_prompt()`` but uses ``type: "PROGRESS"`` + so the frontend can distinguish executor progress from regular + log messages. 
+ """ + return { + "timestamp": datetime.now(UTC).timestamp(), + "type": "PROGRESS", + "service": "prompt", + "component": component, + "level": level, + "state": state, + "message": message, + } + @staticmethod def log_prompt( component: dict[str, str], diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/ocr/register.py b/unstract/sdk1/src/unstract/sdk1/adapters/ocr/register.py index fde5558c16..cbc1a6ea67 100644 --- a/unstract/sdk1/src/unstract/sdk1/adapters/ocr/register.py +++ b/unstract/sdk1/src/unstract/sdk1/adapters/ocr/register.py @@ -45,5 +45,5 @@ def _build_adapter_list(adapter: str, package: str, adapters: dict[str, Any]) -> Common.MODULE: module, Common.METADATA: metadata, } - except ModuleNotFoundError as exception: - logger.warning(f"Unable to import ocr adapters : {exception}") + except Exception as exception: + logger.warning(f"Unable to import OCR adapter '{adapter}': {exception}") diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/vectordb/exceptions.py b/unstract/sdk1/src/unstract/sdk1/adapters/vectordb/exceptions.py index edef6bd043..e44784671e 100644 --- a/unstract/sdk1/src/unstract/sdk1/adapters/vectordb/exceptions.py +++ b/unstract/sdk1/src/unstract/sdk1/adapters/vectordb/exceptions.py @@ -1,5 +1,3 @@ -from qdrant_client.http.exceptions import ApiException as QdrantAPIException -from unstract.sdk1.adapters.vectordb.qdrant.src import Qdrant from unstract.sdk1.adapters.vectordb.vectordb_adapter import VectorDBAdapter from unstract.sdk1.exceptions import VectorDBError @@ -20,9 +18,18 @@ def parse_vector_db_err(e: Exception, vector_db: VectorDBAdapter) -> VectorDBErr if isinstance(e, VectorDBError): return e - if isinstance(e, QdrantAPIException): - err = Qdrant.parse_vector_db_err(e) - else: + # Lazy import to avoid hard dependency on qdrant_client at module level. + # qdrant_client's protobuf files can fail to load depending on the + # protobuf runtime version (KeyError: '_POINTID'). 
+ try: + from qdrant_client.http.exceptions import ApiException as QdrantAPIException + from unstract.sdk1.adapters.vectordb.qdrant.src import Qdrant + + if isinstance(e, QdrantAPIException): + err = Qdrant.parse_vector_db_err(e) + else: + err = VectorDBError(str(e), actual_err=e) + except Exception: err = VectorDBError(str(e), actual_err=e) msg = f"Error from vector DB '{vector_db.get_name()}'." diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/vectordb/register.py b/unstract/sdk1/src/unstract/sdk1/adapters/vectordb/register.py index 1c551dafe1..05c01d822e 100644 --- a/unstract/sdk1/src/unstract/sdk1/adapters/vectordb/register.py +++ b/unstract/sdk1/src/unstract/sdk1/adapters/vectordb/register.py @@ -45,5 +45,5 @@ def _build_adapter_list(adapter: str, package: str, adapters: dict[str, Any]) -> Common.MODULE: module, Common.METADATA: metadata, } - except ModuleNotFoundError as exception: - logger.warning(f"Unable to import vectorDB adapters : {exception}") + except Exception as exception: + logger.warning(f"Unable to import vectorDB adapter '{adapter}': {exception}") diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/helper.py b/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/helper.py index 7d0983673d..ade89f7cba 100644 --- a/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/helper.py +++ b/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/helper.py @@ -206,23 +206,33 @@ def get_whisperer_params( ), WhispererConfig.ADD_LINE_NOS: extra_params.enable_highlight, WhispererConfig.INCLUDE_LINE_CONFIDENCE: extra_params.enable_highlight, - # Not providing default value to maintain legacy compatablity - # these are optional params and identifiers for audit - WhispererConfig.TAG: extra_params.tag - or config.get( - WhispererConfig.TAG, - WhispererDefaults.TAG, - ), - WhispererConfig.USE_WEBHOOK: config.get(WhispererConfig.USE_WEBHOOK, ""), - WhispererConfig.WEBHOOK_METADATA: 
config.get( - WhispererConfig.WEBHOOK_METADATA - ), - WhispererConfig.WAIT_TIMEOUT: config.get( - WhispererConfig.WAIT_TIMEOUT, - WhispererDefaults.WAIT_TIMEOUT, - ), - WhispererConfig.WAIT_FOR_COMPLETION: WhispererDefaults.WAIT_FOR_COMPLETION, } + logger.info( + "HIGHLIGHT_DEBUG whisper params: ADD_LINE_NOS=%s", + params.get(WhispererConfig.ADD_LINE_NOS), + ) + params.update( + { + # Not providing default value to maintain legacy compatablity + # these are optional params and identifiers for audit + WhispererConfig.TAG: extra_params.tag + or config.get( + WhispererConfig.TAG, + WhispererDefaults.TAG, + ), + WhispererConfig.USE_WEBHOOK: config.get(WhispererConfig.USE_WEBHOOK, ""), + WhispererConfig.WEBHOOK_METADATA: config.get( + WhispererConfig.WEBHOOK_METADATA + ), + WhispererConfig.WAIT_TIMEOUT: config.get( + WhispererConfig.WAIT_TIMEOUT, + WhispererDefaults.WAIT_TIMEOUT, + ), + WhispererConfig.WAIT_FOR_COMPLETION: ( + WhispererDefaults.WAIT_FOR_COMPLETION + ), + } + ) if params[WhispererConfig.MODE] == Modes.LOW_COST.value: params.update( { diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/llm_whisperer_v2.py b/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/llm_whisperer_v2.py index 892339a9be..3a48a57647 100644 --- a/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/llm_whisperer_v2.py +++ b/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/llm_whisperer_v2.py @@ -82,6 +82,10 @@ def process( if fs is None: fs = FileStorage(provider=FileStorageProvider.LOCAL) enable_highlight = kwargs.get(X2TextConstants.ENABLE_HIGHLIGHT, False) + logger.info( + "HIGHLIGHT_DEBUG LLMWhispererV2.process: enable_highlight=%s", + enable_highlight, + ) extra_params = WhispererRequestParams( tag=kwargs.get(X2TextConstants.TAGS), enable_highlight=enable_highlight, diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/x2text/register.py 
b/unstract/sdk1/src/unstract/sdk1/adapters/x2text/register.py index 48d6a606af..3318887f95 100644 --- a/unstract/sdk1/src/unstract/sdk1/adapters/x2text/register.py +++ b/unstract/sdk1/src/unstract/sdk1/adapters/x2text/register.py @@ -45,5 +45,5 @@ def _build_adapter_list(adapter: str, package: str, adapters: dict[str, Any]) -> Common.MODULE: module, Common.METADATA: metadata, } - except ModuleNotFoundError as exception: - logger.warning(f"Unable to import X2Text adapters : {exception}") + except Exception as exception: + logger.warning(f"Unable to import X2Text adapter '{adapter}': {exception}") diff --git a/unstract/sdk1/src/unstract/sdk1/execution/__init__.py b/unstract/sdk1/src/unstract/sdk1/execution/__init__.py new file mode 100644 index 0000000000..fa70c88821 --- /dev/null +++ b/unstract/sdk1/src/unstract/sdk1/execution/__init__.py @@ -0,0 +1,15 @@ +from unstract.sdk1.execution.context import ExecutionContext +from unstract.sdk1.execution.dispatcher import ExecutionDispatcher +from unstract.sdk1.execution.executor import BaseExecutor +from unstract.sdk1.execution.orchestrator import ExecutionOrchestrator +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + +__all__ = [ + "BaseExecutor", + "ExecutionContext", + "ExecutionDispatcher", + "ExecutionOrchestrator", + "ExecutionResult", + "ExecutorRegistry", +] diff --git a/unstract/sdk1/src/unstract/sdk1/execution/context.py b/unstract/sdk1/src/unstract/sdk1/execution/context.py new file mode 100644 index 0000000000..a1efb4c3f8 --- /dev/null +++ b/unstract/sdk1/src/unstract/sdk1/execution/context.py @@ -0,0 +1,128 @@ +"""Execution context model for the executor framework. + +Defines the serializable context that is dispatched to executor +workers via Celery. Used by both the workflow path (structure tool +task) and the IDE path (PromptStudioHelper). 
+""" + +import uuid +from dataclasses import dataclass, field +from enum import Enum +from typing import Any + + +class ExecutionSource(str, Enum): + """Origin of the execution request.""" + + IDE = "ide" + TOOL = "tool" + + +class Operation(str, Enum): + """Supported extraction operations. + + Maps 1-to-1 with current PromptTool HTTP endpoints. + """ + + EXTRACT = "extract" + INDEX = "index" + ANSWER_PROMPT = "answer_prompt" + SINGLE_PASS_EXTRACTION = "single_pass_extraction" + SUMMARIZE = "summarize" + IDE_INDEX = "ide_index" + STRUCTURE_PIPELINE = "structure_pipeline" + TABLE_EXTRACT = "table_extract" + SMART_TABLE_EXTRACT = "smart_table_extract" + SPS_ANSWER_PROMPT = "sps_answer_prompt" + SPS_INDEX = "sps_index" + AGENTIC_EXTRACT = "agentic_extract" + AGENTIC_SUMMARIZE = "agentic_summarize" + AGENTIC_UNIFORMIZE = "agentic_uniformize" + AGENTIC_FINALIZE = "agentic_finalize" + AGENTIC_GENERATE_PROMPT = "agentic_generate_prompt" + AGENTIC_GENERATE_PROMPT_PIPELINE = "agentic_generate_prompt_pipeline" + AGENTIC_COMPARE = "agentic_compare" + AGENTIC_TUNE_FIELD = "agentic_tune_field" + + +@dataclass +class ExecutionContext: + """Serializable execution context dispatched to executor worker. + + This is the single payload sent as a Celery task argument to + ``execute_extraction``. It must remain JSON-serializable (no + ORM objects, no file handles, no callables). + + Attributes: + executor_name: Registered executor to handle this request + (e.g. ``"legacy"``, ``"agentic_table"``). + operation: The extraction operation to perform. + run_id: Unique identifier for this execution run. + execution_source: Where the request originated + (``"ide"`` or ``"tool"``). + organization_id: Tenant/org scope. ``None`` for public + calls. + executor_params: Opaque, operation-specific payload passed + through to the executor. Must be JSON-serializable. + request_id: Correlation ID for tracing across services. 
+ log_events_id: Socket.IO channel ID for streaming progress + logs to the frontend. ``None`` when not in an IDE + session (no logs published). + """ + + executor_name: str + operation: str + run_id: str + execution_source: str + organization_id: str | None = None + executor_params: dict[str, Any] = field(default_factory=dict) + request_id: str | None = None + log_events_id: str | None = None + + def __post_init__(self) -> None: + """Validate required fields after initialization.""" + if not self.executor_name: + raise ValueError("executor_name is required") + if not self.operation: + raise ValueError("operation is required") + if not self.run_id: + raise ValueError("run_id is required") + if not self.execution_source: + raise ValueError("execution_source is required") + + # Normalize enum values to plain strings for serialization + if isinstance(self.operation, Operation): + self.operation = self.operation.value + if isinstance(self.execution_source, ExecutionSource): + self.execution_source = self.execution_source.value + + # Auto-generate request_id if not provided + if self.request_id is None: + self.request_id = str(uuid.uuid4()) + + def to_dict(self) -> dict[str, Any]: + """Serialize to a JSON-compatible dict for Celery dispatch.""" + return { + "executor_name": self.executor_name, + "operation": self.operation, + "run_id": self.run_id, + "execution_source": self.execution_source, + "organization_id": self.organization_id, + "executor_params": self.executor_params, + "request_id": self.request_id, + "log_events_id": self.log_events_id, + } + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "ExecutionContext": + """Deserialize from a dict (e.g. 
Celery task argument).""" + return cls( + executor_name=data["executor_name"], + operation=data["operation"], + run_id=data["run_id"], + execution_source=data["execution_source"], + organization_id=data.get("organization_id"), + executor_params=data.get("executor_params", {}), + request_id=data.get("request_id"), + log_events_id=data.get("log_events_id"), + ) diff --git a/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py b/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py new file mode 100644 index 0000000000..e6be76d7c3 --- /dev/null +++ b/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py @@ -0,0 +1,279 @@ +"""Execution dispatcher for sending Celery tasks to executor workers. + +The dispatcher is the caller-side component used by both: +- Structure tool Celery task (workflow path) +- PromptStudioHelper (IDE path) + +It sends ``execute_extraction`` tasks to the ``executor`` queue. +Three dispatch modes are available: + +- ``dispatch()``: Send and block until result (synchronous). +- ``dispatch_async()``: Fire-and-forget, returns task_id for polling. +- ``dispatch_with_callback()``: Fire-and-forget with Celery ``link`` + / ``link_error`` callbacks for post-processing. +""" + +from __future__ import annotations + +import logging +import os +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from celery import Signature + from celery.result import AsyncResult + from unstract.sdk1.execution.context import ExecutionContext + +from unstract.sdk1.execution.result import ExecutionResult + +logger = logging.getLogger(__name__) + +# Constants matching workers/shared/enums values. +# Defined here to avoid an SDK1 → workers package dependency. +_TASK_NAME = "execute_extraction" + +# Queue-per-executor prefix. Each executor gets its own Celery queue +# named ``celery_executor_{executor_name}``, derived automatically +# from ``ExecutionContext.executor_name``. 
+_QUEUE_PREFIX = "celery_executor_" + +# Caller-side timeout (seconds) for AsyncResult.get(). +# This controls how long the *caller* waits for the executor to +# finish — distinct from the executor worker's +# ``EXECUTOR_TASK_TIME_LIMIT`` which controls how long the +# *worker* allows a task to run. +# +# Resolution order (matches workers convention): +# 1. Explicit ``timeout`` parameter on dispatch() +# 2. ``EXECUTOR_RESULT_TIMEOUT`` env var +# 3. Hardcoded default (3600s) +# +# The default (3600s) is intentionally <= the executor worker's +# ``task_time_limit`` default (also 3600s) so the caller never +# waits longer than the worker allows the task to run. +_DEFAULT_TIMEOUT_ENV = "EXECUTOR_RESULT_TIMEOUT" +_DEFAULT_TIMEOUT = 3600 # 1 hour — matches executor worker default +_NO_CELERY_APP_MSG = "No Celery app configured on ExecutionDispatcher" + + +class ExecutionDispatcher: + """Dispatches execution to executor worker via Celery task. + + Usage:: + + dispatcher = ExecutionDispatcher(celery_app=app) + result = dispatcher.dispatch(context, timeout=120) + + Fire-and-forget:: + + task_id = dispatcher.dispatch_async(context) + + Fire-and-forget with callbacks:: + + from celery import signature + + task = dispatcher.dispatch_with_callback( + context, + on_success=signature("my_success_task", args=[...], queue="q"), + on_error=signature("my_error_task", args=[...], queue="q"), + ) + """ + + def __init__(self, celery_app: object | None = None) -> None: + """Initialize the dispatcher. + + Args: + celery_app: A Celery application instance. Required + for dispatching tasks. Can be ``None`` only if + set later via ``celery_app`` attribute. + """ + self._app = celery_app + + @staticmethod + def _get_queue(executor_name: str) -> str: + """Derive the Celery queue name from *executor_name*. + + Convention: ``celery_executor_{executor_name}``. + Adding a new executor automatically gets its own queue — + no registry change needed. 
+ """ + return f"{_QUEUE_PREFIX}{executor_name}" + + def dispatch( + self, + context: ExecutionContext, + timeout: int | None = None, + ) -> ExecutionResult: + """Dispatch context as a Celery task and wait for result. + + Args: + context: ExecutionContext to dispatch. + timeout: Max seconds to wait. ``None`` reads from + the ``EXECUTOR_RESULT_TIMEOUT`` env var, + falling back to 3600s. + + Returns: + ExecutionResult from the executor. + + Raises: + ValueError: If no Celery app is configured. + """ + if self._app is None: + raise ValueError(_NO_CELERY_APP_MSG) + + if timeout is None: + timeout = int(os.environ.get(_DEFAULT_TIMEOUT_ENV, _DEFAULT_TIMEOUT)) + + queue = self._get_queue(context.executor_name) + logger.info( + "Dispatching execution: executor=%s operation=%s" + " run_id=%s request_id=%s timeout=%ss queue=%s", + context.executor_name, + context.operation, + context.run_id, + context.request_id, + timeout, + queue, + ) + + async_result = self._app.send_task( + _TASK_NAME, + args=[context.to_dict()], + queue=queue, + ) + logger.info( + "Task sent: celery_task_id=%s, waiting for result...", + async_result.id, + ) + + try: + # disable_sync_subtasks=False: safe because the executor task + # runs on a separate worker pool (worker-v2) — no deadlock + # risk even when dispatch() is called from inside a Django + # Celery task. + result_dict = async_result.get( + timeout=timeout, + disable_sync_subtasks=False, + ) + except Exception as exc: + logger.error( + "Dispatch failed: executor=%s operation=%s run_id=%s error=%s", + context.executor_name, + context.operation, + context.run_id, + exc, + ) + return ExecutionResult.failure( + error=f"{type(exc).__name__}: {exc}", + ) + + return ExecutionResult.from_dict(result_dict) + + def dispatch_async( + self, + context: ExecutionContext, + ) -> str: + """Dispatch without waiting. Returns task_id for polling. + + Args: + context: ExecutionContext to dispatch. 
+ + Returns: + The Celery task ID (use with ``AsyncResult`` to poll). + + Raises: + ValueError: If no Celery app is configured. + """ + if self._app is None: + raise ValueError(_NO_CELERY_APP_MSG) + + queue = self._get_queue(context.executor_name) + logger.info( + "Dispatching async execution: executor=%s " + "operation=%s run_id=%s request_id=%s queue=%s", + context.executor_name, + context.operation, + context.run_id, + context.request_id, + queue, + ) + + async_result = self._app.send_task( + _TASK_NAME, + args=[context.to_dict()], + queue=queue, + ) + return async_result.id + + def dispatch_with_callback( + self, + context: ExecutionContext, + on_success: Signature | None = None, + on_error: Signature | None = None, + task_id: str | None = None, + ) -> AsyncResult: + """Fire-and-forget dispatch with Celery link callbacks. + + Sends the task to the executor queue and returns immediately. + When the executor task completes, Celery invokes the + ``on_success`` callback (via ``link``). If the executor task + raises an exception, Celery invokes ``on_error`` (via + ``link_error``). + + Args: + context: ExecutionContext to dispatch. + on_success: A Celery ``Signature`` invoked on success. + Receives ``(result_dict,)`` as first positional arg + followed by the signature's own args. + on_error: A Celery ``Signature`` invoked on failure. + Receives ``(failed_task_uuid,)`` as first positional + arg followed by the signature's own args. + task_id: Optional pre-generated Celery task ID. Useful + when the caller needs to know the task ID before + dispatch (e.g. to include it in callback kwargs). + + Returns: + The ``AsyncResult`` from ``send_task``. Callers can + use ``.id`` for task tracking but should NOT call + ``.get()`` (that would block, defeating the purpose). + + Raises: + ValueError: If no Celery app is configured. 
+ """ + if self._app is None: + raise ValueError(_NO_CELERY_APP_MSG) + + queue = self._get_queue(context.executor_name) + logger.info( + "Dispatching with callback: executor=%s " + "operation=%s run_id=%s request_id=%s " + "on_success=%s on_error=%s queue=%s", + context.executor_name, + context.operation, + context.run_id, + context.request_id, + on_success, + on_error, + queue, + ) + + send_kwargs: dict[str, Any] = { + "args": [context.to_dict()], + "queue": queue, + } + if on_success is not None: + send_kwargs["link"] = on_success + if on_error is not None: + send_kwargs["link_error"] = on_error + if task_id is not None: + send_kwargs["task_id"] = task_id + + async_result = self._app.send_task( + _TASK_NAME, + **send_kwargs, + ) + logger.info( + "Task sent with callbacks: celery_task_id=%s", + async_result.id, + ) + return async_result diff --git a/unstract/sdk1/src/unstract/sdk1/execution/executor.py b/unstract/sdk1/src/unstract/sdk1/execution/executor.py new file mode 100644 index 0000000000..142109945d --- /dev/null +++ b/unstract/sdk1/src/unstract/sdk1/execution/executor.py @@ -0,0 +1,44 @@ +"""Base executor interface for the pluggable executor framework. + +All executors must subclass ``BaseExecutor`` and implement ``name`` +and ``execute``. Registration is handled by +``ExecutorRegistry.register``. +""" + +from abc import ABC, abstractmethod + +from unstract.sdk1.execution.context import ExecutionContext +from unstract.sdk1.execution.result import ExecutionResult + + +class BaseExecutor(ABC): + """Abstract base class for execution strategy implementations. + + Each executor encapsulates a particular extraction strategy + (e.g. the legacy promptservice pipeline, an agentic table + extractor, etc.). Executors are stateless — all request- + specific data arrives via ``ExecutionContext``. + """ + + @property + @abstractmethod + def name(self) -> str: + """Unique identifier used to look up this executor. 
+ + Must match the ``executor_name`` value in + ``ExecutionContext``. Convention: lowercase, snake_case + (e.g. ``"legacy"``, ``"agentic_table"``). + """ + + @abstractmethod + def execute(self, context: ExecutionContext) -> ExecutionResult: + """Run the extraction strategy described by *context*. + + Args: + context: Fully-populated execution context with + operation type and executor params. + + Returns: + An ``ExecutionResult`` whose ``data`` dict conforms to + the response contract for the given operation. + """ diff --git a/unstract/sdk1/src/unstract/sdk1/execution/orchestrator.py b/unstract/sdk1/src/unstract/sdk1/execution/orchestrator.py new file mode 100644 index 0000000000..2c0f66f3bb --- /dev/null +++ b/unstract/sdk1/src/unstract/sdk1/execution/orchestrator.py @@ -0,0 +1,77 @@ +"""Execution orchestrator for the executor worker. + +The orchestrator is the entry point called by the +``execute_extraction`` Celery task. It resolves the correct +executor from the registry and delegates execution, ensuring +that unhandled exceptions are always wrapped in a failed +``ExecutionResult``. +""" + +import logging +import time + +from unstract.sdk1.execution.context import ExecutionContext +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + +logger = logging.getLogger(__name__) + + +class ExecutionOrchestrator: + """Looks up and invokes the executor for a given context. + + Usage (inside the Celery task):: + + orchestrator = ExecutionOrchestrator() + result = orchestrator.execute(context) + """ + + def execute(self, context: ExecutionContext) -> ExecutionResult: + """Resolve the executor and run it. + + Args: + context: Fully-populated execution context. + + Returns: + ``ExecutionResult`` — always, even on unhandled + exceptions (wrapped as a failure result). 
+ """ + logger.info( + "Orchestrating execution: executor=%s operation=%s " + "run_id=%s request_id=%s", + context.executor_name, + context.operation, + context.run_id, + context.request_id, + ) + + start = time.monotonic() + try: + executor = ExecutorRegistry.get(context.executor_name) + except KeyError as exc: + logger.error("Executor lookup failed: %s", exc) + return ExecutionResult.failure(error=str(exc)) + + try: + result = executor.execute(context) + except Exception as exc: + elapsed = time.monotonic() - start + logger.exception( + "Executor %r raised an unhandled exception after %.2fs", + context.executor_name, + elapsed, + ) + return ExecutionResult.failure( + error=f"{type(exc).__name__}: {exc}", + metadata={"elapsed_seconds": round(elapsed, 3)}, + ) + + elapsed = time.monotonic() - start + logger.info( + "Execution completed: executor=%s operation=%s success=%s elapsed=%.2fs", + context.executor_name, + context.operation, + result.success, + elapsed, + ) + return result diff --git a/unstract/sdk1/src/unstract/sdk1/execution/registry.py b/unstract/sdk1/src/unstract/sdk1/execution/registry.py new file mode 100644 index 0000000000..c9ca1fee12 --- /dev/null +++ b/unstract/sdk1/src/unstract/sdk1/execution/registry.py @@ -0,0 +1,112 @@ +"""Executor registry for the pluggable executor framework. + +Provides a simple in-process registry where executor classes +self-register at import time via the ``@ExecutorRegistry.register`` +decorator. The executor worker imports all executor modules so +that registration happens before any task is processed. +""" + +import logging +from typing import TypeVar + +from unstract.sdk1.execution.executor import BaseExecutor + +logger = logging.getLogger(__name__) + +T = TypeVar("T", bound=type[BaseExecutor]) + + +class ExecutorRegistry: + """In-process registry mapping executor names to classes. 
+ + Usage:: + + @ExecutorRegistry.register + class LegacyExecutor(BaseExecutor): + @property + def name(self) -> str: + return "legacy" + + ... + + + executor = ExecutorRegistry.get("legacy") + """ + + _registry: dict[str, type[BaseExecutor]] = {} + + @classmethod + def register(cls, executor_cls: T) -> T: + """Class decorator that registers an executor. + + Instantiates the class once to read its ``name`` property, + then stores the *class* (not the instance) so a fresh + instance is created per ``get()`` call. + + Args: + executor_cls: A concrete ``BaseExecutor`` subclass. + + Returns: + The same class, unmodified (passthrough decorator). + + Raises: + TypeError: If *executor_cls* is not a BaseExecutor + subclass. + ValueError: If an executor with the same name is + already registered. + """ + if not ( + isinstance(executor_cls, type) and issubclass(executor_cls, BaseExecutor) + ): + raise TypeError(f"{executor_cls!r} is not a BaseExecutor subclass") + + # Instantiate temporarily to read the name property + instance = executor_cls() + name = instance.name + + if name in cls._registry: + existing = cls._registry[name] + raise ValueError( + f"Executor name {name!r} is already registered " + f"by {existing.__name__}; cannot register " + f"{executor_cls.__name__}" + ) + + cls._registry[name] = executor_cls + logger.info( + "Registered executor %r (%s)", + name, + executor_cls.__name__, + ) + return executor_cls + + @classmethod + def get(cls, name: str) -> BaseExecutor: + """Look up and instantiate an executor by name. + + Args: + name: The executor name (e.g. ``"legacy"``). + + Returns: + A fresh ``BaseExecutor`` instance. + + Raises: + KeyError: If no executor is registered under *name*. + """ + executor_cls = cls._registry.get(name) + if executor_cls is None: + available = ", ".join(sorted(cls._registry)) or "(none)" + raise KeyError( + f"No executor registered with name {name!r}. 
Available: {available}" + ) + return executor_cls() + + @classmethod + def list_executors(cls) -> list[str]: + """Return sorted list of registered executor names.""" + return sorted(cls._registry) + + @classmethod + def clear(cls) -> None: + """Remove all registered executors (for testing).""" + cls._registry.clear() diff --git a/unstract/sdk1/src/unstract/sdk1/execution/result.py b/unstract/sdk1/src/unstract/sdk1/execution/result.py new file mode 100644 index 0000000000..6d425ef3c1 --- /dev/null +++ b/unstract/sdk1/src/unstract/sdk1/execution/result.py @@ -0,0 +1,73 @@ +"""Execution result model for the executor framework. + +Defines the standardized result returned by executors via the +Celery result backend. All executors must return an +``ExecutionResult`` so that callers (structure tool task, +PromptStudioHelper) have a uniform interface. +""" + +from dataclasses import dataclass, field +from typing import Any + + +@dataclass +class ExecutionResult: + """Standardized result from an executor. + + Returned via the Celery result backend as a JSON dict. + + Attributes: + success: Whether the execution completed without error. + data: Operation-specific output payload. The shape depends + on the operation (see response contract in the + migration plan). + metadata: Auxiliary information such as token usage, + timings, or adapter metrics. + error: Human-readable error message when ``success`` is + ``False``. ``None`` on success. 
+ """ + + success: bool + data: dict[str, Any] = field(default_factory=dict) + metadata: dict[str, Any] = field(default_factory=dict) + error: str | None = None + + def __post_init__(self) -> None: + """Validate result consistency after initialization.""" + if not self.success and not self.error: + raise ValueError("error message is required when success is False") + if self.success and self.error: + raise ValueError("error must be None when success is True") + + def to_dict(self) -> dict[str, Any]: + """Serialize to a JSON-compatible dict for Celery.""" + result: dict[str, Any] = { + "success": self.success, + "data": self.data, + "metadata": self.metadata, + } + result["error"] = self.error + return result + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "ExecutionResult": + """Deserialize from a dict (e.g. Celery result backend).""" + return cls( + success=data["success"], + data=data.get("data", {}), + metadata=data.get("metadata", {}), + error=data.get("error"), + ) + + @classmethod + def failure( + cls, + error: str, + metadata: dict[str, Any] | None = None, + ) -> "ExecutionResult": + """Convenience factory for a failed result.""" + return cls( + success=False, + error=error, + metadata=metadata or {}, + ) diff --git a/unstract/sdk1/src/unstract/sdk1/file_storage/impl.py b/unstract/sdk1/src/unstract/sdk1/file_storage/impl.py index 9666c86524..24e35e6fcf 100644 --- a/unstract/sdk1/src/unstract/sdk1/file_storage/impl.py +++ b/unstract/sdk1/src/unstract/sdk1/file_storage/impl.py @@ -168,7 +168,33 @@ def rm(self, path: str, recursive: bool = True) -> None: Returns: NA """ - return self.fs.rm(path=path, recursive=recursive) + try: + return self.fs.rm(path=path, recursive=recursive) + except Exception as e: + if "MissingContentMD5" in str(e) and recursive: + logger.warning( + "Bulk delete failed with MissingContentMD5 for path: %s. 
def _rm_files_individually(self, path: str) -> None:
    """Fallback deletion: delete files one at a time.

    Used when bulk S3 DeleteObjects fails (e.g., MissingContentMD5
    with certain S3-compatible providers).

    Args:
        path: Path (prefix) whose files are removed one by one.
    """
    for file_path in self.fs.find(path):
        self.fs.rm(file_path, recursive=False)
    # Clean up the "directory" prefix if it still exists
    try:
        self.fs.rmdir(path)
    except Exception as e:
        # Best-effort: the prefix may already be gone once its files are
        # deleted. Record the reason instead of swallowing it silently.
        logger.debug("Skipped removing directory prefix %s: %s", path, e)
@@ -144,8 +145,8 @@ def _get_adapter_configuration( adapter_data[Common.ADAPTER_NAME] = adapter_name # TODO: Print metadata after redacting sensitive information tool.stream_log( - f"Retrieved config for '{adapter_instance_id}', type: " - f"'{adapter_type}', provider: '{provider}', name: '{adapter_name}'", + f"Retrieved adapter config — name: '{adapter_name}', " + f"type: '{adapter_type}', provider: '{provider}'", level=LogLevel.DEBUG, ) except HTTPError as e: @@ -167,7 +168,7 @@ def _get_adapter_configuration( @classmethod def get_adapter_config( - cls: type[Self], tool: BaseTool, adapter_instance_id: str + cls: type[Self], tool: BaseTool | StreamMixin, adapter_instance_id: str ) -> dict[str, Any] | None: """Get adapter spec by the help of unstract DB tool. @@ -192,7 +193,7 @@ def get_adapter_config( return adapter_metadata tool.stream_log( - f"Retrieving config from DB for '{adapter_instance_id}'", + "Retrieving adapter configuration from platform service", level=LogLevel.DEBUG, ) diff --git a/unstract/sdk1/src/unstract/sdk1/utils/indexing.py b/unstract/sdk1/src/unstract/sdk1/utils/indexing.py index 5d3604294b..c477353763 100644 --- a/unstract/sdk1/src/unstract/sdk1/utils/indexing.py +++ b/unstract/sdk1/src/unstract/sdk1/utils/indexing.py @@ -2,7 +2,7 @@ from unstract.sdk1.file_storage import FileStorage, FileStorageProvider from unstract.sdk1.platform import PlatformHelper -from unstract.sdk1.tool.base import BaseTool +from unstract.sdk1.tool.stream import StreamMixin from unstract.sdk1.utils.common import Utils from unstract.sdk1.utils.tool import ToolUtils @@ -15,7 +15,7 @@ def generate_index_key( x2text: str, chunk_size: str, chunk_overlap: str, - tool: BaseTool, + tool: StreamMixin, file_path: str | None = None, file_hash: str | None = None, fs: FileStorage | None = None, diff --git a/unstract/sdk1/src/unstract/sdk1/vector_db.py b/unstract/sdk1/src/unstract/sdk1/vector_db.py index 9225fb8002..ac991e0dec 100644 --- 
class TestExecutionContext:
    """Checks ExecutionContext (de)serialization, normalization and validation."""

    def _make_context(self, **overrides: Any) -> ExecutionContext:  # noqa: ANN401
        """Build a context from baseline values, letting callers override any."""
        kwargs: dict[str, Any] = {
            "executor_name": "legacy",
            "operation": "extract",
            "run_id": "run-001",
            "execution_source": "tool",
            "organization_id": "org-123",
            "executor_params": {"file_path": _TEST_FILE_PATH},
            "request_id": "req-abc",
            **overrides,
        }
        return ExecutionContext(**kwargs)

    def test_round_trip_serialization(self: Self) -> None:
        """A context survives to_dict -> from_dict unchanged."""
        source = self._make_context()
        rebuilt = ExecutionContext.from_dict(source.to_dict())

        for attr in (
            "executor_name",
            "operation",
            "run_id",
            "execution_source",
            "organization_id",
            "executor_params",
            "request_id",
        ):
            assert getattr(rebuilt, attr) == getattr(source, attr)

    def test_json_serializable(self: Self) -> None:
        """The dict form can go through JSON (needed for Celery)."""
        source = self._make_context()
        payload = json.loads(json.dumps(source.to_dict()))
        rebuilt = ExecutionContext.from_dict(payload)
        assert rebuilt.executor_name == source.executor_name

    def test_enum_values_normalized(self: Self) -> None:
        """Enum members end up stored as their plain string values."""
        context = self._make_context(
            operation=Operation.ANSWER_PROMPT,
            execution_source=ExecutionSource.IDE,
        )
        assert context.operation == "answer_prompt"
        assert context.execution_source == "ide"
        # The serialized form carries the same plain strings
        as_dict = context.to_dict()
        assert as_dict["operation"] == "answer_prompt"
        assert as_dict["execution_source"] == "ide"

    def test_string_values_accepted(self: Self) -> None:
        """Arbitrary strings are accepted without enum coercion."""
        context = self._make_context(
            operation="custom_op",
            execution_source="tool",
        )
        assert context.operation == "custom_op"
        assert context.execution_source == "tool"

    def test_auto_generates_request_id(self: Self) -> None:
        """A missing request_id is filled in automatically."""
        context = self._make_context(request_id=None)
        assert context.request_id is not None
        assert len(context.request_id) > 0

    def test_explicit_request_id_preserved(self: Self) -> None:
        """A caller-supplied request_id is kept as-is."""
        context = self._make_context(request_id="my-req-id")
        assert context.request_id == "my-req-id"

    def test_optional_organization_id(self: Self) -> None:
        """organization_id may be None (public calls) and stays None."""
        context = self._make_context(organization_id=None)
        assert context.organization_id is None
        as_dict = context.to_dict()
        assert as_dict["organization_id"] is None
        rebuilt = ExecutionContext.from_dict(as_dict)
        assert rebuilt.organization_id is None

    def test_empty_executor_params_default(self: Self) -> None:
        """Omitting executor_params yields an empty dict."""
        context = ExecutionContext(
            executor_name="legacy",
            operation="extract",
            run_id="run-001",
            execution_source="tool",
        )
        assert context.executor_params == {}

    def test_complex_executor_params(self: Self) -> None:
        """Nested executor_params survive a round trip."""
        nested = {
            "file_path": "/data/doc.pdf",
            "outputs": [
                {"prompt_key": "p1", "llm": "adapter-1"},
                {"prompt_key": "p2", "llm": "adapter-2"},
            ],
            "options": {"reindex": True, "chunk_size": 512},
        }
        source = self._make_context(executor_params=nested)
        rebuilt = ExecutionContext.from_dict(source.to_dict())
        assert rebuilt.executor_params == nested

    @pytest.mark.parametrize(
        "field,value",
        [
            ("executor_name", ""),
            ("operation", ""),
            ("run_id", ""),
            ("execution_source", ""),
        ],
    )
    def test_validation_rejects_empty_required_fields(
        self: Self, field: str, value: str
    ) -> None:
        """An empty required field is rejected with ValueError."""
        overrides = {field: value}
        with pytest.raises(ValueError, match=f"{field} is required"):
            self._make_context(**overrides)

    def test_all_operations_accepted(self: Self) -> None:
        """Every Operation member produces a valid context."""
        for member in Operation:
            context = self._make_context(operation=member)
            assert context.operation == member.value

    def test_from_dict_missing_optional_fields(self: Self) -> None:
        """from_dict copes with a dict holding only the required keys."""
        required_only = {
            "executor_name": "legacy",
            "operation": "extract",
            "run_id": "run-001",
            "execution_source": "tool",
        }
        context = ExecutionContext.from_dict(required_only)
        assert context.organization_id is None
        assert context.executor_params == {}
        # The dict carries no request_id, yet the resulting context has one
        assert context.request_id is not None
error is valid.""" + result = ExecutionResult(success=True) + assert result.error is None + + def test_success_rejects_error(self: Self) -> None: + """success=True with error string raises ValueError.""" + with pytest.raises(ValueError, match="error must be None when success is True"): + ExecutionResult(success=True, error="should not be here") + + def test_failure_factory(self: Self) -> None: + """ExecutionResult.failure() convenience constructor.""" + result = ExecutionResult.failure( + error="Something broke", + metadata={"debug": True}, + ) + assert result.success is False + assert result.error == "Something broke" + assert result.data == {} + assert result.metadata == {"debug": True} + + def test_failure_factory_no_metadata(self: Self) -> None: + """failure() works without metadata.""" + result = ExecutionResult.failure(error="Oops") + assert result.metadata == {} + + def test_error_none_in_success_dict(self: Self) -> None: + """Successful result dict always includes error key (None on success).""" + result = ExecutionResult(success=True, data={"k": "v"}) + d = result.to_dict() + assert "error" in d + assert d["error"] is None + + def test_error_in_failure_dict(self: Self) -> None: + """Failed result dict includes error key.""" + result = ExecutionResult.failure(error="fail") + d = result.to_dict() + assert d["error"] == "fail" + + def test_default_empty_dicts(self: Self) -> None: + """Data and metadata default to empty dicts.""" + result = ExecutionResult(success=True) + assert result.data == {} + assert result.metadata == {} + + def test_from_dict_missing_optional_fields(self: Self) -> None: + """from_dict handles missing optional fields.""" + minimal = {"success": True} + result = ExecutionResult.from_dict(minimal) + assert result.data == {} + assert result.metadata == {} + assert result.error is None + + def test_response_contract_extract(self: Self) -> None: + """Verify extract operation response shape.""" + result = ExecutionResult( + success=True, + 
def _make_executor_class(
    executor_name: str,
) -> type[BaseExecutor]:
    """Create a concrete BaseExecutor subclass that reports *executor_name*.

    The generated executor succeeds on every call and echoes the
    context's operation back in its result data.
    """
    # Readable class name so registry error messages are meaningful
    label = f"{executor_name.title()}Executor"

    class _EchoExecutor(BaseExecutor):
        @property
        def name(self) -> str:
            return executor_name

        def execute(self, context: ExecutionContext) -> ExecutionResult:
            echoed = {"echo": context.operation}
            return ExecutionResult(success=True, data=echoed)

    _EchoExecutor.__name__ = label
    _EchoExecutor.__qualname__ = label
    return _EchoExecutor
executor_name="test_exec", + operation="extract", + run_id="run-1", + execution_source="tool", + ) + result = instance.execute(ctx) + assert isinstance(result, ExecutionResult) + assert result.success is True + assert result.data == {"echo": "extract"} + + +class TestExecutorRegistry: + """Tests for ExecutorRegistry.""" + + @pytest.fixture(autouse=True) + def _clean_registry(self: Self) -> None: + """Ensure a clean registry for every test.""" + ExecutorRegistry.clear() + + def test_register_and_get(self: Self) -> None: + """Register an executor and retrieve by name.""" + cls = _make_executor_class("alpha") + ExecutorRegistry.register(cls) + + executor = ExecutorRegistry.get("alpha") + assert isinstance(executor, BaseExecutor) + assert executor.name == "alpha" + + def test_get_returns_fresh_instance(self: Self) -> None: + """Each get() call returns a new instance.""" + cls = _make_executor_class("fresh") + ExecutorRegistry.register(cls) + + a = ExecutorRegistry.get("fresh") + b = ExecutorRegistry.get("fresh") + assert a is not b + + def test_register_as_decorator(self: Self) -> None: + """@ExecutorRegistry.register works as a class decorator.""" + + @ExecutorRegistry.register + class MyExecutor(BaseExecutor): + @property + def name(self) -> str: + return "decorated" + + def execute(self, context: ExecutionContext) -> ExecutionResult: + return ExecutionResult(success=True) + + executor = ExecutorRegistry.get("decorated") + assert executor.name == "decorated" + assert "decorated" in ExecutorRegistry.list_executors() + + def test_list_executors(self: Self) -> None: + """list_executors() returns sorted names.""" + ExecutorRegistry.register(_make_executor_class("charlie")) + ExecutorRegistry.register(_make_executor_class("alpha")) + ExecutorRegistry.register(_make_executor_class("bravo")) + + assert ExecutorRegistry.list_executors() == [ + "alpha", + "bravo", + "charlie", + ] + + def test_list_executors_empty(self: Self) -> None: + """list_executors() returns empty list 
when nothing registered.""" + assert ExecutorRegistry.list_executors() == [] + + def test_get_unknown_raises_key_error(self: Self) -> None: + """get() with unknown name raises KeyError.""" + with pytest.raises(KeyError, match="no_such_executor"): + ExecutorRegistry.get("no_such_executor") + + def test_get_unknown_lists_available(self: Self) -> None: + """KeyError message includes available executor names.""" + ExecutorRegistry.register(_make_executor_class("one")) + ExecutorRegistry.register(_make_executor_class("two")) + + with pytest.raises(KeyError, match="one") as exc_info: + ExecutorRegistry.get("missing") + assert "two" in str(exc_info.value) + + def test_duplicate_name_raises_value_error(self: Self) -> None: + """Registering two executors with the same name fails.""" + ExecutorRegistry.register(_make_executor_class("dup")) + with pytest.raises(ValueError, match="already registered"): + ExecutorRegistry.register(_make_executor_class("dup")) + + def test_register_non_subclass_raises_type_error(self: Self) -> None: + """Registering a non-BaseExecutor class raises TypeError.""" + with pytest.raises(TypeError, match="not a BaseExecutor"): + ExecutorRegistry.register(dict) # type: ignore[arg-type] + + def test_register_non_class_raises_type_error(self: Self) -> None: + """Registering a non-class object raises TypeError.""" + with pytest.raises(TypeError, match="not a BaseExecutor"): + ExecutorRegistry.register("not_a_class") # type: ignore[arg-type] + + def test_clear(self: Self) -> None: + """clear() removes all registrations.""" + ExecutorRegistry.register(_make_executor_class("temp")) + assert ExecutorRegistry.list_executors() == ["temp"] + ExecutorRegistry.clear() + assert ExecutorRegistry.list_executors() == [] + + def test_execute_through_registry(self: Self) -> None: + """End-to-end: register, get, execute.""" + ExecutorRegistry.register(_make_executor_class("e2e")) + + ctx = ExecutionContext( + executor_name="e2e", + operation="index", + run_id="run-42", + 
def _make_failing_executor_class(
    executor_name: str,
    exc: BaseException,
) -> type[BaseExecutor]:
    """Create an executor class whose ``execute()`` always raises *exc*."""
    label = f"{executor_name.title()}FailExecutor"

    class _RaisingExecutor(BaseExecutor):
        @property
        def name(self) -> str:
            return executor_name

        def execute(self, context: ExecutionContext) -> ExecutionResult:
            raise exc  # NOSONAR

    # Readable class name for log and error messages
    _RaisingExecutor.__name__ = label
    _RaisingExecutor.__qualname__ = label
    return _RaisingExecutor
failure result (not exception).""" + orchestrator = ExecutionOrchestrator() + result = orchestrator.execute(self._make_context(executor_name="nonexistent")) + assert result.success is False + assert "nonexistent" in result.error + + def test_executor_exception_returns_failure(self: Self) -> None: + """Unhandled executor exception is wrapped in failure result.""" + ExecutorRegistry.register( + _make_failing_executor_class("boom", RuntimeError("kaboom")) + ) + orchestrator = ExecutionOrchestrator() + result = orchestrator.execute(self._make_context(executor_name="boom")) + assert result.success is False + assert "RuntimeError" in result.error + assert "kaboom" in result.error + + def test_exception_result_has_elapsed_metadata(self: Self) -> None: + """Failure from exception includes elapsed_seconds metadata.""" + ExecutorRegistry.register( + _make_failing_executor_class("slow_fail", ValueError("bad input")) + ) + orchestrator = ExecutionOrchestrator() + result = orchestrator.execute(self._make_context(executor_name="slow_fail")) + assert result.success is False + assert "elapsed_seconds" in result.metadata + assert isinstance(result.metadata["elapsed_seconds"], float) + + def test_successful_result_passed_through(self: Self) -> None: + """Orchestrator returns the executor's result as-is on success.""" + ExecutorRegistry.register(_make_executor_class("passthru")) + orchestrator = ExecutionOrchestrator() + + ctx = self._make_context(executor_name="passthru", operation="answer_prompt") + result = orchestrator.execute(ctx) + + assert result.success is True + assert result.data == {"echo": "answer_prompt"} + + def test_executor_returning_failure_is_not_wrapped( + self: Self, + ) -> None: + """An executor that returns failure result is passed through.""" + + class FailingExecutor(BaseExecutor): + @property + def name(self) -> str: + return "graceful_fail" + + def execute(self, context: ExecutionContext) -> ExecutionResult: + return ExecutionResult.failure(error="LLM rate 
limited") + + ExecutorRegistry.register(FailingExecutor) + orchestrator = ExecutionOrchestrator() + + result = orchestrator.execute(self._make_context(executor_name="graceful_fail")) + assert result.success is False + assert result.error == "LLM rate limited" + + +# ---- Phase 1F: ExecutionDispatcher ---- + + +class TestExecutionDispatcher: + """Tests for ExecutionDispatcher (mocked Celery).""" + + def _make_context(self, **overrides: Any) -> ExecutionContext: # noqa: ANN401 + defaults: dict[str, Any] = { + "executor_name": "legacy", + "operation": "extract", + "run_id": "run-1", + "execution_source": "tool", + "request_id": "req-1", + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + def _make_mock_app( + self, + result_dict: dict[str, Any] | None = None, + side_effect: Exception | None = None, + task_id: str = "celery-task-123", + ) -> MagicMock: + """Create a mock Celery app with send_task configured.""" + mock_app = MagicMock() + mock_async_result = MagicMock() + mock_async_result.id = task_id + + if side_effect is not None: + mock_async_result.get.side_effect = side_effect + else: + mock_async_result.get.return_value = ( + result_dict + if result_dict is not None + else {"success": True, "data": {}, "metadata": {}} + ) + + mock_app.send_task.return_value = mock_async_result + return mock_app + + def test_dispatch_sends_task_and_returns_result( + self: Self, + ) -> None: + """dispatch() sends task to executor queue and returns result.""" + result_dict = { + "success": True, + "data": {"extracted_text": "hello"}, + "metadata": {}, + } + mock_app = self._make_mock_app(result_dict=result_dict) + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + result = dispatcher.dispatch(ctx, timeout=60) + + assert result.success is True + assert result.data == {"extracted_text": "hello"} + + # Verify send_task was called correctly + mock_app.send_task.assert_called_once_with( + "execute_extraction", + 
args=[ctx.to_dict()], + queue="celery_executor_legacy", + ) + mock_app.send_task.return_value.get.assert_called_once_with( + timeout=60, disable_sync_subtasks=False + ) + + def test_dispatch_uses_default_timeout(self: Self) -> None: + """dispatch() without timeout uses default (3600s).""" + mock_app = self._make_mock_app() + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + dispatcher.dispatch(ctx) + + mock_app.send_task.return_value.get.assert_called_once_with( + timeout=3600, disable_sync_subtasks=False + ) + + def test_dispatch_timeout_from_env( + self: Self, monkeypatch: pytest.MonkeyPatch + ) -> None: + """dispatch() reads timeout from EXECUTOR_RESULT_TIMEOUT env.""" + monkeypatch.setenv("EXECUTOR_RESULT_TIMEOUT", "120") + mock_app = self._make_mock_app() + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + dispatcher.dispatch(ctx) + + mock_app.send_task.return_value.get.assert_called_once_with( + timeout=120, disable_sync_subtasks=False + ) + + def test_dispatch_explicit_timeout_overrides_env( + self: Self, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Explicit timeout parameter overrides env var.""" + monkeypatch.setenv("EXECUTOR_RESULT_TIMEOUT", "120") + mock_app = self._make_mock_app() + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + dispatcher.dispatch(ctx, timeout=30) + + mock_app.send_task.return_value.get.assert_called_once_with( + timeout=30, disable_sync_subtasks=False + ) + + def test_dispatch_timeout_returns_failure( + self: Self, + ) -> None: + """TimeoutError from AsyncResult.get() is wrapped in failure.""" + mock_app = self._make_mock_app(side_effect=TimeoutError("Task timed out")) + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + result = dispatcher.dispatch(ctx, timeout=1) + + assert result.success is False + assert "TimeoutError" in result.error + + def 
test_dispatch_generic_exception_returns_failure( + self: Self, + ) -> None: + """Any exception from AsyncResult.get() becomes a failure.""" + mock_app = self._make_mock_app(side_effect=RuntimeError("broker down")) + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + result = dispatcher.dispatch(ctx, timeout=10) + + assert result.success is False + assert "RuntimeError" in result.error + assert "broker down" in result.error + + def test_dispatch_async_returns_task_id(self: Self) -> None: + """dispatch_async() returns the Celery task ID.""" + mock_app = self._make_mock_app(task_id="task-xyz-789") + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + task_id = dispatcher.dispatch_async(ctx) + + assert task_id == "task-xyz-789" + mock_app.send_task.assert_called_once_with( + "execute_extraction", + args=[ctx.to_dict()], + queue="celery_executor_legacy", + ) + + def test_dispatch_no_app_raises_value_error( + self: Self, + ) -> None: + """dispatch() without celery_app raises ValueError.""" + dispatcher = ExecutionDispatcher(celery_app=None) + ctx = self._make_context() + + with pytest.raises(ValueError, match="No Celery app"): + dispatcher.dispatch(ctx) + + def test_dispatch_async_no_app_raises_value_error( + self: Self, + ) -> None: + """dispatch_async() without celery_app raises ValueError.""" + dispatcher = ExecutionDispatcher(celery_app=None) + ctx = self._make_context() + + with pytest.raises(ValueError, match="No Celery app"): + dispatcher.dispatch_async(ctx) + + def test_dispatch_failure_result_from_executor( + self: Self, + ) -> None: + """Executor failure is deserialized correctly.""" + result_dict = { + "success": False, + "data": {}, + "metadata": {}, + "error": "LLM adapter timeout", + } + mock_app = self._make_mock_app(result_dict=result_dict) + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + result = dispatcher.dispatch(ctx, timeout=60) + + assert 
result.success is False + assert result.error == "LLM adapter timeout" + + def test_dispatch_context_serialized_correctly( + self: Self, + ) -> None: + """The full ExecutionContext is serialized in the task args.""" + mock_app = self._make_mock_app() + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context( + executor_name="agentic_table", + operation="agentic_extraction", + organization_id="org-42", + executor_params={"schema": {"name": "str"}}, + ) + + dispatcher.dispatch(ctx, timeout=60) + + sent_args = mock_app.send_task.call_args + context_dict = sent_args[1]["args"][0] + + assert context_dict["executor_name"] == "agentic_table" + assert context_dict["operation"] == "agentic_extraction" + assert context_dict["organization_id"] == "org-42" + assert context_dict["executor_params"] == {"schema": {"name": "str"}} + + # ---- Phase 5A: dispatch_with_callback ---- + + def test_dispatch_with_callback_sends_link_and_link_error( + self: Self, + ) -> None: + """dispatch_with_callback() passes on_success as link, on_error as link_error.""" + mock_app = self._make_mock_app(task_id="cb-task-001") + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + on_success = MagicMock(name="on_success_sig") + on_error = MagicMock(name="on_error_sig") + + result = dispatcher.dispatch_with_callback( + ctx, on_success=on_success, on_error=on_error + ) + + assert result.id == "cb-task-001" + mock_app.send_task.assert_called_once_with( + "execute_extraction", + args=[ctx.to_dict()], + queue="celery_executor_legacy", + link=on_success, + link_error=on_error, + ) + + def test_dispatch_with_callback_success_only( + self: Self, + ) -> None: + """dispatch_with_callback() with only on_success omits link_error.""" + mock_app = self._make_mock_app(task_id="cb-task-002") + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + on_success = MagicMock(name="on_success_sig") + + 
dispatcher.dispatch_with_callback(ctx, on_success=on_success) + + call_kwargs = mock_app.send_task.call_args + assert call_kwargs[1]["link"] is on_success + assert "link_error" not in call_kwargs[1] + + def test_dispatch_with_callback_error_only( + self: Self, + ) -> None: + """dispatch_with_callback() with only on_error omits link.""" + mock_app = self._make_mock_app(task_id="cb-task-003") + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + on_error = MagicMock(name="on_error_sig") + + dispatcher.dispatch_with_callback(ctx, on_error=on_error) + + call_kwargs = mock_app.send_task.call_args + assert "link" not in call_kwargs[1] + assert call_kwargs[1]["link_error"] is on_error + + def test_dispatch_with_callback_no_callbacks( + self: Self, + ) -> None: + """dispatch_with_callback() with no callbacks sends plain task.""" + mock_app = self._make_mock_app(task_id="cb-task-004") + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + result = dispatcher.dispatch_with_callback(ctx) + + assert result.id == "cb-task-004" + call_kwargs = mock_app.send_task.call_args + assert "link" not in call_kwargs[1] + assert "link_error" not in call_kwargs[1] + + def test_dispatch_with_callback_returns_async_result( + self: Self, + ) -> None: + """dispatch_with_callback() returns the AsyncResult object (not just task_id).""" + mock_app = self._make_mock_app(task_id="cb-task-005") + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + result = dispatcher.dispatch_with_callback(ctx) + + # Returns the full AsyncResult, not just the id string + assert result is mock_app.send_task.return_value + assert result.id == "cb-task-005" + + def test_dispatch_with_callback_no_app_raises_value_error( + self: Self, + ) -> None: + """dispatch_with_callback() without celery_app raises ValueError.""" + dispatcher = ExecutionDispatcher(celery_app=None) + ctx = self._make_context() + + with 
pytest.raises(ValueError, match="No Celery app"): + dispatcher.dispatch_with_callback(ctx) + + def test_dispatch_with_callback_context_serialized( + self: Self, + ) -> None: + """dispatch_with_callback() serializes context correctly.""" + mock_app = self._make_mock_app() + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context( + operation="answer_prompt", + executor_params={"prompt_key": "p1"}, + ) + + dispatcher.dispatch_with_callback(ctx, on_success=MagicMock()) + + sent_args = mock_app.send_task.call_args + context_dict = sent_args[1]["args"][0] + assert context_dict["operation"] == "answer_prompt" + assert context_dict["executor_params"] == {"prompt_key": "p1"} + + def test_dispatch_with_callback_custom_task_id( + self: Self, + ) -> None: + """dispatch_with_callback() passes custom task_id to send_task.""" + mock_app = self._make_mock_app(task_id="pre-gen-id-123") + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + dispatcher.dispatch_with_callback(ctx, task_id="pre-gen-id-123") + + call_kwargs = mock_app.send_task.call_args + assert call_kwargs[1]["task_id"] == "pre-gen-id-123" + + def test_dispatch_with_callback_no_task_id_omits_kwarg( + self: Self, + ) -> None: + """dispatch_with_callback() without task_id doesn't pass task_id.""" + mock_app = self._make_mock_app() + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + dispatcher.dispatch_with_callback(ctx) + + call_kwargs = mock_app.send_task.call_args + assert "task_id" not in call_kwargs[1] + + +# ---- Phase 1G: ExecutorToolShim ---- +# Note: ExecutorToolShim lives in workers/executor/ but the tests +# import it directly via sys.path manipulation since the workers +# package requires Celery (not installed in SDK1 test venv). +# We test the shim's logic here by importing its direct dependencies +# from SDK1 (StreamMixin, SdkError, LogLevel, ToolEnv). 
+ + +class _MockExecutorToolShim: + """In-test replica of ExecutorToolShim for SDK1 test isolation. + + The real ExecutorToolShim lives in workers/executor/ and cannot + be imported here (Celery not in SDK1 venv). This replica + mirrors the same logic so we can verify the behavior contract + without importing the workers package. + """ + + def __init__(self, platform_api_key: str = "") -> None: + self.platform_api_key = platform_api_key + + def get_env_or_die(self, env_key: str) -> str: + import os + + if env_key == ToolEnv.PLATFORM_API_KEY: + if not self.platform_api_key: + raise SdkError(f"Env variable '{env_key}' is required") + return self.platform_api_key + + env_value = os.environ.get(env_key) + if env_value is None or env_value == "": + raise SdkError(f"Env variable '{env_key}' is required") + return env_value + + def stream_log( + self, + log: str, + level: LogLevel = LogLevel.INFO, + stage: str = "TOOL_RUN", + **kwargs: Any, # noqa: ANN401 + ) -> None: + _level_map = { + LogLevel.DEBUG: logging.DEBUG, + LogLevel.INFO: logging.INFO, + LogLevel.WARN: logging.WARNING, + LogLevel.ERROR: logging.ERROR, + LogLevel.FATAL: logging.CRITICAL, + } + py_level = _level_map.get(level, logging.INFO) + logging.getLogger("executor_tool_shim").log(py_level, log) + + def stream_error_and_exit(self, message: str, err: Exception | None = None) -> None: + raise SdkError(message, actual_err=err) + + +class TestExecutorToolShim: + """Tests for ExecutorToolShim behavior contract.""" + + def test_platform_api_key_returned(self: Self) -> None: + """get_env_or_die('PLATFORM_SERVICE_API_KEY') returns configured key.""" + shim = _MockExecutorToolShim(platform_api_key="sk-test-123") + result = shim.get_env_or_die(ToolEnv.PLATFORM_API_KEY) + assert result == "sk-test-123" + + def test_platform_api_key_missing_raises(self: Self) -> None: + """get_env_or_die('PLATFORM_SERVICE_API_KEY') raises when not configured.""" + shim = _MockExecutorToolShim(platform_api_key="") + with 
pytest.raises(SdkError, match="PLATFORM_SERVICE_API_KEY"): + shim.get_env_or_die(ToolEnv.PLATFORM_API_KEY) + + def test_other_env_var_from_environ( + self: Self, monkeypatch: pytest.MonkeyPatch + ) -> None: + """get_env_or_die() reads non-platform vars from os.environ.""" + monkeypatch.setenv("MY_CUSTOM_VAR", "custom_value") + shim = _MockExecutorToolShim(platform_api_key="sk-test") + result = shim.get_env_or_die("MY_CUSTOM_VAR") + assert result == "custom_value" + + def test_missing_env_var_raises(self: Self) -> None: + """get_env_or_die() raises SdkError for missing env var.""" + shim = _MockExecutorToolShim(platform_api_key="sk-test") + with pytest.raises(SdkError, match="NONEXISTENT_VAR"): + shim.get_env_or_die("NONEXISTENT_VAR") + + def test_empty_env_var_raises(self: Self, monkeypatch: pytest.MonkeyPatch) -> None: + """get_env_or_die() raises SdkError for empty env var.""" + monkeypatch.setenv("EMPTY_VAR", "") + shim = _MockExecutorToolShim(platform_api_key="sk-test") + with pytest.raises(SdkError, match="EMPTY_VAR"): + shim.get_env_or_die("EMPTY_VAR") + + def test_stream_log_routes_to_logging( + self: Self, caplog: pytest.LogCaptureFixture + ) -> None: + """stream_log() routes to Python logging, not stdout.""" + shim = _MockExecutorToolShim() + with caplog.at_level(logging.INFO, logger="executor_tool_shim"): + shim.stream_log("test message", level=LogLevel.INFO) + assert "test message" in caplog.text + + def test_stream_log_respects_level( + self: Self, caplog: pytest.LogCaptureFixture + ) -> None: + """stream_log() maps SDK LogLevel to Python logging level.""" + shim = _MockExecutorToolShim() + with caplog.at_level(logging.WARNING, logger="executor_tool_shim"): + shim.stream_log("debug msg", level=LogLevel.DEBUG) + shim.stream_log("warn msg", level=LogLevel.WARN) + # DEBUG should be filtered out at WARNING level + assert "debug msg" not in caplog.text + assert "warn msg" in caplog.text + + def test_stream_error_and_exit_raises_sdk_error( + self: Self, + ) 
-> None: + """stream_error_and_exit() raises SdkError (no sys.exit).""" + shim = _MockExecutorToolShim() + with pytest.raises(SdkError, match="something failed"): + shim.stream_error_and_exit("something failed") + + def test_stream_error_and_exit_wraps_original( + self: Self, + ) -> None: + """stream_error_and_exit() passes original exception.""" + shim = _MockExecutorToolShim() + original = ValueError("root cause") + with pytest.raises(SdkError) as exc_info: + shim.stream_error_and_exit("wrapper msg", err=original) + assert exc_info.value.actual_err is original diff --git a/unstract/sdk1/uv.lock b/unstract/sdk1/uv.lock index 935fca31d8..57561cc209 100644 --- a/unstract/sdk1/uv.lock +++ b/unstract/sdk1/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = "==3.12.*" [[package]] diff --git a/uv.lock b/uv.lock index e275f07da1..989ee7f1f2 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = "==3.12.*" [[package]] @@ -286,9 +286,9 @@ dependencies = [ { name = "platformdirs" }, { name = "pydantic" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/47/5d/54c79aaaa9aa1278af24cae98d81d6ef635ad840f046bc2ccb5041ddeb1b/banks-2.4.1.tar.gz", hash = "sha256:8cbf1553f14c44d4f7e9c2064ad9212ce53ee4da000b2f8308d548b60db56655", size = 188033, upload-time = "2026-02-17T11:21:14.855Z" } +sdist = { url = "https://files.pythonhosted.org/packages/7d/f8/25ef24814f77f3fd7f0fd3bd1ef3749e38a9dbd23502fbb53034de49900c/banks-2.2.0.tar.gz", hash = "sha256:d1446280ce6e00301e3e952dd754fd8cee23ff277d29ed160994a84d0d7ffe62", size = 179052, upload-time = "2025-07-18T16:28:26.892Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b8/5a/f38b49e8b225b0c774e97c9495e52ab9ccdf6d82bde68c513bd736820eb2/banks-2.4.1-py3-none-any.whl", hash = "sha256:40e6d9b6e9b69fb403fa31f2853b3297e4919c1b6f2179b2119d2d4473c6ed13", size = 35032, upload-time = "2026-02-17T11:21:13.236Z" }, + { url = 
"https://files.pythonhosted.org/packages/b4/d6/f9168956276934162ec8d48232f9920f2985ee45aa7602e3c6b4bc203613/banks-2.2.0-py3-none-any.whl", hash = "sha256:963cd5c85a587b122abde4f4064078def35c50c688c1b9d36f43c92503854e7d", size = 29244, upload-time = "2025-07-18T16:28:27.835Z" }, ] [[package]] @@ -1610,9 +1610,9 @@ dependencies = [ { name = "llama-index-readers-llama-parse" }, { name = "nltk" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e5/43/440cbd852b9372fd392cc81f72df75f17d6dfbe93a427c5911a3400ea168/llama_index-0.14.16.tar.gz", hash = "sha256:266c9b066f2eaee584188bbdb440ed4fd9ad41694c6c9c55c5f15e55eb9dcbc2", size = 9048, upload-time = "2026-03-10T19:20:29.96Z" } +sdist = { url = "https://files.pythonhosted.org/packages/40/27/5fffc57b98e753eff580184b6260b47d8d2fff4fc91edf75352402f33881/llama_index-0.13.2.tar.gz", hash = "sha256:110e5e8e077aab7643eecb0962bcdb927bdea6a2c9897606b4b26e498d93dd5b", size = 8029, upload-time = "2025-08-14T22:04:03.732Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/50/98/12ff971b3b5f4f82153adb7f5528a276f42aafcdd6193392c43998dd32de/llama_index-0.14.16-py3-none-any.whl", hash = "sha256:cb98fece42d485f52ca847d3d16af61984fdeb7f4c0793a069357ac6eb8293ce", size = 7847, upload-time = "2026-03-10T19:20:31.151Z" }, + { url = "https://files.pythonhosted.org/packages/5c/3a/de1a7d6cf24c41082464fa4bda82dba014acee0f438ef0cec606ba43ed28/llama_index-0.13.2-py3-none-any.whl", hash = "sha256:8de8eefffcfa64a9225267d7813fcb55b8ea12181d4044efe5b22642d91d2294", size = 7027, upload-time = "2025-08-14T22:04:02.408Z" }, ] [[package]] @@ -1624,9 +1624,9 @@ dependencies = [ { name = "llama-index-embeddings-openai" }, { name = "llama-index-llms-openai" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/3e/66/90747a02fa9f4e9503da40259d18105f75a02b3f3b6b722faf0502d8b40d/llama_index_cli-0.5.5.tar.gz", hash = "sha256:a2de5a22f675f60908c8cd1fd873f132cf2bfdf3462fa79ef5fbe6b95727a30b", size = 24852, upload-time = 
"2026-03-04T23:00:55.646Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d2/e3/ac6928586e20cfd327a2a38a00781cbc8fae923edcd0316c23e38aae1537/llama_index_cli-0.5.1.tar.gz", hash = "sha256:0446159d85c56c29022c1c830c9886f670d5f59d69343c3c029a3b20eda1a9d8", size = 24821, upload-time = "2025-09-12T15:22:44.064Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fc/ef/ca63ce9ae26de1c64fcf1876d67ee7996cecf6127f43a49c1e4a485d806c/llama_index_cli-0.5.5-py3-none-any.whl", hash = "sha256:ac041aa61c2e194266a07fea617500a063f389af7dd6ae02f8cd3f1f7644d06d", size = 28210, upload-time = "2026-03-04T23:00:54.696Z" }, + { url = "https://files.pythonhosted.org/packages/b3/16/b53af5b23921d1e18f57b7a79d557b34554df295c63f5c59d5bee1f5fb47/llama_index_cli-0.5.1-py3-none-any.whl", hash = "sha256:5429b2fd7960df7724c2955b6e6901f6fa910b7b5ecef411c979a8b545a6b7e2", size = 28179, upload-time = "2025-09-12T15:22:43.169Z" }, ] [[package]] @@ -1663,9 +1663,9 @@ dependencies = [ { name = "typing-inspect" }, { name = "wrapt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/13/cb/1d7383f9f4520bb1d921c34f18c147b4b270007135212cedfa240edcd4c3/llama_index_core-0.14.16.tar.gz", hash = "sha256:cf2b7e4b798cb5ebad19c935174c200595c7ecff84a83793540cc27b03636a52", size = 11599715, upload-time = "2026-03-10T19:19:52.476Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2d/f8/4f6e2bbc34ec6586456727a644960a1ff2d9db60b92071e213ad9d160456/llama_index_core-0.13.6.tar.gz", hash = "sha256:80315a6bd1f9804f48c1870eff1a0315bf9fe5a413747d53eb88a8ebb2602b97", size = 7232179, upload-time = "2025-09-07T03:27:26.544Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b4/f5/a33839bae0bd07e4030969bdba1ac90665e359ae88c56c296991ae16b8a8/llama_index_core-0.14.16-py3-none-any.whl", hash = "sha256:0cc273ebc44d51ad636217661a25f9cd02fb2d0440641430f105da3ae9f43a6b", size = 11944927, upload-time = "2026-03-10T19:19:48.043Z" }, + { url = 
"https://files.pythonhosted.org/packages/35/23/7e497216ece6e041c6a271f2b7952e5609729da0dcdf09dd3f25a4efc1b9/llama_index_core-0.13.6-py3-none-any.whl", hash = "sha256:67bec3c06a8105cd82d83db0f8c3122f4e4d8a4b9c7a2768cced6a2686ddb331", size = 7575324, upload-time = "2025-09-07T03:27:19.243Z" }, ] [[package]] @@ -1676,9 +1676,9 @@ dependencies = [ { name = "llama-index-core" }, { name = "openai" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ea/a1/d238dfa453ba8ebc4f6261d6384b663f50b8dba6f4b22d8be800b305863d/llama_index_embeddings_openai-0.5.2.tar.gz", hash = "sha256:091bd0c3e9182748e8827de7d79713a219d5f5e0dc97d1bb7b271cf524520e4b", size = 7630, upload-time = "2026-03-03T11:27:38.127Z" } +sdist = { url = "https://files.pythonhosted.org/packages/26/6a/80ed46993c6827786cdec4f6b553f3f4e5fc8741c31e8903c694833d24bf/llama_index_embeddings_openai-0.5.0.tar.gz", hash = "sha256:ac587839a111089ea8a6255f9214016d7a813b383bbbbf9207799be1100758eb", size = 7019, upload-time = "2025-07-30T19:55:05.699Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1f/5e/da156f9c77443d22287eeaea341fe35fdcc25e59a9250e4cb10d4d5a066a/llama_index_embeddings_openai-0.5.2-py3-none-any.whl", hash = "sha256:37e7967de05b05f16c9b171091110bb1c6e5a0720198ea306d57cd3920cb81b7", size = 7667, upload-time = "2026-03-03T11:27:37.394Z" }, + { url = "https://files.pythonhosted.org/packages/01/21/65f13a385292d7d573dfde472da7daff5f779345d60c5c3e274142ec8ba2/llama_index_embeddings_openai-0.5.0-py3-none-any.whl", hash = "sha256:d817edb22e3ff475e8cd1833faf1147028986bc1d688f7894ef947558864b728", size = 7009, upload-time = "2025-07-30T19:55:04.86Z" }, ] [[package]] @@ -1703,9 +1703,9 @@ dependencies = [ { name = "deprecated" }, { name = "pydantic" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/0b/45/293b89d330a989e444ade307252c190b497142761f1a6a8f20b300fefeb2/llama_index_instrumentation-0.4.3.tar.gz", hash = 
"sha256:6a8bd34b0c2fb9485971f952f3e5d63341eb87f8c55c82f2819a37e174494eb9", size = 48458, upload-time = "2026-03-03T19:01:27.146Z" } +sdist = { url = "https://files.pythonhosted.org/packages/70/e5/a3628da5d716d6bbc2c0a8d39b629dff81b33d5625c5b934e1456370064f/llama_index_instrumentation-0.4.1.tar.gz", hash = "sha256:a79d0dd2baba34f05ff4354d63a99b212322635b8afa6cc96ed00a7e11ebfdc3", size = 45788, upload-time = "2025-09-15T03:53:00.219Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c0/0f/5fe062395c8d444235fc228bb2577588be4e9db6fb5f4d1b662e9ac98aee/llama_index_instrumentation-0.4.3-py3-none-any.whl", hash = "sha256:4669d5f6da4b478784a196489d4b58072cb2e6928c3781d2bddd34f36da9cac1", size = 16446, upload-time = "2026-03-03T19:01:28.807Z" }, + { url = "https://files.pythonhosted.org/packages/3a/7a/c414f4dc9a7dd90d050c387489436bab2d678a566b704ede2f5b62f82ad7/llama_index_instrumentation-0.4.1-py3-none-any.whl", hash = "sha256:0d3ac926d0db3d39c0ec34ee72da5322d61e06b87fe956407e4a1e7a2708b936", size = 15063, upload-time = "2025-09-15T03:52:59.098Z" }, ] [[package]] @@ -1716,9 +1716,9 @@ dependencies = [ { name = "llama-index-core" }, { name = "openai" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/4a/5e/a7a47d46dc2eb30953d83654112c8af6f61821ca78ef3ea22e30729aac3a/llama_index_llms_openai-0.6.26.tar.gz", hash = "sha256:3474602ecbc30c88a8b585cfd5737891d45da78251a5e067c4dbc2d3cc3d08db", size = 27262, upload-time = "2026-03-05T02:53:50.581Z" } +sdist = { url = "https://files.pythonhosted.org/packages/20/59/4c414d79a21189d9db6de58ecbc297cd0f5ea121803b836bd134c67dd7a3/llama_index_llms_openai-0.5.4.tar.gz", hash = "sha256:9e36b6d2fc5f056b00ee655901b3bb7e7060b23f7b19439889fb78d696340f54", size = 24230, upload-time = "2025-08-16T22:41:17.408Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2e/8a/f46f59279c078b001374813f69987b43b7c3bd9df01981af545cf2d954d7/llama_index_llms_openai-0.6.26-py3-none-any.whl", hash = 
"sha256:2062ef505676d0a1c7c116c138c2f890aa7653619fc3ca697e47df7bd2ef8b3f", size = 28330, upload-time = "2026-03-05T02:53:40.421Z" }, + { url = "https://files.pythonhosted.org/packages/ad/62/aec65450b8d7ba723fa557884ac34d94b2b8f3876a54249c05d240a2be6c/llama_index_llms_openai-0.5.4-py3-none-any.whl", hash = "sha256:8d42fbfa56b5f281ad0dfcb2915916c188b5876625f9f8d27016b7dc4366cc24", size = 25357, upload-time = "2025-08-16T22:41:16.472Z" }, ] [[package]] @@ -1759,9 +1759,9 @@ dependencies = [ { name = "llama-index-core" }, { name = "pymilvus" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ef/79/72bf70fac3b77770a5c2b1b7d441aa1998bb522d50fe88b0f9c4071854e5/llama_index_vector_stores_milvus-0.9.6.tar.gz", hash = "sha256:6d38ac5939a570e0240687f54fbee4e1ff6c5faa2d28d25377a3f38d2ca07e2b", size = 15584, upload-time = "2026-01-13T11:46:41.394Z" } +sdist = { url = "https://files.pythonhosted.org/packages/92/50/428b4af2d65b3f0ec0b41638579a5d67c027d64f46c2e11769975737f0ef/llama_index_vector_stores_milvus-0.9.0.tar.gz", hash = "sha256:938f002aa0817c3afc85f233791fdeefd87093e806c5108411f07d8d616b3d30", size = 15284, upload-time = "2025-07-30T21:12:38.4Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/51/04/98359849c095b5a3eb06f0649865a30b5a733b9fdbfeac3c6951253f804c/llama_index_vector_stores_milvus-0.9.6-py3-none-any.whl", hash = "sha256:916cbd9b07035ec137905970ef6a49dd77d3ece6e0a79271db35705cca5f5f84", size = 15792, upload-time = "2026-01-13T11:46:39.708Z" }, + { url = "https://files.pythonhosted.org/packages/ac/fa/56b1d6626a4fcd968a940b13683b181cfd14bdb8b348772bedfa82b7e71d/llama_index_vector_stores_milvus-0.9.0-py3-none-any.whl", hash = "sha256:a08e20e72816c7b81cb82d27211e63ca175e4683b07e954adef1bae7a2c844f7", size = 15563, upload-time = "2025-07-30T21:12:37.465Z" }, ] [[package]] @@ -1772,9 +1772,9 @@ dependencies = [ { name = "llama-index-core" }, { name = "pinecone" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/5c/a0/2e2e969a133894f10b3a55b5148feef0c546ca8047b461f51f79d115c5b9/llama_index_vector_stores_pinecone-0.7.1.tar.gz", hash = "sha256:0ab3cc44f309bca1d74e58f221dade672169da01561114b067f4734293bd0280", size = 7852, upload-time = "2025-09-08T20:28:54.11Z" } +sdist = { url = "https://files.pythonhosted.org/packages/13/31/9be27780523a4784bea5cf7910004b0e805b9fef09a4a5ed3af38757cb2b/llama_index_vector_stores_pinecone-0.7.0.tar.gz", hash = "sha256:72f4828115d5857249fc7d7a0753a6b1c2644c929687d86f5bed41274e5b7e76", size = 7852, upload-time = "2025-07-30T20:54:28.213Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/93/22/ae8c3073e4866a41eb53030db7cfecca1d84192c16a67d40a76f8d593e6d/llama_index_vector_stores_pinecone-0.7.1-py3-none-any.whl", hash = "sha256:861c4d01b3766cdca318f1285c03cd5e52dabf3d2f136cb38db421b16103129a", size = 8041, upload-time = "2025-09-08T20:28:53.406Z" }, + { url = "https://files.pythonhosted.org/packages/54/de/901d76d42474cce0aa8c054ee76e4dc9967d8df84907797ab99b3423d988/llama_index_vector_stores_pinecone-0.7.0-py3-none-any.whl", hash = "sha256:023ac4cde067f7154cc90534b72388c0b6905eaa41f30c7ef1446f67e3549b25", size = 8039, upload-time = "2025-07-30T20:54:27.487Z" }, ] [[package]] @@ -1788,9 +1788,9 @@ dependencies = [ { name = "psycopg2-binary" }, { name = "sqlalchemy", extra = ["asyncio"] }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ef/78/04ff0cb9e14b8c1c3cb8716fab35c95ec2a4b551d769c65031c5c8624337/llama_index_vector_stores_postgres-0.7.3.tar.gz", hash = "sha256:7b5c62e462d681d7b8d8668b93e5b0023bfd3aaafcf76e2b4bfcf885dc3b49c6", size = 11950, upload-time = "2026-01-22T15:14:13.007Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a3/2e/ddd8accef30a39f8ffb7bae9f5a5c91ba5f1f45ede1d55c73ba78e61e23a/llama_index_vector_stores_postgres-0.6.3.tar.gz", hash = "sha256:b15d2e7c3bf2a0b18754934a84cf5324403b9401e2b31bcdb00418ed2d03770c", size = 11316, upload-time = 
"2025-08-12T12:36:35.281Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/cc/de/140f678d930ea869fc989adaa0140b9267c5ee0c7b971d061112d0d5b75a/llama_index_vector_stores_postgres-0.7.3-py3-none-any.whl", hash = "sha256:65b70266cc6041ab5011d64d1183d8783112ba5b38eb32ca21e00ea5b96aa058", size = 11635, upload-time = "2026-01-22T15:14:13.722Z" }, + { url = "https://files.pythonhosted.org/packages/a0/cd/0aa5189615f33e805d8bc306d8a0f646892b55245e88fe6fb8df61059f66/llama_index_vector_stores_postgres-0.6.3-py3-none-any.whl", hash = "sha256:6086b7d450bf1204eb5523cd924c8395fc9cbd212f337d1caef18ce41cefc198", size = 11042, upload-time = "2025-08-12T12:36:33.019Z" }, ] [[package]] @@ -1802,9 +1802,9 @@ dependencies = [ { name = "llama-index-core" }, { name = "qdrant-client" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c2/5b/4957f112b2405e960f425f92e337f1ad22315c0cda50c5117575b0e08d10/llama_index_vector_stores_qdrant-0.9.2.tar.gz", hash = "sha256:c7f8138a0f4f79bed79a32b7c875d2766849b02a779617e55b8d1feb1a9e605a", size = 14698, upload-time = "2026-03-06T11:20:52.802Z" } +sdist = { url = "https://files.pythonhosted.org/packages/88/84/441a41a34dea214c89e3cabc177f07615ba4b434d46a70ba810c8c3c5bcd/llama_index_vector_stores_qdrant-0.7.1.tar.gz", hash = "sha256:d51a561dc5aad270c4bbed72370cea9002e4b72d0038ec5b465f6bcdb67b1213", size = 13013, upload-time = "2025-07-31T18:18:55.931Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/42/d7/5e137cee4acf04ae49e6be865cf732174353439377ba65999e43b91ad978/llama_index_vector_stores_qdrant-0.9.2-py3-none-any.whl", hash = "sha256:631139e3dc84a831e949758f99b0858a53c835c5a5878a68d676b3eb579c824f", size = 14953, upload-time = "2026-03-06T11:20:55.127Z" }, + { url = "https://files.pythonhosted.org/packages/44/b3/623615e44ff4c19ca593a620eef670cad9bed78fe6e4d364753415b71aa0/llama_index_vector_stores_qdrant-0.7.1-py3-none-any.whl", hash = "sha256:f48eeb9228f7dc7e4d41a55d76dcf6d93b8bfbea1c943c09140a09252018f577", 
size = 13204, upload-time = "2025-07-31T18:18:54.364Z" }, ] [[package]] @@ -1815,9 +1815,9 @@ dependencies = [ { name = "llama-index-core" }, { name = "weaviate-client" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/3d/9c/fd6eae66b87e736807776c6824bebebb44098630af2dac8cd5cdf5938d0d/llama_index_vector_stores_weaviate-1.5.0.tar.gz", hash = "sha256:99ba6dbdcf92e9ec56f464de2d71ed3c0503e3fc5b71f9d74dbc32da981b0cf5", size = 9679, upload-time = "2026-02-20T23:20:19.83Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5e/ab/6da9ec13e8c1a6dc2a00eb17074388a4720d66252f9b784b725f2282ca5e/llama_index_vector_stores_weaviate-1.4.0.tar.gz", hash = "sha256:c5374406b90b4f27455c623a84f56c6df3d71408ffac8984cab39edc8f6a748e", size = 8535, upload-time = "2025-07-30T20:57:22.275Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/22/dd/09efa4551016a9e95d53dbb3cd7ae7a9bf3fd07be1a3e1ac3eada7b4c5e5/llama_index_vector_stores_weaviate-1.5.0-py3-none-any.whl", hash = "sha256:8e24d920a0cc241dcf0cdbfe29541aeca6a9f8cf29606ed90325978f972636a5", size = 10439, upload-time = "2026-02-20T23:20:19.057Z" }, + { url = "https://files.pythonhosted.org/packages/e6/70/aef7524a6ed14f79dca84685559045b303cb43f11a38b9f790e6274115e2/llama_index_vector_stores_weaviate-1.4.0-py3-none-any.whl", hash = "sha256:5e3ac7e499e20988f8165c7dfa223b64714572164114e5818c3d51ff273a0c53", size = 9326, upload-time = "2025-07-30T20:57:21.207Z" }, ] [[package]] @@ -1829,9 +1829,9 @@ dependencies = [ { name = "pydantic" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/bf/0e/f928ab689d387bce277efc43bb4e6b0840af914315728a62203452f3b424/llama_index_workflows-2.16.0.tar.gz", hash = "sha256:be4df184498c649dc1558a074794402a3bb0eb59a83e28ff5cfe9885d9787187", size = 83347, upload-time = "2026-03-11T16:40:34.652Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/00/dc/54fd5dec0ad3c65f3e8a520db7a3024141b71cd41660d0baca3cd6b18707/llama_index_workflows-1.3.0.tar.gz", hash = "sha256:9c1688e237efad384f16485af71c6f9456a2eb6d85bf61ff49e5717f10ff286d", size = 1040839, upload-time = "2025-08-07T09:11:00.307Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/98/a5/823834f8ee527fde3976c66bddffb9d757bd3a666303658682fe0a3ce23a/llama_index_workflows-2.16.0-py3-none-any.whl", hash = "sha256:564f19987f6cbd6bca69581cc934a721a402c1cc773c58faea4fb0041350f77c", size = 106493, upload-time = "2026-03-11T16:40:32.062Z" }, + { url = "https://files.pythonhosted.org/packages/3b/cf/0c50bc6e5c4fb7913f5682a0d26a60b976533dd8a87a5dbd84f617c6f1ab/llama_index_workflows-1.3.0-py3-none-any.whl", hash = "sha256:328cc25d92b014ef527f105a2f2088c0924fff0494e53d93decb951f14fbfe47", size = 42527, upload-time = "2025-08-07T09:10:59.155Z" }, ] [[package]] @@ -1841,9 +1841,9 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "llama-cloud-services" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/08/f6/93b5d123c480bc8c93e6dc3ea930f4f8df8da27f829bb011100ba3ce23dc/llama_parse-0.6.54.tar.gz", hash = "sha256:c707b31152155c9bae84e316fab790bbc8c85f4d8825ce5ee386ebeb7db258f1", size = 3577, upload-time = "2025-08-01T20:09:23.762Z" } +sdist = { url = "https://files.pythonhosted.org/packages/3b/02/63839a55f6f207110400c4f394152fd0290e9f8e450226b02a87cfdbd835/llama_parse-0.5.19.tar.gz", hash = "sha256:db69da70e199a2664705eb983a70fa92b7cee19dd6cff175af7692a0b8a4dd53", size = 16100, upload-time = "2024-12-27T19:08:43.051Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/05/50/c5ccd2a50daa0a10c7f3f7d4e6992392454198cd8a7d99fcb96cb60d0686/llama_parse-0.6.54-py3-none-any.whl", hash = "sha256:c66c8d51cf6f29a44eaa8595a595de5d2598afc86e5a33a4cebe5fe228036920", size = 4879, upload-time = "2025-08-01T20:09:22.651Z" }, + { url = 
"https://files.pythonhosted.org/packages/38/b7/3ff106e8199992bb62e72f195c8f6f2f2fe4a185f5f92746f0ed9db5c5d2/llama_parse-0.5.19-py3-none-any.whl", hash = "sha256:715cc895d183531b4299359d4f4004089b2e522f5f137f316084e7aa04035b62", size = 15421, upload-time = "2024-12-27T19:08:41.974Z" }, ] [[package]] @@ -1854,9 +1854,9 @@ dependencies = [ { name = "requests" }, { name = "tenacity" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/38/44/18d4158618ebbd76ceb8e43b8deb77f4983e6f1ccff2dffd73d6f3fb1628/llmwhisperer_client-2.6.2.tar.gz", hash = "sha256:ce846af62e7e7337dfcfe2960ec72de2989457b717ab7b9dd4110ee82c002ed0", size = 3268197, upload-time = "2026-02-23T10:52:17.634Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ac/2d/3657ff470c491c3778ae519e51756b2aee8b8ba25bf4d0fd0a77662146f5/llmwhisperer_client-2.5.0.tar.gz", hash = "sha256:8d08df695ca74513ca904ddb42620ecf70a1eb8b432872ba15fbf238529245ac", size = 3261186, upload-time = "2025-11-04T12:50:58.969Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/27/eb/0f9edd21302eddc6020a1b43a78e27f361e8b6b8af7611134a58487f7d8a/llmwhisperer_client-2.6.2-py3-none-any.whl", hash = "sha256:7226344506bc85a663e4d4f8feb763f853ec9bcb6cea9bd9cf170ba135c50cdd", size = 10857, upload-time = "2026-02-23T10:52:16.325Z" }, + { url = "https://files.pythonhosted.org/packages/0b/dc/f99fb0903d244066ef1207b7b465e3acfd68df575b129e45e397f05bc1f4/llmwhisperer_client-2.5.0-py3-none-any.whl", hash = "sha256:b637aa914875a25b76de60cb0e92be9237d554967380d2aeeab70ecadcff9bab", size = 9733, upload-time = "2025-11-04T12:50:57.671Z" }, ] [[package]] @@ -2106,6 +2106,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3a/a4/611155711f8af347875c15b8b83f5fd9e978bd4de45f90085b9a583b684d/Office365_REST_Python_Client-2.6.2-py3-none-any.whl", hash = "sha256:06fc6829c39b503897caa9d881db419d7f97a8e4f1c95c4c2d12db36ea6c955d", size = 1337139, upload-time = "2025-05-11T10:24:18.926Z" }, ] +[[package]] +name = 
"office365-rest-python-client" +version = "2.6.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "msal" }, + { name = "pytz" }, + { name = "requests" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bc/04/6dce2d581c54a8e55a3b128cf79a93821a68a62bb9a956e65476c5bb247e/office365_rest_python_client-2.6.2.tar.gz", hash = "sha256:ce27f5a1c0cc3ff97041ccd9b386145692be4c64739f243f7d6ac3edbe0a3c46", size = 659460, upload-time = "2025-05-11T10:24:21.895Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3a/a4/611155711f8af347875c15b8b83f5fd9e978bd4de45f90085b9a583b684d/Office365_REST_Python_Client-2.6.2-py3-none-any.whl", hash = "sha256:06fc6829c39b503897caa9d881db419d7f97a8e4f1c95c4c2d12db36ea6c955d", size = 1337139, upload-time = "2025-05-11T10:24:18.926Z" }, +] + [[package]] name = "openai" version = "2.26.0" @@ -2120,9 +2135,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d7/91/2a06c4e9597c338cac1e5e5a8dd6f29e1836fc229c4c523529dca387fda8/openai-2.26.0.tar.gz", hash = "sha256:b41f37c140ae0034a6e92b0c509376d907f3a66109935fba2c1b471a7c05a8fb", size = 666702, upload-time = "2026-03-05T23:17:35.874Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c6/a1/a303104dc55fc546a3f6914c842d3da471c64eec92043aef8f652eb6c524/openai-1.109.1.tar.gz", hash = "sha256:d173ed8dbca665892a6db099b4a2dfac624f94d20a93f46eb0b56aae940ed869", size = 564133, upload-time = "2025-09-24T13:00:53.075Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c6/2e/3f73e8ca53718952222cacd0cf7eecc9db439d020f0c1fe7ae717e4e199a/openai-2.26.0-py3-none-any.whl", hash = "sha256:6151bf8f83802f036117f06cc8a57b3a4da60da9926826cc96747888b57f394f", size = 1136409, upload-time = "2026-03-05T23:17:34.072Z" }, + { url = 
"https://files.pythonhosted.org/packages/1d/2a/7dd3d207ec669cacc1f186fd856a0f61dbc255d24f6fdc1a6715d6051b0f/openai-1.109.1-py3-none-any.whl", hash = "sha256:6bcaf57086cf59159b8e27447e4e7dd019db5d29a438072fbd49c290c7e65315", size = 948627, upload-time = "2025-09-24T13:00:50.754Z" }, ] [[package]] @@ -2850,9 +2865,9 @@ dependencies = [ { name = "pydantic" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ca/7d/3cd10e26ae97b35cf856ca1dc67576e42414ae39502c51165bb36bb1dff8/qdrant_client-1.16.2.tar.gz", hash = "sha256:ca4ef5f9be7b5eadeec89a085d96d5c723585a391eb8b2be8192919ab63185f0", size = 331112, upload-time = "2025-12-12T10:58:30.866Z" } +sdist = { url = "https://files.pythonhosted.org/packages/79/8b/76c7d325e11d97cb8eb5e261c3759e9ed6664735afbf32fdded5b580690c/qdrant_client-1.15.1.tar.gz", hash = "sha256:631f1f3caebfad0fd0c1fba98f41be81d9962b7bf3ca653bed3b727c0e0cbe0e", size = 295297, upload-time = "2025-07-31T19:35:19.627Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/08/13/8ce16f808297e16968269de44a14f4fef19b64d9766be1d6ba5ba78b579d/qdrant_client-1.16.2-py3-none-any.whl", hash = "sha256:442c7ef32ae0f005e88b5d3c0783c63d4912b97ae756eb5e052523be682f17d3", size = 377186, upload-time = "2025-12-12T10:58:29.282Z" }, + { url = "https://files.pythonhosted.org/packages/ef/33/d8df6a2b214ffbe4138db9a1efe3248f67dc3c671f82308bea1582ecbbb7/qdrant_client-1.15.1-py3-none-any.whl", hash = "sha256:2b975099b378382f6ca1cfb43f0d59e541be6e16a5892f282a4b8de7eff5cb63", size = 337331, upload-time = "2025-07-31T19:35:17.539Z" }, ] [[package]] diff --git a/workers/.env.test b/workers/.env.test new file mode 100644 index 0000000000..8cda6b9dc0 --- /dev/null +++ b/workers/.env.test @@ -0,0 +1,4 @@ +# Test environment variables for workers tests. +# Loaded by tests/conftest.py before any shared package imports. 
+INTERNAL_API_BASE_URL=http://localhost:8000 +INTERNAL_SERVICE_API_KEY=test-key diff --git a/workers/callback/tasks.py b/workers/callback/tasks.py index d7252b9d29..42599f0659 100644 --- a/workers/callback/tasks.py +++ b/workers/callback/tasks.py @@ -1505,11 +1505,11 @@ def _process_batch_callback_core( # Re-raise for Celery retry mechanism raise finally: - if context.api_client is not None: - try: + try: + if context.api_client: context.api_client.close() - except Exception as e: - logger.warning("api_client.close() failed during cleanup: %s", e) + except Exception as e: + logger.warning("api_client.close() failed during callback cleanup: %s", e) @app.task( diff --git a/workers/executor/README.md b/workers/executor/README.md new file mode 100644 index 0000000000..562d2540f1 --- /dev/null +++ b/workers/executor/README.md @@ -0,0 +1,59 @@ +# Executor Worker + +Celery worker that handles LLM extraction, indexing, and prompt execution for the Unstract platform. + +## How It Works + +```text +Browser → Django Backend → RabbitMQ → Executor Worker → Callback → WebSocket → Browser +``` + +1. User clicks "Run" in Prompt Studio IDE → Backend dispatches task to `celery_executor_legacy` queue +2. Executor worker picks up task, runs LLM extraction +3. Result triggers callback on `ide_callback` queue +4. IDE callback worker persists results via internal API and pushes via Socket.IO +5. Browser receives result in real-time + +## Services Involved + +| Service | Purpose | +|---------|---------| +| `worker-executor-v2` | Runs LLM extraction, indexing, prompts | +| `worker-ide-callback` | Post-execution callbacks via internal API + Socket.IO events | +| `backend` | Django REST API + Socket.IO | +| `platform-service` | Adapter credential management | +| `prompt-service` | Prompt template service | + +## Configuration + +The executor worker starts automatically with `./run-platform.sh` — no extra configuration needed. 
+ +Key environment variables (in `docker/sample.env` and `workers/sample.env`): + +| Variable | Default | Description | +|----------|---------|-------------| +| `WORKER_EXECUTOR_CONCURRENCY` | `2` | Number of concurrent executor processes | +| `WORKER_EXECUTOR_POOL` | `prefork` | Celery pool type | +| `EXECUTOR_TASK_TIME_LIMIT` | `3600` | Hard timeout per task (seconds) | +| `EXECUTOR_TASK_SOFT_TIME_LIMIT` | `3300` | Soft timeout per task (seconds) | +| `EXECUTOR_RESULT_TIMEOUT` | `3600` | How long callers wait for results | +| `EXECUTOR_AUTOSCALE` | `2,1` | Max,min worker autoscale | + +## Queue + +Listens on: `celery_executor_legacy` + +Configurable via `CELERY_QUEUES_EXECUTOR` environment variable. + +## Docker + +Defined in `docker/docker-compose.yaml` as `worker-executor-v2`. Uses the unified worker image (`unstract/worker-unified`) with `executor` command. + +## Local Development + +```bash +cd workers +cp sample.env .env +# Edit .env: change Docker hostnames to localhost +./run-worker.sh executor +``` diff --git a/workers/executor/__init__.py b/workers/executor/__init__.py new file mode 100644 index 0000000000..7982e4d411 --- /dev/null +++ b/workers/executor/__init__.py @@ -0,0 +1,12 @@ +"""Executor Worker + +Celery worker for running extraction executors. +Dispatches ExecutionContext to registered executors and returns +ExecutionResult via the Celery result backend. +""" + +from .worker import app as celery_app + +__all__ = [ + "celery_app", +] diff --git a/workers/executor/executor_tool_shim.py b/workers/executor/executor_tool_shim.py new file mode 100644 index 0000000000..63f48dd253 --- /dev/null +++ b/workers/executor/executor_tool_shim.py @@ -0,0 +1,182 @@ +"""ExecutorToolShim — Lightweight BaseTool substitute for executor workers. + +Adapters (PlatformHelper, LLM, Embedding, VectorDB, X2Text) all require +a ``tool: BaseTool`` parameter that provides ``get_env_or_die()`` and +``stream_log()``. 
The executor worker has no ``BaseTool`` instance, so +this shim provides just those two methods. + +Precedent: ``prompt-service/.../helpers/prompt_ide_base_tool.py`` +(``PromptServiceBaseTool``). +""" + +import logging +import os +from typing import Any + +from unstract.core.pubsub_helper import LogPublisher +from unstract.sdk1.constants import LogLevel, ToolEnv +from unstract.sdk1.exceptions import SdkError +from unstract.sdk1.tool.stream import StreamMixin + +logger = logging.getLogger(__name__) + +# Map SDK log levels to the string levels used by LogPublisher. +_SDK_TO_WF_LEVEL: dict[LogLevel, str] = { + LogLevel.DEBUG: "INFO", # DEBUG not surfaced to frontend + LogLevel.INFO: "INFO", + LogLevel.WARN: "WARN", + LogLevel.ERROR: "ERROR", + LogLevel.FATAL: "ERROR", +} + +# Mapping from SDK LogLevel enum to Python logging levels. +_LEVEL_MAP = { + LogLevel.DEBUG: logging.DEBUG, + LogLevel.INFO: logging.INFO, + LogLevel.WARN: logging.WARNING, + LogLevel.ERROR: logging.ERROR, + LogLevel.FATAL: logging.CRITICAL, +} + + +class ExecutorToolShim(StreamMixin): + """Minimal BaseTool substitute for use inside executor workers. + + Provides the two methods that adapters actually call: + + - ``get_env_or_die(env_key)`` — reads env vars, with special + handling for ``PLATFORM_SERVICE_API_KEY`` (multitenancy) + - ``stream_log(log, level)`` — routes to Python logging instead + of the Unstract stdout JSON protocol used by tools + + Usage:: + + shim = ExecutorToolShim(platform_api_key="sk-...") + adapter = SomeAdapter(tool=shim) # adapter calls shim.get_env_or_die() + """ + + def __init__( + self, + platform_api_key: str = "", + log_events_id: str = "", + component: dict[str, str] | None = None, + ) -> None: + """Initialize the shim. + + Args: + platform_api_key: The platform service API key for this + execution. Returned by ``get_env_or_die()`` when the + caller asks for ``PLATFORM_SERVICE_API_KEY``. + log_events_id: Socket.IO channel ID for streaming progress + logs. 
Empty string disables publishing. + component: Structured identifier dict for log correlation + (``tool_id``, ``run_id``, ``doc_name``, optionally + ``prompt_key``). + """ + self.platform_api_key = platform_api_key + self.log_events_id = log_events_id + self.component = component or {} + # Initialize StreamMixin. EXECUTION_BY_TOOL is not set in + # the worker environment, so _exec_by_tool will be False. + super().__init__(log_level=LogLevel.INFO) + + def get_env_or_die(self, env_key: str) -> str: + """Return environment variable value. + + Special-cases ``PLATFORM_SERVICE_API_KEY`` to return the key + passed at construction time (supports multitenancy — each + execution may use a different org's API key). + + Args: + env_key: Environment variable name. + + Returns: + The value of the environment variable. + + Raises: + SdkError: If the variable is missing or empty. + """ + if env_key == ToolEnv.PLATFORM_API_KEY: + if not self.platform_api_key: + raise SdkError(f"Env variable '{env_key}' is required") + return self.platform_api_key + + env_value = os.environ.get(env_key) + if env_value is None or env_value == "": + raise SdkError(f"Env variable '{env_key}' is required") + return env_value + + def stream_log( + self, + log: str, + level: LogLevel = LogLevel.INFO, + stage: str = "TOOL_RUN", + **kwargs: dict[str, Any], + ) -> None: + """Route log messages to Python logging and publish progress. + + In the executor worker context, logs go through the standard + Python logging framework (captured by Celery) rather than the + Unstract stdout JSON protocol used by tools. + + Progress messages are published via ``LogPublisher.publish()`` + to the Redis broker (shared with worker-logging). + + Args: + log: The log message. + level: SDK log level. + stage: Ignored (only meaningful for stdout protocol). + **kwargs: Ignored (only meaningful for stdout protocol). 
+ """ + py_level = _LEVEL_MAP.get(level, logging.INFO) + logger.log(py_level, log) + + # Respect log level threshold for frontend publishing (matches + # StreamMixin.stream_log behaviour). Python logging above still + # captures everything for debugging. + _levels = [ + LogLevel.DEBUG, + LogLevel.INFO, + LogLevel.WARN, + LogLevel.ERROR, + LogLevel.FATAL, + ] + if _levels.index(level) < _levels.index(self.log_level): + return + + # Publish progress to frontend via the log consumer queue. + if self.log_events_id: + try: + wf_level = _SDK_TO_WF_LEVEL.get(level, "INFO") + payload = LogPublisher.log_progress( + component=self.component, + level=wf_level, + state=stage, + message=log, + ) + LogPublisher.publish( + channel_id=self.log_events_id, + payload=payload, + ) + except Exception: + logger.debug( + "Failed to publish progress log (non-fatal)", + exc_info=True, + ) + + def stream_error_and_exit(self, message: str, err: Exception | None = None) -> None: + """Log error and raise SdkError. + + Unlike the base StreamMixin which may call ``sys.exit(1)`` + when running as a tool, the executor worker always raises + an exception so the Celery task can handle it gracefully. + + Args: + message: Error description. + err: Original exception, if any. + + Raises: + SdkError: Always. + """ + logger.error(message) + raise SdkError(message, actual_err=err) diff --git a/workers/executor/executors/__init__.py b/workers/executor/executors/__init__.py new file mode 100644 index 0000000000..cb2b54c980 --- /dev/null +++ b/workers/executor/executors/__init__.py @@ -0,0 +1,16 @@ +"""Executor implementations package. + +Importing this module triggers ``@ExecutorRegistry.register`` for all +bundled executors and discovers cloud executors via entry points. +""" + +from executor.executors.legacy_executor import LegacyExecutor +from executor.executors.plugins.loader import ExecutorPluginLoader + +# Discover and register cloud executors installed via entry points. 
+# Each cloud executor class is decorated with @ExecutorRegistry.register, +# so importing it (via ep.load()) is enough to register it. +# If no cloud plugins are installed this returns an empty list. +_cloud_executors = ExecutorPluginLoader.discover_executors() + +__all__ = ["LegacyExecutor"] diff --git a/workers/executor/executors/answer_prompt.py b/workers/executor/executors/answer_prompt.py new file mode 100644 index 0000000000..89936fe598 --- /dev/null +++ b/workers/executor/executors/answer_prompt.py @@ -0,0 +1,364 @@ +"""Answer prompt service — prompt construction and LLM execution. + +Ported from prompt-service/.../services/answer_prompt.py. +Flask dependencies (app.logger, PluginManager, APIError) replaced with +standard logging and executor exceptions. + +Highlight/word-confidence support is available via the ``process_text`` +callback parameter — callers pass the highlight-data plugin's ``run`` +method when the plugin is installed. Challenge and evaluation plugins +are integrated at the caller level (LegacyExecutor). +""" + +import ipaddress +import logging +import os +import socket +from typing import Any +from urllib.parse import urlparse + +from executor.executors.constants import PromptServiceConstants as PSKeys +from executor.executors.exceptions import LegacyExecutorError, RateLimitError + +logger = logging.getLogger(__name__) + + +def _resolve_host_addresses(host: str) -> set[str]: + """Resolve a hostname or IP string to a set of IP address strings.""" + try: + ipaddress.ip_address(host) + return {host} + except ValueError: + pass + try: + return { + sockaddr[0] + for _family, _type, _proto, _canonname, sockaddr in socket.getaddrinfo( + host, None, type=socket.SOCK_STREAM + ) + } + except Exception: + return set() + + +def _is_safe_public_url(url: str) -> bool: + """Validate webhook URL for SSRF protection. + + Only allows HTTPS and blocks private/loopback/internal addresses. 
+ """ + try: + p = urlparse(url) + if p.scheme not in ("https",): + return False + host = p.hostname or "" + if host in ("localhost",): + return False + + addrs = _resolve_host_addresses(host) + if not addrs: + return False + + for addr in addrs: + try: + ip = ipaddress.ip_address(addr) + except ValueError: + return False + if ( + ip.is_private + or ip.is_loopback + or ip.is_link_local + or ip.is_reserved + or ip.is_multicast + ): + return False + return True + except Exception: + return False + + +class AnswerPromptService: + @staticmethod + def extract_variable( + structured_output: dict[str, Any], + variable_names: list[Any], + output: dict[str, Any], + promptx: str, + ) -> str: + """Replace %variable_name% references in the prompt text.""" + for variable_name in variable_names: + if promptx.find(f"%{variable_name}%") >= 0: + if variable_name in structured_output: + promptx = promptx.replace( + f"%{variable_name}%", + str(structured_output[variable_name]), + ) + else: + raise ValueError( + f"Variable {variable_name} not found in structured output" + ) + + if promptx != output[PSKeys.PROMPT]: + logger.debug( + "Prompt modified by variable replacement for: %s", + output.get(PSKeys.NAME, ""), + ) + return promptx + + @staticmethod + def construct_and_run_prompt( + tool_settings: dict[str, Any], + output: dict[str, Any], + llm: Any, + context: str, + prompt: str, + metadata: dict[str, Any], + file_path: str = "", + execution_source: str | None = "ide", + process_text: Any = None, + ) -> str: + """Construct the full prompt and run LLM completion. + + Args: + tool_settings: Global tool settings (preamble, postamble, etc.) + output: The prompt definition dict. + llm: LLM adapter instance. + context: Retrieved context string. + prompt: Key into ``output`` for the prompt text (usually "promptx"). + metadata: Metadata dict (updated in place with highlight info). + file_path: Path to the extracted text file. + execution_source: "ide" or "tool". 
+ process_text: Optional callback for text processing during + completion (e.g. highlight-data plugin's ``run`` method). + + Returns: + The LLM answer string. + """ + platform_postamble = tool_settings.get(PSKeys.PLATFORM_POSTAMBLE, "") + word_confidence_postamble = tool_settings.get( + PSKeys.WORD_CONFIDENCE_POSTAMBLE, "" + ) + summarize_as_source = tool_settings.get(PSKeys.SUMMARIZE_AS_SOURCE) + enable_highlight = tool_settings.get(PSKeys.ENABLE_HIGHLIGHT, False) + enable_word_confidence = tool_settings.get(PSKeys.ENABLE_WORD_CONFIDENCE, False) + if not enable_highlight: + enable_word_confidence = False + prompt_type = output.get(PSKeys.TYPE, PSKeys.TEXT) + if not enable_highlight or summarize_as_source: + platform_postamble = "" + if not enable_word_confidence or summarize_as_source: + word_confidence_postamble = "" + + prompt = AnswerPromptService.construct_prompt( + preamble=tool_settings.get(PSKeys.PREAMBLE, ""), + prompt=output[prompt], + postamble=tool_settings.get(PSKeys.POSTAMBLE, ""), + grammar_list=tool_settings.get(PSKeys.GRAMMAR, []), + context=context, + platform_postamble=platform_postamble, + word_confidence_postamble=word_confidence_postamble, + prompt_type=prompt_type, + ) + output[PSKeys.COMBINED_PROMPT] = prompt + return AnswerPromptService.run_completion( + llm=llm, + prompt=prompt, + metadata=metadata, + prompt_key=output[PSKeys.NAME], + prompt_type=prompt_type, + enable_highlight=enable_highlight, + enable_word_confidence=enable_word_confidence, + file_path=file_path, + execution_source=execution_source, + process_text=process_text, + ) + + @staticmethod + def _build_grammar_notes(grammar_list: list[dict[str, Any]]) -> str: + """Build grammar synonym notes for prompt injection.""" + if not grammar_list: + return "" + notes = "\n" + for grammar in grammar_list: + word = grammar.get(PSKeys.WORD, "") + synonyms = grammar.get(PSKeys.SYNONYMS, []) if word else [] + if synonyms and word: + notes += ( + f"\nNote: You can consider that the word 
'{word}' " + f"is the same as {', '.join(synonyms)} " + f"in both the question and the context." + ) + return notes + + @staticmethod + def construct_prompt( + preamble: str, + prompt: str, + postamble: str, + grammar_list: list[dict[str, Any]], + context: str, + platform_postamble: str, + word_confidence_postamble: str, + prompt_type: str = "text", + ) -> str: + """Build the full prompt string with preamble, grammar, postamble, context.""" + prompt = f"{preamble}\n\nQuestion or Instruction: {prompt}" + prompt += AnswerPromptService._build_grammar_notes(grammar_list) + if prompt_type == PSKeys.JSON: + json_postamble = os.environ.get( + PSKeys.JSON_POSTAMBLE, PSKeys.DEFAULT_JSON_POSTAMBLE + ) + postamble += f"\n{json_postamble}" + if platform_postamble: + platform_postamble += "\n\n" + if word_confidence_postamble: + platform_postamble += f"{word_confidence_postamble}\n\n" + prompt += ( + f"\n\n{postamble}\n\nContext:\n---------------\n{context}\n" + f"-----------------\n\n{platform_postamble}Answer:" + ) + return prompt + + @staticmethod + def run_completion( + llm: Any, + prompt: str, + metadata: dict[str, str] | None = None, + prompt_key: str | None = None, + prompt_type: str | None = "text", + enable_highlight: bool = False, + enable_word_confidence: bool = False, + file_path: str = "", + execution_source: str | None = None, + process_text: Any = None, + ) -> str: + """Run LLM completion and extract the answer. + + Args: + process_text: Optional callback for text processing during + completion (e.g. highlight-data plugin's ``run`` method). + When provided, the SDK passes LLM response text through + this callback, enabling source attribution. 
+ """ + try: + from unstract.sdk1.exceptions import RateLimitError as _sdk_rate_limit_error + from unstract.sdk1.exceptions import SdkError as _sdk_error + except ImportError: + _sdk_rate_limit_error = Exception + _sdk_error = Exception + + try: + completion = llm.complete( + prompt=prompt, + process_text=process_text, + extract_json=prompt_type.lower() != PSKeys.TEXT, + ) + answer: str = completion[PSKeys.RESPONSE].text + highlight_data = completion.get(PSKeys.HIGHLIGHT_DATA, []) + confidence_data = completion.get(PSKeys.CONFIDENCE_DATA) + word_confidence_data = completion.get(PSKeys.WORD_CONFIDENCE_DATA) + line_numbers = completion.get(PSKeys.LINE_NUMBERS, []) + whisper_hash = completion.get(PSKeys.WHISPER_HASH, "") + if metadata is not None and prompt_key: + metadata.setdefault(PSKeys.HIGHLIGHT_DATA, {})[prompt_key] = ( + highlight_data + ) + metadata.setdefault(PSKeys.LINE_NUMBERS, {})[prompt_key] = line_numbers + metadata[PSKeys.WHISPER_HASH] = whisper_hash + if confidence_data: + metadata.setdefault(PSKeys.CONFIDENCE_DATA, {})[prompt_key] = ( + confidence_data + ) + if enable_word_confidence and word_confidence_data: + metadata.setdefault(PSKeys.WORD_CONFIDENCE_DATA, {})[prompt_key] = ( + word_confidence_data + ) + return answer + except _sdk_rate_limit_error as e: + raise RateLimitError(f"Rate limit error. 
{str(e)}") from e + except _sdk_error as e: + logger.error("Error fetching response for prompt: %s", e) + status_code = getattr(e, "status_code", None) or 500 + raise LegacyExecutorError(message=str(e), code=status_code) from e + + @staticmethod + def _run_webhook_postprocess( + parsed_data: Any, + webhook_url: str | None, + highlight_data: Any, + ) -> tuple[Any, Any]: + """Run webhook-based postprocessing; return (processed_data, updated_highlight).""" + from executor.executors.postprocessor import postprocess_data + + if not webhook_url: + logger.warning("Postprocessing webhook enabled but URL missing; skipping.") + return parsed_data, None + if not _is_safe_public_url(webhook_url): + logger.warning("Postprocessing webhook URL is not allowed; skipping.") + return parsed_data, None + try: + return postprocess_data( + parsed_data, + webhook_enabled=True, + webhook_url=webhook_url, + highlight_data=highlight_data, + timeout=60, + ) + except Exception as e: + logger.warning( + "Postprocessing webhook failed: %s. 
Using unprocessed data.", e + ) + return parsed_data, None + + @staticmethod + def handle_json( + answer: str, + structured_output: dict[str, Any], + output: dict[str, Any], + llm: Any, + enable_highlight: bool = False, + enable_word_confidence: bool = False, + execution_source: str = "ide", + metadata: dict[str, Any] | None = None, + file_path: str = "", + log_events_id: str = "", + tool_id: str = "", + doc_name: str = "", + ) -> None: + """Handle JSON responses from the LLM.""" + from executor.executors.json_repair_helper import repair_json_with_best_structure + + prompt_key = output[PSKeys.NAME] + if answer.lower() == "na": + structured_output[prompt_key] = None + return + + parsed_data = repair_json_with_best_structure(answer) + if isinstance(parsed_data, str): + logger.error("Error parsing response to JSON") + structured_output[prompt_key] = {} + return + + highlight_data = None + if enable_highlight and metadata and PSKeys.HIGHLIGHT_DATA in metadata: + highlight_data = metadata[PSKeys.HIGHLIGHT_DATA].get(prompt_key) + + processed_data = parsed_data + updated_highlight_data = None + + webhook_enabled = output.get(PSKeys.ENABLE_POSTPROCESSING_WEBHOOK, False) + if webhook_enabled: + webhook_url = output.get(PSKeys.POSTPROCESSING_WEBHOOK_URL) + processed_data, updated_highlight_data = ( + AnswerPromptService._run_webhook_postprocess( + parsed_data=parsed_data, + webhook_url=webhook_url, + highlight_data=highlight_data, + ) + ) + + structured_output[prompt_key] = processed_data + + if enable_highlight and metadata and updated_highlight_data is not None: + metadata.setdefault(PSKeys.HIGHLIGHT_DATA, {})[prompt_key] = ( + updated_highlight_data + ) diff --git a/workers/executor/executors/constants.py b/workers/executor/executors/constants.py new file mode 100644 index 0000000000..9eddab8423 --- /dev/null +++ b/workers/executor/executors/constants.py @@ -0,0 +1,203 @@ +from enum import Enum + + +class PromptServiceConstants: + """Constants used in the prompt 
service.""" + + WORD = "word" + SYNONYMS = "synonyms" + OUTPUTS = "outputs" + TOOL_ID = "tool_id" + RUN_ID = "run_id" + EXECUTION_ID = "execution_id" + FILE_NAME = "file_name" + FILE_HASH = "file_hash" + NAME = "name" + ACTIVE = "active" + PROMPT = "prompt" + CHUNK_SIZE = "chunk-size" + PROMPTX = "promptx" + VECTOR_DB = "vector-db" + EMBEDDING = "embedding" + X2TEXT_ADAPTER = "x2text_adapter" + CHUNK_OVERLAP = "chunk-overlap" + LLM = "llm" + IS_ASSERT = "is_assert" + ASSERTION_FAILURE_PROMPT = "assertion_failure_prompt" + RETRIEVAL_STRATEGY = "retrieval-strategy" + TYPE = "type" + NUMBER = "number" + EMAIL = "email" + DATE = "date" + BOOLEAN = "boolean" + JSON = "json" + PREAMBLE = "preamble" + SIMILARITY_TOP_K = "similarity-top-k" + PROMPT_TOKENS = "prompt_tokens" + COMPLETION_TOKENS = "completion_tokens" + TOTAL_TOKENS = "total_tokens" + RESPONSE = "response" + POSTAMBLE = "postamble" + GRAMMAR = "grammar" + PLATFORM_SERVICE_API_KEY = "PLATFORM_SERVICE_API_KEY" + EMBEDDING_SUFFIX = "embedding_suffix" + EVAL_SETTINGS = "eval_settings" + EVAL_SETTINGS_EVALUATE = "evaluate" + EVAL_SETTINGS_MONITOR_LLM = "monitor_llm" + EVAL_SETTINGS_EXCLUDE_FAILED = "exclude_failed" + TOOL_SETTINGS = "tool_settings" + LOG_EVENTS_ID = "log_events_id" + CHALLENGE_LLM = "challenge_llm" + CHALLENGE = "challenge" + ENABLE_CHALLENGE = "enable_challenge" + EXTRACTION = "extraction" + SUMMARIZE = "summarize" + SINGLE_PASS_EXTRACTION = "single-pass-extraction" + SIMPLE_PROMPT_STUDIO = "simple-prompt-studio" + LLM_USAGE_REASON = "llm_usage_reason" + METADATA = "metadata" + OUTPUT = "output" + CONTEXT = "context" + INCLUDE_METADATA = "include_metadata" + TABLE = "table" + TABLE_SETTINGS = "table_settings" + EPILOGUE = "epilogue" + PLATFORM_POSTAMBLE = "platform_postamble" + WORD_CONFIDENCE_POSTAMBLE = "word_confidence_postamble" + HIGHLIGHT_DATA_PLUGIN = "highlight-data" + SUMMARIZE_AS_SOURCE = "summarize_as_source" + VARIABLE_MAP = "variable_map" + RECORD = "record" + CUSTOM_DATA = 
"custom_data" + TEXT = "text" + ENABLE_HIGHLIGHT = "enable_highlight" + ENABLE_WORD_CONFIDENCE = "enable_word_confidence" + FILE_PATH = "file_path" + HIGHLIGHT_DATA = "highlight_data" + CONFIDENCE_DATA = "confidence_data" + WORD_CONFIDENCE_DATA = "word_confidence_data" + REQUIRED_FIELDS = "required_fields" + REQUIRED = "required" + EXECUTION_SOURCE = "execution_source" + METRICS = "metrics" + CAPTURE_METRICS = "capture_metrics" + LINE_ITEM = "line-item" + LINE_NUMBERS = "line_numbers" + WHISPER_HASH = "whisper_hash" + PAID_FEATURE_MSG = ( + "It is a cloud / enterprise feature. If you have purchased a plan and still " + "face this issue, please contact support" + ) + NO_CONTEXT_ERROR = ( + "Couldn't fetch context from vector DB. " + "This happens usually due to a delay by the Vector DB " + "provider to confirm writes to DB. " + "Please try again after some time" + ) + COMBINED_PROMPT = "combined_prompt" + TOOL = "tool" + JSON_POSTAMBLE = "JSON_POSTAMBLE" + DEFAULT_JSON_POSTAMBLE = "Wrap the final JSON result inbetween §§§ like below example:\n§§§\n\n§§§" + DOCUMENT_TYPE = "document_type" + # Webhook postprocessing settings + ENABLE_POSTPROCESSING_WEBHOOK = "enable_postprocessing_webhook" + POSTPROCESSING_WEBHOOK_URL = "postprocessing_webhook_url" + + +class RunLevel(Enum): + """Different stages of prompt execution. + + Comprises of prompt run and response evaluation stages. 
+ """ + + RUN = "RUN" + EVAL = "EVAL" + CHALLENGE = "CHALLENGE" + TABLE_EXTRACTION = "TABLE_EXTRACTION" + + +class DBTableV2: + """Database tables.""" + + ORGANIZATION = "organization" + ADAPTER_INSTANCE = "adapter_instance" + PROMPT_STUDIO_REGISTRY = "prompt_studio_registry" + PLATFORM_KEY = "platform_key" + TOKEN_USAGE = "usage" + + +class FileStorageKeys: + """File storage keys.""" + + PERMANENT_REMOTE_STORAGE = "PERMANENT_REMOTE_STORAGE" + TEMPORARY_REMOTE_STORAGE = "TEMPORARY_REMOTE_STORAGE" + + +class FileStorageType(Enum): + """File storage type.""" + + PERMANENT = "permanent" + TEMPORARY = "temporary" + + +class ExecutionSource(Enum): + """Execution source.""" + + IDE = "ide" + TOOL = "tool" + + +class VariableType(str, Enum): + """Type of variable.""" + + STATIC = "STATIC" + DYNAMIC = "DYNAMIC" + CUSTOM_DATA = "CUSTOM_DATA" + + +class RetrievalStrategy(str, Enum): + """Available retrieval strategies for prompt service.""" + + SIMPLE = "simple" + SUBQUESTION = "subquestion" + FUSION = "fusion" + RECURSIVE = "recursive" + ROUTER = "router" + KEYWORD_TABLE = "keyword_table" + AUTOMERGING = "automerging" + + +class VariableConstants: + """Constants for variable extraction.""" + + VARIABLE_REGEX = "{{(.+?)}}" + DYNAMIC_VARIABLE_DATA_REGEX = r"\[(.*?)\]" + DYNAMIC_VARIABLE_URL_REGEX = ( + r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»" + "'']))" + ) # noqa: E501 + CUSTOM_DATA_VARIABLE_REGEX = r"custom_data\.([a-zA-Z0-9_\.]+)" + + +class IndexingConstants: + TOOL_ID = "tool_id" + EMBEDDING_INSTANCE_ID = "embedding_instance_id" + VECTOR_DB_INSTANCE_ID = "vector_db_instance_id" + X2TEXT_INSTANCE_ID = "x2text_instance_id" + FILE_PATH = "file_path" + CHUNK_SIZE = "chunk_size" + CHUNK_OVERLAP = "chunk_overlap" + REINDEX = "reindex" + FILE_HASH = "file_hash" + OUTPUT_FILE_PATH = "output_file_path" + ENABLE_HIGHLIGHT = "enable_highlight" + 
ENABLE_WORD_CONFIDENCE = "enable_word_confidence" + USAGE_KWARGS = "usage_kwargs" + PROCESS_TEXT = "process_text" + EXTRACTED_TEXT = "extracted_text" + TAGS = "tags" + EXECUTION_SOURCE = "execution_source" + DOC_ID = "doc_id" + TOOL_EXECUTION_METATADA = "tool_execution_metadata" + EXECUTION_DATA_DIR = "execution_data_dir" + METADATA_FILE = "METADATA.json" diff --git a/workers/executor/executors/dto.py b/workers/executor/executors/dto.py new file mode 100644 index 0000000000..8c9e4f3d3c --- /dev/null +++ b/workers/executor/executors/dto.py @@ -0,0 +1,39 @@ +from dataclasses import dataclass, field +from typing import Any + + +@dataclass +class InstanceIdentifiers: + embedding_instance_id: str + vector_db_instance_id: str + x2text_instance_id: str + llm_instance_id: str + tool_id: str + tags: list[str] | None = None + + +@dataclass +class FileInfo: + file_path: str + file_hash: str + + +@dataclass +class ChunkingConfig: + chunk_size: int + chunk_overlap: int + + def __post_init__(self) -> None: + if self.chunk_size == 0: + raise ValueError( + "Indexing cannot be done for zero chunks." + "Please provide a valid chunk_size." + ) + + +@dataclass +class ProcessingOptions: + reindex: bool = False + enable_highlight: bool = False + enable_word_confidence: bool = False + usage_kwargs: dict[Any, Any] = field(default_factory=dict) diff --git a/workers/executor/executors/exceptions.py b/workers/executor/executors/exceptions.py new file mode 100644 index 0000000000..69cd0a8a16 --- /dev/null +++ b/workers/executor/executors/exceptions.py @@ -0,0 +1,79 @@ +"""Standalone exceptions for the legacy executor. + +Adapted from prompt-service exceptions. The Flask ``APIError`` base +class is replaced with ``LegacyExecutorError`` so these exceptions +work outside of Flask (i.e. inside the Celery executor worker). +""" + + +class LegacyExecutorError(Exception): + """Base exception for legacy executor errors. 
+ + Replaces Flask's ``APIError`` — carries ``message`` and ``code`` + attributes so callers can map to ``ExecutionResult.failure()``. + """ + + code: int = 500 + message: str = "Internal executor error" + + def __init__(self, message: str | None = None, code: int | None = None): + if message is not None: + self.message = message + if code is not None: + self.code = code + super().__init__(self.message) + + +class BadRequest(LegacyExecutorError): + code = 400 + message = "Bad Request / No payload" + + +class RateLimitError(LegacyExecutorError): + code = 429 + message = "Running into rate limit errors, please try again later" + + +class MissingFieldError(LegacyExecutorError): + """Custom error for missing fields.""" + + def __init__(self, missing_fields: list[str]): + message = f"Missing required fields: {', '.join(missing_fields)}" + super().__init__(message=message) + + +class RetrievalError(LegacyExecutorError): + """Custom exception raised for errors during retrieval from VectorDB.""" + + DEFAULT_MESSAGE = ( + "Error while retrieving data from the VectorDB. " + "Please contact the admin for further assistance." + ) + + +class ExtractionError(LegacyExecutorError): + DEFAULT_MESSAGE = "Error while extracting from a document" + + +class UnprocessableEntity(LegacyExecutorError): + code = 422 + message = "Unprocessable Entity" + + +class CustomDataError(LegacyExecutorError): + """Custom exception raised for errors with custom_data variables.""" + + code = 400 + + def __init__(self, variable: str, reason: str, is_ide: bool = True): + if is_ide: + help_text = "Please define this key in Prompt Studio Settings > Custom Data." + else: + help_text = ( + "Please include this key in the 'custom_data' field of your API request." + ) + variable_display = "{{custom_data." 
+ variable + "}}" + message = ( + f"Custom data error for variable '{variable_display}': {reason} {help_text}" + ) + super().__init__(message=message) diff --git a/workers/executor/executors/file_utils.py b/workers/executor/executors/file_utils.py new file mode 100644 index 0000000000..92f80d6d76 --- /dev/null +++ b/workers/executor/executors/file_utils.py @@ -0,0 +1,40 @@ +"""File storage utilities for the legacy executor. + +Adapted from ``prompt-service/.../utils/file_utils.py``. +Returns the appropriate ``FileStorage`` instance based on execution source. +""" + +from executor.executors.constants import ExecutionSource, FileStorageKeys + +from unstract.sdk1.file_storage import FileStorage +from unstract.sdk1.file_storage.constants import StorageType +from unstract.sdk1.file_storage.env_helper import EnvHelper + + +class FileUtils: + @staticmethod + def get_fs_instance(execution_source: str) -> FileStorage: + """Returns a FileStorage instance based on the execution source. + + Args: + execution_source: The source from which the execution is triggered. + + Returns: + FileStorage: The file storage instance — Permanent/Shared temporary. + + Raises: + ValueError: If the execution source is invalid. + """ + if execution_source == ExecutionSource.IDE.value: + return EnvHelper.get_storage( + storage_type=StorageType.PERMANENT, + env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, + ) + + if execution_source == ExecutionSource.TOOL.value: + return EnvHelper.get_storage( + storage_type=StorageType.SHARED_TEMPORARY, + env_name=FileStorageKeys.TEMPORARY_REMOTE_STORAGE, + ) + + raise ValueError(f"Invalid execution source: {execution_source}") diff --git a/workers/executor/executors/index.py b/workers/executor/executors/index.py new file mode 100644 index 0000000000..ebb4f6d599 --- /dev/null +++ b/workers/executor/executors/index.py @@ -0,0 +1,227 @@ +"""Indexing logic for the legacy executor. + +Adapted from ``prompt-service/.../core/index_v2.py``. 
class Index:
    """Chunk extracted document text and index it into a vector DB.

    Heavy third-party modules (``llama_index``, ``openai``, the no-op
    vector DB adapter) are imported inside the methods that need them
    rather than at module import time.

    Args:
        tool: Object used for adapter-config lookups and ``stream_log``.
        instance_identifiers: Adapter instance ids for this run.
        chunking_config: Chunk size / overlap used when indexing.
        processing_options: Flags such as ``reindex``.
        run_id: Optional run identifier (stored, not read in this class).
        capture_metrics: Stored flag (not read in this class).
    """

    def __init__(
        self,
        tool: StreamMixin,
        instance_identifiers: InstanceIdentifiers,
        chunking_config: ChunkingConfig,
        processing_options: ProcessingOptions,
        run_id: str | None = None,
        capture_metrics: bool = False,
    ):
        self.tool = tool
        self._run_id = run_id
        self._capture_metrics = capture_metrics
        self.instance_identifiers = instance_identifiers
        self.chunking_config = chunking_config
        self.processing_options = processing_options
        self._metrics: dict[str, Any] = {}

    def generate_index_key(
        self,
        file_info: FileInfo,
        fs: FileStorage | None = None,
    ) -> str:
        """Generate a unique index key for document indexing.

        The key is a hash of the file hash, the three adapter configs
        (with adapter names stripped) and the chunking parameters.

        Args:
            file_info: Carries ``file_path`` and/or ``file_hash``.
            fs: Storage used to hash the file when ``file_hash`` is
                empty. Defaults to local file storage, created on
                demand instead of once at function-definition time.

        Returns:
            The hashed index key.

        Raises:
            ValueError: If neither ``file_path`` nor ``file_hash`` is set.
        """
        if not file_info.file_path and not file_info.file_hash:
            raise ValueError("One of `file_path` or `file_hash` need to be provided")

        file_hash = file_info.file_hash
        if not file_hash:
            if fs is None:
                fs = FileStorage(provider=FileStorageProvider.LOCAL)
            file_hash = fs.get_hash_from_file(path=file_info.file_path)

        vector_db_config = ToolAdapter.get_adapter_config(
            self.tool, self.instance_identifiers.vector_db_instance_id
        )
        embedding_config = ToolAdapter.get_adapter_config(
            self.tool, self.instance_identifiers.embedding_instance_id
        )
        x2text_config = ToolAdapter.get_adapter_config(
            self.tool, self.instance_identifiers.x2text_instance_id
        )
        Utils.strip_adapter_name(vector_db_config, embedding_config, x2text_config)
        index_key = {
            "file_hash": file_hash,
            "vector_db_config": vector_db_config,
            "embedding_config": embedding_config,
            "x2text_config": x2text_config,
            "chunk_size": str(self.chunking_config.chunk_size),
            "chunk_overlap": str(self.chunking_config.chunk_overlap),
        }
        # sort_keys keeps the serialisation (and so the hash) stable.
        return ToolUtils.hash_str(json.dumps(index_key, sort_keys=True))

    def is_document_indexed(
        self,
        doc_id: str,
        embedding: Embedding,
        vector_db: VectorDB,
    ) -> bool:
        """Check if nodes are already present in the vector DB for a doc_id.

        A failing query is logged as a warning and treated as "not
        indexed" so that the caller proceeds to index.

        Returns:
            ``True`` when the query returned at least one node.
        """
        from llama_index.core.vector_stores import (
            FilterOperator,
            MetadataFilter,
            MetadataFilters,
            VectorStoreQuery,
            VectorStoreQueryResult,
        )

        doc_id_eq_filter = MetadataFilter.from_dict(
            {"key": "doc_id", "operator": FilterOperator.EQ, "value": doc_id}
        )
        filters = MetadataFilters(filters=[doc_id_eq_filter])
        q = VectorStoreQuery(
            query_embedding=embedding.get_query_embedding(" "),
            doc_ids=[doc_id],
            filters=filters,
        )

        doc_id_found = False
        try:
            n: VectorStoreQueryResult = vector_db.query(query=q)
            if len(n.nodes) > 0:
                doc_id_found = True
                self.tool.stream_log(f"Found {len(n.nodes)} nodes for {doc_id}")
            else:
                self.tool.stream_log(f"No nodes found for {doc_id}")
        except Exception as e:
            logger.warning(
                f"Error querying {self.instance_identifiers.vector_db_instance_id}:"
                f" {str(e)}, proceeding to index",
                exc_info=True,
            )

        if doc_id_found and not self.processing_options.reindex:
            self.tool.stream_log(f"File was indexed already under {doc_id}")

        return doc_id_found

    def perform_indexing(
        self,
        vector_db: VectorDB,
        doc_id: str,
        extracted_text: str,
        doc_id_found: bool,
    ) -> str:
        """Index ``extracted_text`` under ``doc_id`` and return ``doc_id``.

        Nothing is indexed when the configured vector DB is the no-op
        adapter. Existing nodes are deleted first when ``reindex`` is
        set and ``doc_id_found`` is true.
        """
        from unstract.sdk1.adapters.vectordb.no_op.src.no_op_custom_vectordb import (
            NoOpCustomVectorDB,
        )

        if isinstance(
            vector_db.get_vector_db(
                adapter_instance_id=self.instance_identifiers.vector_db_instance_id,
                embedding_dimension=1,
            ),
            (NoOpCustomVectorDB),
        ):
            return doc_id

        self.tool.stream_log("Indexing file...")
        full_text = [
            {
                "section": "full",
                "text_contents": str(extracted_text),
            }
        ]
        documents = self._prepare_documents(doc_id, full_text)
        if self.processing_options.reindex and doc_id_found:
            self.delete_nodes(vector_db, doc_id)
        self._trigger_indexing(vector_db, documents)
        return doc_id

    def _trigger_indexing(self, vector_db: Any, documents: list) -> None:
        """Add ``documents`` to the vector DB using the chunking config.

        Raises:
            Exception: ``openai.OpenAIError`` is converted through
                ``parse_litellm_err`` (chained to the original error);
                any other exception is logged and re-raised unchanged.
        """
        import openai

        self.tool.stream_log("Adding nodes to vector db...")
        try:
            vector_db.index_document(
                documents,
                chunk_size=self.chunking_config.chunk_size,
                chunk_overlap=self.chunking_config.chunk_overlap,
                show_progress=True,
            )
            self.tool.stream_log("File has been indexed successfully")
        except openai.OpenAIError as e:
            raise parse_litellm_err(e) from e
        except Exception as e:
            self.tool.stream_log(
                f"Error adding nodes to vector db: {e}",
                level=LogLevel.ERROR,
            )
            raise

    def delete_nodes(self, vector_db: Any, doc_id: str) -> None:
        """Delete the nodes stored for ``doc_id``.

        Raises:
            SdkError: If the vector DB delete call fails.
        """
        try:
            vector_db.delete(ref_doc_id=doc_id)
            self.tool.stream_log(f"Deleted nodes for {doc_id}")
        except Exception as e:
            self.tool.stream_log(
                f"Error deleting nodes for {doc_id}: {e}",
                level=LogLevel.ERROR,
            )
            raise SdkError(f"Error deleting nodes for {doc_id}: {e}") from e

    def _prepare_documents(self, doc_id: str, full_text: Any) -> list:
        """Wrap each text section in a llama_index ``Document``.

        Every document gets ``doc_id`` as its id and the section name
        as metadata.

        Raises:
            SdkError: If building any document fails.
        """
        from llama_index.core import Document

        documents = []
        try:
            for item in full_text:
                text = item["text_contents"]
                document = Document(
                    text=text,
                    doc_id=doc_id,
                    metadata={"section": item["section"]},
                )
                document.id_ = doc_id
                documents.append(document)
            self.tool.stream_log(f"Number of documents: {len(documents)}")
            return documents
        except Exception as e:
            self.tool.stream_log(
                f"Error while processing documents {doc_id}: {e}",
                level=LogLevel.ERROR,
            )
            raise SdkError(
                f"Error while processing documents for indexing {doc_id}: {e}"
            ) from e
+ + Args: + json_str: The JSON string to repair + + Returns: + The parsed JSON object with the best structure + """ + # Fast path — try strict JSON first + try: + return json.loads(json_str) + except ValueError: + pass + + # Try to import json_repair for advanced repair + try: + from json_repair import repair_json + + parsed_as_is = repair_json( + json_str=json_str, return_objects=True, ensure_ascii=False + ) + parsed_with_wrap = repair_json( + json_str="[" + json_str, return_objects=True, ensure_ascii=False + ) + + if isinstance(parsed_as_is, str) and isinstance(parsed_with_wrap, str): + return parsed_as_is + if isinstance(parsed_as_is, str): + return parsed_with_wrap + if isinstance(parsed_with_wrap, str): + return parsed_as_is + + if ( + isinstance(parsed_with_wrap, list) + and len(parsed_with_wrap) == 1 + and parsed_with_wrap[0] == parsed_as_is + ): + return parsed_as_is + + if isinstance(parsed_as_is, (dict, list)): + if isinstance(parsed_with_wrap, list) and len(parsed_with_wrap) > 1: + return parsed_with_wrap + else: + return parsed_as_is + + return parsed_with_wrap + except ImportError: + # json_repair not installed — return the raw string + return json_str diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py new file mode 100644 index 0000000000..6a2c1be67c --- /dev/null +++ b/workers/executor/executors/legacy_executor.py @@ -0,0 +1,1881 @@ +"""Legacy executor — wraps the full prompt-service extraction pipeline. + +Routes ``ExecutionContext`` requests to handler methods for text +extraction, indexing, retrieval, prompt answering, single-pass +extraction, summarisation, and usage tracking. 
+""" + +import logging +import time +from pathlib import Path +from typing import Any + +from executor.executor_tool_shim import ExecutorToolShim +from executor.executors.constants import ExecutionSource +from executor.executors.constants import IndexingConstants as IKeys +from executor.executors.dto import ( + ChunkingConfig, + FileInfo, + InstanceIdentifiers, + ProcessingOptions, +) +from executor.executors.exceptions import ExtractionError, LegacyExecutorError +from executor.executors.file_utils import FileUtils + +from unstract.sdk1.adapters.exceptions import AdapterError +from unstract.sdk1.adapters.x2text.constants import X2TextConstants +from unstract.sdk1.adapters.x2text.llm_whisperer.src import LLMWhisperer +from unstract.sdk1.adapters.x2text.llm_whisperer_v2.src import LLMWhispererV2 +from unstract.sdk1.constants import LogLevel +from unstract.sdk1.execution.context import ExecutionContext, Operation +from unstract.sdk1.execution.executor import BaseExecutor +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult +from unstract.sdk1.utils.tool import ToolUtils +from unstract.sdk1.x2txt import TextExtractionResult, X2Text + +logger = logging.getLogger(__name__) + + +@ExecutorRegistry.register +class LegacyExecutor(BaseExecutor): + """Executor that wraps the full prompt-service extraction pipeline. + + Routes incoming ``ExecutionContext`` requests to the appropriate + handler method based on the ``Operation`` enum. Each handler + corresponds to one of the original prompt-service HTTP endpoints. + """ + + # Maps Operation enum values to handler method names. 
+ _OPERATION_MAP: dict[str, str] = { + Operation.EXTRACT.value: "_handle_extract", + Operation.INDEX.value: "_handle_index", + Operation.ANSWER_PROMPT.value: "_handle_answer_prompt", + Operation.SINGLE_PASS_EXTRACTION.value: "_handle_single_pass_extraction", + Operation.SUMMARIZE.value: "_handle_summarize", + Operation.IDE_INDEX.value: "_handle_ide_index", + Operation.STRUCTURE_PIPELINE.value: "_handle_structure_pipeline", + } + + # Defaults for log streaming (overridden by execute()). + _log_events_id: str = "" + _log_component: dict[str, str] = {} + + @property + def name(self) -> str: + return "legacy" + + def execute(self, context: ExecutionContext) -> ExecutionResult: + """Route to the handler for ``context.operation``. + + Returns: + ``ExecutionResult`` on success or for unsupported operations. + ``LegacyExecutorError`` subclasses are caught and mapped to + ``ExecutionResult.failure()`` so callers always get a result. + + Raises: + NotImplementedError: From stub handlers (until 2D–2H). + """ + # Extract log streaming info (set by tasks.py for IDE sessions). 
+ self._log_events_id: str = context.log_events_id or "" + self._log_component: dict[str, str] = getattr(context, "_log_component", {}) + + handler_name = self._OPERATION_MAP.get(context.operation) + if handler_name is None: + return ExecutionResult.failure( + error=(f"LegacyExecutor does not support operation '{context.operation}'") + ) + + handler = getattr(self, handler_name) + logger.info( + "LegacyExecutor routing operation=%s to %s " + "(run_id=%s request_id=%s execution_source=%s)", + context.operation, + handler_name, + context.run_id, + context.request_id, + context.execution_source, + ) + start = time.monotonic() + try: + result = handler(context) + elapsed = time.monotonic() - start + logger.info( + "Handler %s completed in %.2fs (run_id=%s success=%s)", + handler_name, + elapsed, + context.run_id, + result.success, + ) + return result + except LegacyExecutorError as exc: + elapsed = time.monotonic() - start + logger.warning( + "Handler %s failed after %.2fs: %s: %s", + handler_name, + elapsed, + type(exc).__name__, + exc.message, + exc_info=True, + ) + # Stream error to FE so the user sees the failure in real-time + if self._log_events_id: + try: + shim = ExecutorToolShim( + log_events_id=self._log_events_id, + component=self._log_component, + ) + shim.stream_log( + f"Error: {exc.message or type(exc).__name__}", + level=LogLevel.ERROR, + ) + except Exception: + pass # Best-effort — don't mask the original error + return ExecutionResult.failure(error=exc.message) + + # ------------------------------------------------------------------ + # Phase 2B — Extract handler + # ------------------------------------------------------------------ + + def _handle_extract(self, context: ExecutionContext) -> ExecutionResult: + """Handle ``Operation.EXTRACT`` — text extraction via x2text. + + Migrated from ``ExtractionService.perform_extraction()`` in + ``prompt-service/.../services/extraction.py``. 
+ + Returns: + ExecutionResult with ``data`` containing ``extracted_text``. + """ + params: dict[str, Any] = context.executor_params + + # Required params + x2text_instance_id: str = params.get(IKeys.X2TEXT_INSTANCE_ID, "") + file_path: str = params.get(IKeys.FILE_PATH, "") + platform_api_key: str = params.get("platform_api_key", "") + + if not x2text_instance_id or not file_path: + missing = [] + if not x2text_instance_id: + missing.append(IKeys.X2TEXT_INSTANCE_ID) + if not file_path: + missing.append(IKeys.FILE_PATH) + return ExecutionResult.failure( + error=f"Missing required params: {', '.join(missing)}" + ) + + # Optional params + output_file_path: str | None = params.get(IKeys.OUTPUT_FILE_PATH) + enable_highlight: bool = params.get(IKeys.ENABLE_HIGHLIGHT, False) + usage_kwargs: dict[Any, Any] = params.get(IKeys.USAGE_KWARGS, {}) + tags: list[str] | None = params.get(IKeys.TAGS) + execution_source: str = context.execution_source + tool_exec_metadata: dict[str, Any] = params.get(IKeys.TOOL_EXECUTION_METATADA, {}) + execution_data_dir: str | None = params.get(IKeys.EXECUTION_DATA_DIR) + + # Build adapter shim and X2Text + shim = ExecutorToolShim( + platform_api_key=platform_api_key, + log_events_id=self._log_events_id, + component=self._log_component, + ) + x2text = X2Text( + tool=shim, + adapter_instance_id=x2text_instance_id, + usage_kwargs=usage_kwargs, + ) + fs = FileUtils.get_fs_instance(execution_source=execution_source) + + logger.info( + "Starting text extraction: x2text_adapter=%s file=%s run_id=%s", + x2text_instance_id, + Path(file_path).name, + context.run_id, + ) + logger.debug( + "HIGHLIGHT_DEBUG _handle_extract: enable_highlight=%s x2text_type=%s file=%s run_id=%s", + enable_highlight, + type(x2text.x2text_instance).__name__, + Path(file_path).name, + context.run_id, + ) + shim.stream_log("Initializing text extractor...") + shim.stream_log(f"Using text extractor: {type(x2text.x2text_instance).__name__}") + + try: + shim.stream_log("Extracting text 
from document...") + if enable_highlight and isinstance( + x2text.x2text_instance, (LLMWhisperer, LLMWhispererV2) + ): + shim.stream_log("Extracting text with highlight support enabled...") + process_response: TextExtractionResult = x2text.process( + input_file_path=file_path, + output_file_path=output_file_path, + enable_highlight=enable_highlight, + tags=tags, + fs=fs, + ) + self._update_exec_metadata( + fs=fs, + execution_source=execution_source, + tool_exec_metadata=tool_exec_metadata, + execution_data_dir=execution_data_dir, + process_response=process_response, + ) + else: + process_response = x2text.process( + input_file_path=file_path, + output_file_path=output_file_path, + tags=tags, + fs=fs, + ) + + has_metadata = bool( + process_response.extraction_metadata + and process_response.extraction_metadata.line_metadata + ) + logger.debug( + "HIGHLIGHT_DEBUG extraction result: has_line_metadata=%s " + "whisper_hash=%s run_id=%s", + has_metadata, + getattr(process_response.extraction_metadata, "whisper_hash", None) + if process_response.extraction_metadata + else None, + context.run_id, + ) + logger.info( + "Text extraction completed: file=%s run_id=%s", + Path(file_path).name, + context.run_id, + ) + shim.stream_log("Text extraction completed") + result_data: dict[str, Any] = { + IKeys.EXTRACTED_TEXT: process_response.extracted_text, + } + # Include highlight metadata when available + # (used by agentic extraction for PDF source referencing) + if ( + process_response.extraction_metadata + and process_response.extraction_metadata.line_metadata + ): + shim.stream_log("Saving extraction metadata...") + result_data["highlight_metadata"] = ( + process_response.extraction_metadata.line_metadata + ) + return ExecutionResult( + success=True, + data=result_data, + ) + except AdapterError as e: + name = x2text.x2text_instance.get_name() + logger.error( + "Text extraction failed: adapter=%s file=%s error=%s", + name, + Path(file_path).name, + str(e), + ) + msg = f"Error 
from text extractor '{name}'. {e}" + raise ExtractionError(message=msg) from e + + @staticmethod + def _update_exec_metadata( + fs: Any, + execution_source: str, + tool_exec_metadata: dict[str, Any] | None, + execution_data_dir: str | None, + process_response: TextExtractionResult, + ) -> None: + """Write whisper_hash metadata for tool-sourced executions.""" + if execution_source != ExecutionSource.TOOL.value: + return + whisper_hash = process_response.extraction_metadata.whisper_hash + metadata = {X2TextConstants.WHISPER_HASH: whisper_hash} + if tool_exec_metadata is not None: + for key, value in metadata.items(): + tool_exec_metadata[key] = value + metadata_path = str(Path(execution_data_dir) / IKeys.METADATA_FILE) + ToolUtils.dump_json( + file_to_dump=metadata_path, + json_to_dump=metadata, + fs=fs, + ) + + @staticmethod + def _get_indexing_deps(): + """Lazy-import heavy indexing dependencies. + + These imports trigger llama_index/qdrant/protobuf loading, + so they must not happen at module-collection time (tests). + Wrapped in a method so tests can mock it cleanly. + """ + from executor.executors.index import Index + + from unstract.sdk1.embedding import EmbeddingCompat + from unstract.sdk1.vector_db import VectorDB + + return Index, EmbeddingCompat, VectorDB + + def _run_summarize_step( + self, summarize_params: dict, context: ExecutionContext + ) -> ExecutionResult | None: + """Run summarization if not already cached. + + Returns: + ``None`` on success (summary written or cached), or an + ``ExecutionResult`` failure to propagate to the caller. 
+ """ + extract_file_path = summarize_params.get("extract_file_path", "") + summarize_file_path = summarize_params.get("summarize_file_path", "") + platform_api_key = summarize_params.get("platform_api_key", "") + llm_adapter_id = summarize_params.get("llm_adapter_instance_id", "") + summarize_prompt = summarize_params.get("summarize_prompt", "") + prompt_keys = summarize_params.get("prompt_keys", []) + + fs = FileUtils.get_fs_instance(execution_source=context.execution_source) + + # Check cache — skip if summary already exists + if fs.exists(summarize_file_path): + existing = fs.read(path=summarize_file_path, mode="r") + if existing: + return None + + doc_context = fs.read(path=extract_file_path, mode="r") + if not doc_context: + return ExecutionResult.failure( + error="No extracted text found for summarization" + ) + + summarize_ctx = ExecutionContext( + executor_name=context.executor_name, + operation=Operation.SUMMARIZE.value, + run_id=context.run_id, + execution_source=context.execution_source, + organization_id=context.organization_id, + request_id=context.request_id, + log_events_id=context.log_events_id, + executor_params={ + "llm_adapter_instance_id": llm_adapter_id, + "summarize_prompt": summarize_prompt, + "context": doc_context, + "prompt_keys": prompt_keys, + "PLATFORM_SERVICE_API_KEY": platform_api_key, + }, + ) + summarize_result = self._handle_summarize(summarize_ctx) + if not summarize_result.success: + return summarize_result + + summarize_dir = str(Path(summarize_file_path).parent) + fs.mkdir(summarize_dir, create_parents=True) + fs.write( + path=summarize_file_path, + mode="w", + data=summarize_result.data.get("data", ""), + ) + return None + + # ------------------------------------------------------------------ + # Phase 5C — Compound IDE index handler (extract + index) + # ------------------------------------------------------------------ + + def _handle_ide_index(self, context: ExecutionContext) -> ExecutionResult: + """Handle 
``Operation.IDE_INDEX`` — compound extract then index. + + This compound operation combines ``_handle_extract`` and + ``_handle_index`` in a single executor invocation, eliminating + the need for the backend Celery worker to block between steps. + + The ``executor_params`` must contain: + - ``extract_params``: Parameters for ``_handle_extract``. + - ``index_params``: Parameters for ``_handle_index``. The + executor injects ``extracted_text`` from the extract step + before calling index. + + Returns: + ExecutionResult with ``data`` containing ``doc_id`` from + the index step. + """ + params = context.executor_params + extract_params = params.get("extract_params") + index_params = params.get("index_params") + + if not extract_params or not index_params: + missing = [] + if not extract_params: + missing.append("extract_params") + if not index_params: + missing.append("index_params") + return ExecutionResult.failure( + error=f"ide_index missing required params: {', '.join(missing)}" + ) + + # Step 1: Extract + extract_ctx = ExecutionContext( + executor_name=context.executor_name, + operation=Operation.EXTRACT.value, + run_id=context.run_id, + execution_source=context.execution_source, + organization_id=context.organization_id, + executor_params=extract_params, + request_id=context.request_id, + log_events_id=context.log_events_id, + ) + extract_result = self._handle_extract(extract_ctx) + if not extract_result.success: + return extract_result + + # Step 2: Optional summarize + summarize_params = params.get("summarize_params") + summarize_file_path = "" + if summarize_params: + summarize_file_path = summarize_params.get("summarize_file_path", "") + result = self._run_summarize_step(summarize_params, context) + if result is not None: + return result + + # Step 3: Index — inject extracted text + extracted_text = extract_result.data.get(IKeys.EXTRACTED_TEXT, "") + index_params[IKeys.EXTRACTED_TEXT] = extracted_text + + index_ctx = ExecutionContext( + 
executor_name=context.executor_name, + operation=Operation.INDEX.value, + run_id=context.run_id, + execution_source=context.execution_source, + organization_id=context.organization_id, + executor_params=index_params, + request_id=context.request_id, + log_events_id=context.log_events_id, + ) + index_result = self._handle_index(index_ctx) + if not index_result.success: + return index_result + + return ExecutionResult( + success=True, + data={ + IKeys.DOC_ID: index_result.data.get(IKeys.DOC_ID, ""), + "summarize_file_path": summarize_file_path, + }, + ) + + # ------------------------------------------------------------------ + # Phase 5D — Compound structure pipeline handler + # ------------------------------------------------------------------ + + def _handle_structure_pipeline(self, context: ExecutionContext) -> ExecutionResult: + """Handle ``Operation.STRUCTURE_PIPELINE``. + + Runs the full structure-tool pipeline in a single executor + invocation: extract → summarize → index → answer_prompt. + + This eliminates three sequential ``dispatcher.dispatch()`` calls + that would otherwise block a file_processing worker slot. + + Expected ``executor_params`` keys: + + ``extract_params`` + Parameters for ``_handle_extract``. + ``index_template`` + Common indexing params (``tool_id``, ``file_hash``, + ``is_highlight_enabled``, ``platform_api_key``, + ``extracted_file_path``). + ``answer_params`` + Full payload for ``_handle_answer_prompt`` / + ``_handle_single_pass_extraction``. + ``pipeline_options`` + Control flags: ``skip_extraction_and_indexing``, + ``is_summarization_enabled``, ``is_single_pass_enabled``, + ``input_file_path``, ``source_file_name``. + ``summarize_params`` + (Optional) Parameters for ``_handle_summarize`` plus + filesystem paths for caching. + + Returns: + ExecutionResult with ``data`` containing the structured + output dict (``output``, ``metadata``, ``metrics``). 
+ """ + params = context.executor_params + extract_params = params.get("extract_params", {}) + index_template = params.get("index_template", {}) + answer_params = params.get("answer_params", {}) + pipeline_options = params.get("pipeline_options", {}) + summarize_params = params.get("summarize_params") + + skip_extraction = pipeline_options.get("skip_extraction_and_indexing", False) + is_summarization = pipeline_options.get("is_summarization_enabled", False) + is_single_pass = pipeline_options.get("is_single_pass_enabled", False) + input_file_path = pipeline_options.get("input_file_path", "") + source_file_name = pipeline_options.get("source_file_name", "") + + extracted_text = "" + index_metrics: dict = {} + + shim = ExecutorToolShim( + platform_api_key=extract_params.get("platform_api_key", ""), + log_events_id=self._log_events_id, + component=self._log_component, + ) + step = 1 + + # ---- Step 1: Extract ---- + if not skip_extraction: + shim.stream_log(f"Pipeline step {step}: Extracting text from document...") + step += 1 + extract_ctx = ExecutionContext( + executor_name=context.executor_name, + operation=Operation.EXTRACT.value, + run_id=context.run_id, + execution_source=context.execution_source, + organization_id=context.organization_id, + executor_params=extract_params, + request_id=context.request_id, + log_events_id=context.log_events_id, + ) + extract_result = self._handle_extract(extract_ctx) + if not extract_result.success: + return extract_result + extracted_text = extract_result.data.get(IKeys.EXTRACTED_TEXT, "") + + # ---- Step 2: Summarize (if enabled) ---- + if is_summarization: + shim.stream_log(f"Pipeline step {step}: Summarizing extracted text...") + step += 1 + summarize_result = self._run_pipeline_summarize( + context=context, + summarize_params=summarize_params or {}, + answer_params=answer_params, + ) + if not summarize_result.success: + return summarize_result + # answer_params file_path/hash updated in-place by helper + elif 
skip_extraction: + # Smart table: use original source file + answer_params["file_path"] = input_file_path + elif not is_single_pass: + # ---- Step 3: Index per output with dedup ---- + shim.stream_log( + f"Pipeline step {step}: Indexing document into vector store..." + ) + step += 1 + index_metrics = self._run_pipeline_index( + context=context, + index_template=index_template, + answer_params=answer_params, + extracted_text=extracted_text, + ) + + # ---- Step 4: Table settings injection ---- + if not is_single_pass: + self._inject_table_settings( + answer_params=answer_params, + index_template=index_template, + skip_extraction=skip_extraction, + input_file_path=input_file_path, + ) + + # ---- Step 5: Answer prompt / Single pass ---- + mode_label = "single pass" if is_single_pass else "prompt" + shim.stream_log(f"Pipeline step {step}: Running {mode_label} execution...") + operation = ( + Operation.SINGLE_PASS_EXTRACTION.value + if is_single_pass + else Operation.ANSWER_PROMPT.value + ) + answer_ctx = ExecutionContext( + executor_name=context.executor_name, + operation=operation, + run_id=context.run_id, + execution_source=context.execution_source, + organization_id=context.organization_id, + executor_params=answer_params, + request_id=context.request_id, + log_events_id=context.log_events_id, + ) + if is_single_pass: + answer_result = self._handle_single_pass_extraction(answer_ctx) + else: + answer_result = self._handle_answer_prompt(answer_ctx) + if not answer_result.success: + return answer_result + + # ---- Step 6: Merge results ---- + structured_output = answer_result.data + self._finalize_pipeline_result( + structured_output=structured_output, + source_file_name=source_file_name, + extracted_text=extracted_text, + index_metrics=index_metrics, + ) + + shim.stream_log("Pipeline completed successfully") + return ExecutionResult(success=True, data=structured_output) + + @staticmethod + def _inject_table_settings( + answer_params: dict, + index_template: dict, + 
skip_extraction: bool, + input_file_path: str, + ) -> None: + """Inject table settings file paths into each output that has them.""" + outputs = answer_params.get("outputs", []) + extracted_file_path = index_template.get("extracted_file_path", "") + for output in outputs: + if "table_settings" not in output: + continue + table_settings = output["table_settings"] + is_dir = table_settings.get("is_directory_mode", False) + if skip_extraction: + table_settings["input_file"] = input_file_path + answer_params["file_path"] = input_file_path + else: + table_settings["input_file"] = extracted_file_path + table_settings["is_directory_mode"] = is_dir + output["table_settings"] = table_settings + + def _finalize_pipeline_result( + self, + structured_output: dict, + source_file_name: str, + extracted_text: str, + index_metrics: dict, + ) -> None: + """Populate metadata/metrics in structured_output after pipeline completion.""" + if "metadata" not in structured_output: + structured_output["metadata"] = {} + structured_output["metadata"]["file_name"] = source_file_name + if extracted_text: + structured_output["metadata"]["extracted_text"] = extracted_text + if index_metrics: + existing_metrics = structured_output.get("metrics", {}) + structured_output["metrics"] = self._merge_pipeline_metrics( + existing_metrics, index_metrics + ) + + def _run_pipeline_summarize( + self, + context: ExecutionContext, + summarize_params: dict, + answer_params: dict, + ) -> ExecutionResult: + """Run the summarize step of the structure pipeline. + + Handles filesystem caching: if a cached summary exists, uses it. + Otherwise calls ``_handle_summarize`` and writes the result. + Updates ``answer_params`` in-place with new file_path and + file_hash. 
+ """ + extract_file_path = summarize_params.get("extract_file_path", "") + summarize_file_path = summarize_params.get("summarize_file_path", "") + platform_api_key = summarize_params.get("platform_api_key", "") + llm_adapter_id = summarize_params.get("llm_adapter_instance_id", "") + summarize_prompt = summarize_params.get("summarize_prompt", "") + prompt_keys = summarize_params.get("prompt_keys", []) + outputs = answer_params.get("outputs", []) + + fs = FileUtils.get_fs_instance(execution_source=context.execution_source) + + # Set chunk_size=0 for all outputs when summarizing + embedding = answer_params.get("tool_settings", {}).get("embedding", "") + vector_db = answer_params.get("tool_settings", {}).get("vector-db", "") + x2text = answer_params.get("tool_settings", {}).get("x2text_adapter", "") + for output in outputs: + output["embedding"] = embedding + output["vector-db"] = vector_db + output["x2text_adapter"] = x2text + output["chunk-size"] = 0 + output["chunk-overlap"] = 0 + + # Check cache + summarized_context = "" + if fs.exists(summarize_file_path): + summarized_context = fs.read(path=summarize_file_path, mode="r") + + if not summarized_context: + # Read extracted text + doc_context = fs.read(path=extract_file_path, mode="r") + if not doc_context: + return ExecutionResult.failure( + error="No extracted text found for summarization" + ) + + summarize_ctx = ExecutionContext( + executor_name=context.executor_name, + operation=Operation.SUMMARIZE.value, + run_id=context.run_id, + execution_source=context.execution_source, + organization_id=context.organization_id, + request_id=context.request_id, + log_events_id=context.log_events_id, + executor_params={ + "llm_adapter_instance_id": llm_adapter_id, + "summarize_prompt": summarize_prompt, + "context": doc_context, + "prompt_keys": prompt_keys, + "PLATFORM_SERVICE_API_KEY": platform_api_key, + }, + ) + summarize_result = self._handle_summarize(summarize_ctx) + if not summarize_result.success: + return 
summarize_result + + summarized_context = summarize_result.data.get("data", "") + fs.write( + path=summarize_file_path, + mode="w", + data=summarized_context, + ) + + # Update answer_params + summarize_file_hash = fs.get_hash_from_file(path=summarize_file_path) + answer_params["file_hash"] = summarize_file_hash + answer_params["file_path"] = str(summarize_file_path) + + return ExecutionResult(success=True, data={}) + + def _run_pipeline_index( + self, + context: ExecutionContext, + index_template: dict, + answer_params: dict, + extracted_text: str, + ) -> dict: + """Run per-output indexing with dedup for the structure pipeline. + + Returns: + Dict of index metrics keyed by output name. + """ + import datetime + + tool_settings = answer_params.get("tool_settings", {}) + outputs = answer_params.get("outputs", []) + tool_id = index_template.get("tool_id", "") + file_hash = index_template.get("file_hash", "") + is_highlight = index_template.get("is_highlight_enabled", False) + platform_api_key = index_template.get("platform_api_key", "") + extracted_file_path = index_template.get("extracted_file_path", "") + + index_metrics: dict = {} + seen_params: set = set() + + for output in outputs: + chunk_size = output.get("chunk-size", 0) + chunk_overlap = output.get("chunk-overlap", 0) + vector_db = tool_settings.get("vector-db", "") + embedding = tool_settings.get("embedding", "") + x2text = tool_settings.get("x2text_adapter", "") + + param_key = ( + f"chunk_size={chunk_size}_" + f"chunk_overlap={chunk_overlap}_" + f"vector_db={vector_db}_" + f"embedding={embedding}_" + f"x2text={x2text}" + ) + + if chunk_size != 0 and param_key not in seen_params: + seen_params.add(param_key) + + indexing_start = datetime.datetime.now() + logger.info( + "Pipeline indexing: chunk_size=%s chunk_overlap=%s vector_db=%s", + chunk_size, + chunk_overlap, + vector_db, + ) + + index_ctx = ExecutionContext( + executor_name=context.executor_name, + operation=Operation.INDEX.value, + 
run_id=context.run_id, + execution_source=context.execution_source, + organization_id=context.organization_id, + request_id=context.request_id, + log_events_id=context.log_events_id, + executor_params={ + "embedding_instance_id": embedding, + "vector_db_instance_id": vector_db, + "x2text_instance_id": x2text, + "chunk_size": chunk_size, + "chunk_overlap": chunk_overlap, + "file_path": extracted_file_path, + "reindex": True, + "tool_id": tool_id, + "file_hash": file_hash, + "enable_highlight": is_highlight, + "extracted_text": extracted_text, + "platform_api_key": platform_api_key, + }, + ) + index_result = self._handle_index(index_ctx) + if not index_result.success: + logger.warning( + "Pipeline indexing failed for %s: %s", + param_key, + index_result.error, + ) + + elapsed = (datetime.datetime.now() - indexing_start).total_seconds() + output_name = output.get("name", "") + index_metrics[output_name] = {"indexing": {"time_taken(s)": elapsed}} + + return index_metrics + + @staticmethod + def _merge_pipeline_metrics(metrics1: dict, metrics2: dict) -> dict: + """Merge two metrics dicts, combining sub-dicts for shared keys.""" + merged: dict = {} + all_keys = set(metrics1) | set(metrics2) + for key in all_keys: + if ( + key in metrics1 + and key in metrics2 + and isinstance(metrics1[key], dict) + and isinstance(metrics2[key], dict) + ): + merged[key] = {**metrics1[key], **metrics2[key]} + elif key in metrics1: + merged[key] = metrics1[key] + else: + merged[key] = metrics2[key] + return merged + + # ------------------------------------------------------------------ + # Phase 2C — Index handler + # ------------------------------------------------------------------ + + def _handle_index(self, context: ExecutionContext) -> ExecutionResult: + """Handle ``Operation.INDEX`` — vector DB indexing. + + Migrated from ``IndexingService.index()`` in + ``prompt-service/.../services/indexing.py``. + + Returns: + ExecutionResult with ``data`` containing ``doc_id``. 
+ """ + params: dict[str, Any] = context.executor_params + + # Required params + embedding_instance_id: str = params.get(IKeys.EMBEDDING_INSTANCE_ID, "") + vector_db_instance_id: str = params.get(IKeys.VECTOR_DB_INSTANCE_ID, "") + x2text_instance_id: str = params.get(IKeys.X2TEXT_INSTANCE_ID, "") + file_path: str = params.get(IKeys.FILE_PATH, "") + extracted_text: str = params.get(IKeys.EXTRACTED_TEXT, "") + platform_api_key: str = params.get("platform_api_key", "") + + missing = [] + if not embedding_instance_id: + missing.append(IKeys.EMBEDDING_INSTANCE_ID) + if not vector_db_instance_id: + missing.append(IKeys.VECTOR_DB_INSTANCE_ID) + if not x2text_instance_id: + missing.append(IKeys.X2TEXT_INSTANCE_ID) + if not file_path: + missing.append(IKeys.FILE_PATH) + if missing: + return ExecutionResult.failure( + error=f"Missing required params: {', '.join(missing)}" + ) + + # Optional params + tool_id: str = params.get(IKeys.TOOL_ID, "") + file_hash: str | None = params.get(IKeys.FILE_HASH) + chunk_size: int = params.get(IKeys.CHUNK_SIZE, 512) + chunk_overlap: int = params.get(IKeys.CHUNK_OVERLAP, 128) + reindex: bool = params.get(IKeys.REINDEX, False) + enable_highlight: bool = params.get(IKeys.ENABLE_HIGHLIGHT, False) + enable_word_confidence: bool = params.get(IKeys.ENABLE_WORD_CONFIDENCE, False) + usage_kwargs: dict[Any, Any] = params.get(IKeys.USAGE_KWARGS, {}) + tags: list[str] | None = params.get(IKeys.TAGS) + execution_source: str = context.execution_source + + instance_ids = InstanceIdentifiers( + embedding_instance_id=embedding_instance_id, + vector_db_instance_id=vector_db_instance_id, + x2text_instance_id=x2text_instance_id, + tool_id=tool_id, + tags=tags, + llm_instance_id=None, + ) + file_info = FileInfo(file_path=file_path, file_hash=file_hash) + processing_options = ProcessingOptions( + reindex=reindex, + enable_highlight=enable_highlight, + enable_word_confidence=enable_word_confidence, + usage_kwargs=usage_kwargs, + ) + + shim = ExecutorToolShim( + 
platform_api_key=platform_api_key, + log_events_id=self._log_events_id, + component=self._log_component, + ) + fs_instance = FileUtils.get_fs_instance(execution_source=execution_source) + + logger.info( + "Starting indexing: chunk_size=%d chunk_overlap=%d " + "reindex=%s file=%s run_id=%s", + chunk_size, + chunk_overlap, + reindex, + Path(file_path).name, + context.run_id, + ) + shim.stream_log("Initializing indexing pipeline...") + + # Skip indexing when chunk_size is 0 — no vector operations needed. + # ChunkingConfig raises ValueError for 0, so handle before DTO. + if chunk_size == 0: + from unstract.sdk1.utils.indexing import IndexingUtils + + doc_id = IndexingUtils.generate_index_key( + vector_db=vector_db_instance_id, + embedding=embedding_instance_id, + x2text=x2text_instance_id, + chunk_size=str(chunk_size), + chunk_overlap=str(chunk_overlap), + tool=shim, + file_path=file_path, + file_hash=file_hash, + fs=fs_instance, + ) + logger.info("Skipping indexing for chunk_size=0. Doc ID: %s", doc_id) + return ExecutionResult(success=True, data={IKeys.DOC_ID: doc_id}) + + chunking_config = ChunkingConfig( + chunk_size=chunk_size, chunk_overlap=chunk_overlap + ) + shim.stream_log( + f"Configured chunking: size={chunk_size}, overlap={chunk_overlap}" + ) + + index_cls, embedding_compat, vector_db_cls = self._get_indexing_deps() + + vector_db = None + try: + index = index_cls( + tool=shim, + run_id=context.run_id, + capture_metrics=True, + instance_identifiers=instance_ids, + chunking_config=chunking_config, + processing_options=processing_options, + ) + doc_id = index.generate_index_key(file_info=file_info, fs=fs_instance) + logger.debug("Generated index key: doc_id=%s", doc_id) + shim.stream_log("Checking document index status...") + + embedding = embedding_compat( + adapter_instance_id=embedding_instance_id, + tool=shim, + kwargs={**usage_kwargs}, + ) + vector_db = vector_db_cls( + tool=shim, + adapter_instance_id=vector_db_instance_id, + embedding=embedding, + ) + 
shim.stream_log("Initialized embedding and vector DB adapters") + + doc_id_found = index.is_document_indexed( + doc_id=doc_id, embedding=embedding, vector_db=vector_db + ) + logger.info( + "Index status: doc_id=%s found=%s reindex=%s", + doc_id, + doc_id_found, + reindex, + ) + if doc_id_found and reindex: + shim.stream_log("Document already indexed, re-indexing...") + elif not doc_id_found: + shim.stream_log("Indexing document for the first time...") + shim.stream_log("Indexing document into vector store...") + index.perform_indexing( + vector_db=vector_db, + doc_id=doc_id, + extracted_text=extracted_text, + doc_id_found=doc_id_found, + ) + logger.info( + "Indexing completed: doc_id=%s file=%s", + doc_id, + Path(file_path).name, + ) + shim.stream_log("Document indexing completed") + return ExecutionResult(success=True, data={IKeys.DOC_ID: doc_id}) + except Exception as e: + logger.error( + "Indexing failed: file=%s error=%s", + Path(file_path).name, + str(e), + ) + status_code = getattr(e, "status_code", 500) + raise LegacyExecutorError( + message=f"Error while indexing: {e}", code=status_code + ) from e + finally: + if vector_db is not None: + vector_db.close() + + @staticmethod + def _get_prompt_deps(): + """Lazy-import heavy dependencies for answer_prompt processing. + + These imports trigger llama_index/protobuf loading so they must + not happen at module-collection time (tests). 
+ """ + from executor.executors.answer_prompt import AnswerPromptService + from executor.executors.index import Index + from executor.executors.retrieval import RetrievalService + from executor.executors.variable_replacement import ( + VariableReplacementService, + ) + + from unstract.sdk1.embedding import EmbeddingCompat + from unstract.sdk1.llm import LLM + from unstract.sdk1.vector_db import VectorDB + + return ( + AnswerPromptService, + RetrievalService, + VariableReplacementService, + Index, + LLM, + EmbeddingCompat, + VectorDB, + ) + + @staticmethod + def _sanitize_dict_values(d: dict[str, Any]) -> None: + """Replace 'NA' string values with None inside a dict in-place.""" + for k, v in d.items(): + if isinstance(v, str) and v.lower() == "na": + d[k] = None + + @staticmethod + def _sanitize_null_values( + structured_output: dict[str, Any], + ) -> dict[str, Any]: + """Replace 'NA' strings with None in structured output.""" + for k, v in structured_output.items(): + if isinstance(v, str) and v.lower() == "na": + structured_output[k] = None + elif isinstance(v, list): + for i, item in enumerate(v): + if isinstance(item, str) and item.lower() == "na": + v[i] = None + elif isinstance(item, dict): + LegacyExecutor._sanitize_dict_values(item) + elif isinstance(v, dict): + LegacyExecutor._sanitize_dict_values(v) + return structured_output + + def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: + """Handle ``Operation.ANSWER_PROMPT`` — multi-prompt extraction. + + Migrated from ``prompt_processor()`` in the prompt-service + ``answer_prompt`` controller. Processes all prompts in the + payload: variable replacement, context retrieval, LLM + completion, and type-specific post-processing. 
+ + Returns: + ExecutionResult with ``data`` containing:: + + {"output": dict, "metadata": dict, "metrics": dict} + """ + from executor.executors.constants import ( + PromptServiceConstants as PSKeys, + ) + + params: dict[str, Any] = context.executor_params + + # ---- Unpack payload ------------------------------------------------ + tool_settings = params.get(PSKeys.TOOL_SETTINGS, {}) + prompts = params.get(PSKeys.OUTPUTS, []) + tool_id: str = params.get(PSKeys.TOOL_ID, "") + run_id: str = context.run_id + file_path = params.get(PSKeys.FILE_PATH) + doc_name = str(params.get(PSKeys.FILE_NAME, "")) + execution_source = params.get(PSKeys.EXECUTION_SOURCE, context.execution_source) + platform_api_key: str = params.get(PSKeys.PLATFORM_SERVICE_API_KEY, "") + + structured_output: dict[str, Any] = {} + metadata: dict[str, Any] = { + PSKeys.RUN_ID: run_id, + PSKeys.FILE_NAME: doc_name, + PSKeys.CONTEXT: {}, + PSKeys.REQUIRED_FIELDS: {}, + } + metrics: dict[str, Any] = {} + variable_names: list[str] = [] + context_retrieval_metrics: dict[str, Any] = {} + + logger.info( + "Starting answer_prompt: tool_id=%s prompt_count=%d file=%s run_id=%s", + tool_id, + len(prompts), + doc_name, + run_id, + ) + + # Lazy imports + ( + answer_prompt_svc, + retrieval_svc, + variable_replacement_svc, + _index_cls, # unused — doc_id via IndexingUtils + llm_cls, + embedding_compat_cls, + vector_db_cls, + ) = self._get_prompt_deps() + + # ---- Initialize highlight plugin (if enabled + installed) ---------- + process_text_fn = None + enable_highlight = tool_settings.get(PSKeys.ENABLE_HIGHLIGHT, False) + enable_word_confidence = tool_settings.get(PSKeys.ENABLE_WORD_CONFIDENCE, False) + pipeline_shim = ExecutorToolShim( + platform_api_key=platform_api_key, + log_events_id=self._log_events_id, + component=self._log_component, + ) + if enable_highlight: + from executor.executors.plugins import ExecutorPluginLoader + + highlight_cls = ExecutorPluginLoader.get("highlight-data") + if highlight_cls: + from 
executor.executors.file_utils import FileUtils + + fs_instance = FileUtils.get_fs_instance(execution_source=execution_source) + highlight_instance = highlight_cls( + file_path=file_path, + fs_instance=fs_instance, + enable_word_confidence=enable_word_confidence, + ) + process_text_fn = highlight_instance.run + logger.info( + "Highlight plugin initialized for file=%s", + doc_name, + ) + pipeline_shim.stream_log("Highlight data plugin ready") + else: + logger.warning( + "Highlight is enabled but highlight-data plugin is not " + "installed. Coordinates will not be produced. Install " + "the plugin via: pip install -e " + ) + pipeline_shim.stream_log("Highlight data plugin not available") + + # ---- Merge tool_settings as defaults into each prompt output -------- + # Single-pass payloads carry adapter IDs and chunk config in + # tool_settings only (not per-prompt), while answer_prompt payloads + # carry them per-prompt. Merging tool_settings as a base ensures + # both paths work. + _ts_defaults = { + k: v + for k, v in tool_settings.items() + if k + in { + PSKeys.CHUNK_SIZE, + PSKeys.CHUNK_OVERLAP, + PSKeys.LLM, + PSKeys.VECTOR_DB, + PSKeys.EMBEDDING, + PSKeys.X2TEXT_ADAPTER, + PSKeys.RETRIEVAL_STRATEGY, + PSKeys.SIMILARITY_TOP_K, + } + } + if _ts_defaults: + prompts = [{**_ts_defaults, **p} for p in prompts] + + # ---- First pass: collect variable names + required fields ---------- + for output in prompts: + variable_names.append(output[PSKeys.NAME]) + metadata[PSKeys.REQUIRED_FIELDS][output[PSKeys.NAME]] = output.get( + PSKeys.REQUIRED, None + ) + + # ---- Process each prompt ------------------------------------------- + _deps = ( + answer_prompt_svc, + retrieval_svc, + variable_replacement_svc, + llm_cls, + embedding_compat_cls, + vector_db_cls, + ) + for output in prompts: + self._execute_single_prompt( + output=output, + context=context, + structured_output=structured_output, + metadata=metadata, + metrics=metrics, + variable_names=variable_names, + 
context_retrieval_metrics=context_retrieval_metrics, + deps=_deps, + tool_settings=tool_settings, + process_text_fn=process_text_fn, + ) + + pipeline_shim.stream_log(f"All {len(prompts)} prompts processed successfully") + logger.info( + "All prompts processed: tool_id=%s prompt_count=%d file=%s", + tool_id, + len(prompts), + doc_name, + ) + + # ---- Sanitize null values ------------------------------------------ + structured_output = self._sanitize_null_values(structured_output) + + return ExecutionResult( + success=True, + data={ + PSKeys.OUTPUT: structured_output, + PSKeys.METADATA: metadata, + PSKeys.METRICS: metrics, + }, + ) + + @staticmethod + def _convert_number_answer(answer: str, llm: Any, answer_prompt_svc: Any) -> Any: + """Run LLM number extraction and return float or None.""" + if answer.lower() == "na": + return None + prompt = ( + f"Extract the number from the following " + f"text:\n{answer}\n\nOutput just the number. " + f"If the number is expressed in millions " + f"or thousands, expand the number to its numeric value " + f"The number should be directly assignable " + f"to a numeric variable. " + f"It should not have any commas, " + f"percentages or other grouping " + f"characters. No explanation is required. " + f"If you cannot extract the number, output 0." 
+ ) + raw = answer_prompt_svc.run_completion(llm=llm, prompt=prompt) + try: + return float(raw) + except Exception: + return None + + @staticmethod + def _convert_scalar_answer( + answer: str, llm: Any, answer_prompt_svc: Any, prompt: str + ) -> str | None: + """Run LLM extraction for a scalar (email/date) and return result or None.""" + if answer.lower() == "na": + return None + return answer_prompt_svc.run_completion(llm=llm, prompt=prompt) + + def _run_challenge_if_enabled( + self, + tool_settings: dict[str, Any], + output: dict[str, Any], + structured_output: dict[str, Any], + context_list: list[str], + llm: Any, + llm_cls: Any, + usage_kwargs: dict[str, Any], + run_id: str, + platform_api_key: str, + metadata: dict[str, Any], + shim: Any, + prompt_name: str, + ) -> None: + """Run challenge verification plugin if enabled and available.""" + from executor.executors.constants import PromptServiceConstants as PSKeys + from executor.executors.plugins import ExecutorPluginLoader + + if not tool_settings.get(PSKeys.ENABLE_CHALLENGE): + return + challenge_cls = ExecutorPluginLoader.get("challenge") + if not challenge_cls: + return + challenge_llm_id = tool_settings.get(PSKeys.CHALLENGE_LLM) + if not challenge_llm_id: + return + shim.stream_log(f"Running challenge for: {prompt_name}") + challenge_llm = llm_cls( + adapter_instance_id=challenge_llm_id, + tool=shim, + usage_kwargs={**usage_kwargs, PSKeys.LLM_USAGE_REASON: PSKeys.CHALLENGE}, + capture_metrics=True, + ) + challenger = challenge_cls( + llm=llm, + challenge_llm=challenge_llm, + context="\n".join(context_list), + tool_settings=tool_settings, + output=output, + structured_output=structured_output, + run_id=run_id, + platform_key=platform_api_key, + metadata=metadata, + ) + challenger.run() + shim.stream_log(f"Challenge verification completed for: {prompt_name}") + logger.info("Challenge completed: prompt=%s", prompt_name) + + @staticmethod + def _run_evaluation_if_enabled( + output: dict[str, Any], + 
context_list: list[str], + structured_output: dict[str, Any], + platform_api_key: str, + shim: Any, + prompt_name: str, + ) -> None: + """Run evaluation plugin if enabled and available.""" + from executor.executors.constants import PromptServiceConstants as PSKeys + from executor.executors.plugins import ExecutorPluginLoader + + eval_settings = output.get(PSKeys.EVAL_SETTINGS, {}) + if not eval_settings.get(PSKeys.EVAL_SETTINGS_EVALUATE): + return + evaluator_cls = ExecutorPluginLoader.get("evaluation") + if not evaluator_cls: + return + shim.stream_log(f"Running evaluation for: {prompt_name}") + evaluator = evaluator_cls( + query=output.get(PSKeys.COMBINED_PROMPT, ""), + context="\n".join(context_list), + response=structured_output.get(prompt_name), + reference_answer=output.get("reference_answer", ""), + prompt=output, + structured_output=structured_output, + platform_key=platform_api_key, + ) + evaluator.run() + logger.info("Evaluation completed: prompt=%s", prompt_name) + + def _execute_single_prompt( + self, + output: dict[str, Any], + context: ExecutionContext, + structured_output: dict[str, Any], + metadata: dict[str, Any], + metrics: dict[str, Any], + variable_names: list[str], + context_retrieval_metrics: dict[str, Any], + deps: tuple, + tool_settings: dict[str, Any], + process_text_fn: Any, + ) -> None: + """Execute one prompt: variable replacement, retrieval, LLM, post-process.""" + from executor.executors.constants import PromptServiceConstants as PSKeys + from executor.executors.constants import RetrievalStrategy + + from unstract.sdk1.utils.indexing import IndexingUtils + + ( + answer_prompt_svc, + retrieval_svc, + variable_replacement_svc, + llm_cls, + embedding_compat_cls, + vector_db_cls, + ) = deps + + params = context.executor_params + run_id = context.run_id + execution_id = params.get(PSKeys.EXECUTION_ID, "") + file_hash = params.get(PSKeys.FILE_HASH) + file_path = params.get(PSKeys.FILE_PATH) + doc_name = str(params.get(PSKeys.FILE_NAME, "")) 
+ log_events_id = params.get(PSKeys.LOG_EVENTS_ID, "") + tool_id = params.get(PSKeys.TOOL_ID, "") + custom_data = params.get(PSKeys.CUSTOM_DATA, {}) + execution_source = params.get(PSKeys.EXECUTION_SOURCE, context.execution_source) + platform_api_key = params.get(PSKeys.PLATFORM_SERVICE_API_KEY, "") + + prompt_name = output[PSKeys.NAME] + prompt_text = output[PSKeys.PROMPT] + chunk_size = output[PSKeys.CHUNK_SIZE] + + logger.debug( + "Prompt config: name=%s chunk_size=%d type=%s", + prompt_name, + chunk_size, + output.get(PSKeys.TYPE, "TEXT"), + ) + + shim = ExecutorToolShim( + platform_api_key=platform_api_key, + log_events_id=self._log_events_id, + component={**self._log_component, "prompt_key": prompt_name}, + ) + shim.stream_log(f"Processing prompt: {prompt_name}") + + if variable_replacement_svc.is_variables_present(prompt_text=prompt_text): + prompt_text = variable_replacement_svc.replace_variables_in_prompt( + prompt=output, + structured_output=structured_output, + log_events_id=log_events_id, + tool_id=tool_id, + prompt_name=prompt_name, + doc_name=doc_name, + custom_data=custom_data, + is_ide=execution_source == "ide", + ) + shim.stream_log(f"Resolved template variables for: {prompt_name}") + + logger.info( + "Executing prompt: tool_id=%s name=%s run_id=%s", tool_id, prompt_name, run_id + ) + + output[PSKeys.PROMPTX] = answer_prompt_svc.extract_variable( + structured_output, variable_names, output, prompt_text + ) + + doc_id = IndexingUtils.generate_index_key( + vector_db=output[PSKeys.VECTOR_DB], + embedding=output[PSKeys.EMBEDDING], + x2text=output[PSKeys.X2TEXT_ADAPTER], + chunk_size=str(output[PSKeys.CHUNK_SIZE]), + chunk_overlap=str(output[PSKeys.CHUNK_OVERLAP]), + tool=shim, + file_hash=file_hash, + file_path=file_path, + ) + + if output.get(PSKeys.TYPE) in (PSKeys.TABLE, PSKeys.RECORD): + self._run_table_extraction( + output=output, + context=context, + structured_output=structured_output, + metrics=metrics, + run_id=run_id, + 
execution_id=execution_id, + execution_source=execution_source, + platform_api_key=platform_api_key, + tool_id=tool_id, + doc_name=doc_name, + prompt_name=prompt_name, + shim=shim, + ) + return + + if output.get(PSKeys.TYPE) == PSKeys.LINE_ITEM: + raise LegacyExecutorError(message="LINE_ITEM extraction is not supported.") + + usage_kwargs = {"run_id": run_id, "execution_id": execution_id} + try: + llm = llm_cls( + adapter_instance_id=output[PSKeys.LLM], + tool=shim, + usage_kwargs={**usage_kwargs, PSKeys.LLM_USAGE_REASON: PSKeys.EXTRACTION}, + capture_metrics=True, + ) + vector_db = None + if chunk_size > 0: + embedding = embedding_compat_cls( + adapter_instance_id=output[PSKeys.EMBEDDING], + tool=shim, + kwargs={**usage_kwargs}, + ) + vector_db = vector_db_cls( + tool=shim, + adapter_instance_id=output[PSKeys.VECTOR_DB], + embedding=embedding, + ) + shim.stream_log(f"Initialized LLM and retrieval adapters for: {prompt_name}") + except Exception as e: + msg = f"Couldn't fetch adapter. {e}" + logger.error(msg) + raise LegacyExecutorError( + message=msg, code=getattr(e, "status_code", None) or 500 + ) from e + + context_list: list[str] = [] + try: + answer = "NA" + retrieval_strategy = output.get(PSKeys.RETRIEVAL_STRATEGY) + valid_strategies = {s.value for s in RetrievalStrategy} + if retrieval_strategy in valid_strategies: + shim.stream_log(f"Retrieving context for: {prompt_name}") + logger.info( + "Performing retrieval: prompt=%s strategy=%s chunk_size=%d", + prompt_name, + retrieval_strategy, + chunk_size, + ) + if chunk_size == 0: + context_list = retrieval_svc.retrieve_complete_context( + execution_source=execution_source, + file_path=file_path, + context_retrieval_metrics=context_retrieval_metrics, + prompt_key=prompt_name, + ) + else: + context_list = retrieval_svc.run_retrieval( + output=output, + doc_id=doc_id, + llm=llm, + vector_db=vector_db, + retrieval_type=retrieval_strategy, + context_retrieval_metrics=context_retrieval_metrics, + ) + 
metadata[PSKeys.CONTEXT][prompt_name] = context_list + shim.stream_log( + f"Retrieved {len(context_list)} context chunks for: {prompt_name}" + ) + logger.debug( + "Retrieved %d context chunks for prompt: %s", + len(context_list), + prompt_name, + ) + shim.stream_log(f"Running LLM completion for: {prompt_name}") + answer = answer_prompt_svc.construct_and_run_prompt( + tool_settings=tool_settings, + output=output, + llm=llm, + context="\n".join(context_list), + prompt=PSKeys.PROMPTX, + metadata=metadata, + execution_source=execution_source, + file_path=file_path, + process_text=process_text_fn, + ) + else: + logger.warning( + "Skipping retrieval: invalid strategy=%s for prompt=%s", + retrieval_strategy, + prompt_name, + ) + + self._apply_type_conversion( + output=output, + answer=answer, + structured_output=structured_output, + llm=llm, + tool_settings=tool_settings, + metadata=metadata, + execution_source=execution_source, + file_path=file_path, + log_events_id=log_events_id, + tool_id=tool_id, + doc_name=doc_name, + ) + shim.stream_log(f"Applied type conversion for: {prompt_name}") + + self._run_challenge_if_enabled( + tool_settings=tool_settings, + output=output, + structured_output=structured_output, + context_list=context_list, + llm=llm, + llm_cls=llm_cls, + usage_kwargs=usage_kwargs, + run_id=run_id, + platform_api_key=platform_api_key, + metadata=metadata, + shim=shim, + prompt_name=prompt_name, + ) + self._run_evaluation_if_enabled( + output=output, + context_list=context_list, + structured_output=structured_output, + platform_api_key=platform_api_key, + shim=shim, + prompt_name=prompt_name, + ) + shim.stream_log(f"Completed prompt: {prompt_name}") + + val = structured_output.get(prompt_name) + if isinstance(val, str): + structured_output[prompt_name] = val.rstrip("\n") + finally: + metrics.setdefault(prompt_name, {}).update( + { + "context_retrieval": context_retrieval_metrics.get(prompt_name, {}), + f"{llm.get_usage_reason()}_llm": llm.get_metrics(), + } + 
) + if vector_db: + vector_db.close() + + def _run_table_extraction( + self, + output: dict[str, Any], + context: ExecutionContext, + structured_output: dict[str, Any], + metrics: dict[str, Any], + run_id: str, + execution_id: str, + execution_source: str, + platform_api_key: str, + tool_id: str, + doc_name: str, + prompt_name: str, + shim: Any, + ) -> None: + """Delegate TABLE/RECORD prompt to the table executor plugin.""" + from executor.executors.constants import PromptServiceConstants as PSKeys + + try: + table_executor = ExecutorRegistry.get("table") + except KeyError: + raise LegacyExecutorError( + message=( + "TABLE extraction requires the table executor " + "plugin. Install the table_extractor plugin." + ) + ) + table_ctx = ExecutionContext( + executor_name="table", + operation="table_extract", + run_id=run_id, + execution_source=execution_source, + organization_id=context.organization_id, + request_id=context.request_id, + executor_params={ + "llm_adapter_instance_id": output.get(PSKeys.LLM, ""), + "table_settings": output.get(PSKeys.TABLE_SETTINGS, {}), + "prompt": output.get(PSKeys.PROMPT, ""), + "PLATFORM_SERVICE_API_KEY": platform_api_key, + "execution_id": execution_id, + "tool_id": tool_id, + "file_name": doc_name, + }, + ) + table_ctx._log_component = self._log_component + table_ctx.log_events_id = self._log_events_id + + shim.stream_log(f"Running table extraction for: {prompt_name}") + table_result = table_executor.execute(table_ctx) + + if table_result.success: + structured_output[prompt_name] = table_result.data.get("output", "") + table_metrics = table_result.data.get("metadata", {}).get("metrics", {}) + metrics.setdefault(prompt_name, {}).update( + {"table_extraction": table_metrics} + ) + shim.stream_log(f"Table extraction completed for: {prompt_name}") + logger.info("TABLE extraction completed: prompt=%s", prompt_name) + else: + structured_output[prompt_name] = "" + logger.error( + "TABLE extraction failed for prompt=%s: %s", + prompt_name, + 
table_result.error, + ) + shim.stream_log(f"Completed prompt: {prompt_name}") + + @staticmethod + def _apply_type_conversion( + output: dict[str, Any], + answer: str, + structured_output: dict[str, Any], + llm: Any, + tool_settings: dict[str, Any], + metadata: dict[str, Any], + execution_source: str, + file_path: str, + log_events_id: str = "", + tool_id: str = "", + doc_name: str = "", + ) -> None: + """Apply type-specific conversion to the LLM answer. + + Handles NUMBER, EMAIL, DATE, BOOLEAN, JSON, and TEXT types. + """ + from executor.executors.answer_prompt import ( + AnswerPromptService as answer_prompt_svc, + ) + from executor.executors.constants import PromptServiceConstants as PSKeys + + prompt_name = output[PSKeys.NAME] + output_type = output[PSKeys.TYPE] + + if output_type == PSKeys.NUMBER: + structured_output[prompt_name] = LegacyExecutor._convert_number_answer( + answer, llm, answer_prompt_svc + ) + + elif output_type == PSKeys.EMAIL: + email_prompt = ( + f"Extract the email from the following text:\n{answer}" + f"\n\nOutput just the email. " + f"The email should be directly assignable to a string " + f"variable. No explanation is required. If you cannot " + f'extract the email, output "NA".' + ) + structured_output[prompt_name] = LegacyExecutor._convert_scalar_answer( + answer, llm, answer_prompt_svc, email_prompt + ) + + elif output_type == PSKeys.DATE: + date_prompt = ( + f"Extract the date from the following text:\n{answer}" + f"\n\nOutput just the date. " + f"The date should be in ISO date time format. " + f"No explanation is required. The date should be " + f"directly assignable to a date variable. " + f"If you cannot convert the string into a date, " + f'output "NA".' 
+ ) + structured_output[prompt_name] = LegacyExecutor._convert_scalar_answer( + answer, llm, answer_prompt_svc, date_prompt + ) + + elif output_type == PSKeys.BOOLEAN: + if answer.lower() == "na": + structured_output[prompt_name] = None + else: + bool_prompt = ( + f"Extract yes/no from the following text:\n{answer}\n\n" + f"Output in single word. " + f"If the context is trying to convey that the answer " + f'is true, then return "yes", else return "no".' + ) + raw = answer_prompt_svc.run_completion(llm=llm, prompt=bool_prompt) + structured_output[prompt_name] = raw.lower() == "yes" + + elif output_type == PSKeys.JSON: + answer_prompt_svc.handle_json( + answer=answer, + structured_output=structured_output, + output=output, + llm=llm, + enable_highlight=tool_settings.get(PSKeys.ENABLE_HIGHLIGHT, False), + enable_word_confidence=tool_settings.get( + PSKeys.ENABLE_WORD_CONFIDENCE, False + ), + execution_source=execution_source, + metadata=metadata, + file_path=file_path, + log_events_id=log_events_id, + tool_id=tool_id, + doc_name=doc_name, + ) + + else: + # TEXT or any other type — store raw answer + structured_output[prompt_name] = answer + + def _handle_single_pass_extraction( + self, context: ExecutionContext + ) -> ExecutionResult: + """Handle ``Operation.SINGLE_PASS_EXTRACTION``. + + Delegates to the cloud single_pass_extraction plugin if + available (reads file ONCE, builds ONE combined prompt, makes + ONE LLM call). Falls back to ``_handle_answer_prompt`` if the + plugin is not installed. 
+ + Returns: + ExecutionResult with ``data`` containing:: + + {"output": dict, "metadata": dict, "metrics": dict} + """ + try: + from unstract.sdk1.execution.registry import ExecutorRegistry + + executor = ExecutorRegistry.get("single_pass_extraction") + logger.info( + "Delegating single_pass_extraction to cloud plugin (run_id=%s)", + context.run_id, + ) + return executor.execute(context) + except KeyError: + logger.info( + "No single_pass_extraction plugin; falling back to " + "answer_prompt (run_id=%s)", + context.run_id, + ) + return self._handle_answer_prompt(context) + + def _handle_summarize(self, context: ExecutionContext) -> ExecutionResult: + """Handle ``Operation.SUMMARIZE`` — document summarization. + + Called by the structure tool when ``summarize_as_source`` is + enabled. Takes the full extracted document text and a + user-provided summarize prompt, runs LLM completion, and + returns the summarized text. + + Expected ``executor_params`` keys: + - ``llm_adapter_instance_id`` — LLM adapter to use + - ``summarize_prompt`` — user's summarize instruction + - ``context`` — full document text to summarize + - ``prompt_keys`` — list of field names to focus on + - ``PLATFORM_SERVICE_API_KEY`` — auth key for adapters + + Returns: + ExecutionResult with ``data`` containing:: + + {"data": str} # summarized text + """ + from executor.executors.constants import PromptServiceConstants as PSKeys + + params: dict[str, Any] = context.executor_params + + llm_adapter_id: str = params.get("llm_adapter_instance_id", "") + summarize_prompt: str = params.get("summarize_prompt", "") + doc_context: str = params.get(PSKeys.CONTEXT, "") + prompt_keys: list[str] = params.get("prompt_keys", []) + platform_api_key: str = params.get(PSKeys.PLATFORM_SERVICE_API_KEY, "") + + if not llm_adapter_id: + return ExecutionResult.failure( + error="Missing required param: llm_adapter_instance_id" + ) + if not doc_context: + return ExecutionResult.failure(error="Missing required param: context") 
+ + logger.info( + "Starting summarization: prompt_keys=%s run_id=%s", + prompt_keys, + context.run_id, + ) + + # Build the summarize prompt + prompt = f"{summarize_prompt}\n\n" + if prompt_keys: + prompt += f"Focus on these fields: {', '.join(prompt_keys)}\n\n" + prompt += ( + f"Context:\n---------------\n{doc_context}\n-----------------\n\nSummary:" + ) + + shim = ExecutorToolShim( + platform_api_key=platform_api_key, + log_events_id=self._log_events_id, + component=self._log_component, + ) + usage_kwargs = { + "run_id": context.run_id, + PSKeys.LLM_USAGE_REASON: PSKeys.SUMMARIZE, + } + + _, _, _, _, llm_cls, _, _ = self._get_prompt_deps() + + shim.stream_log("Initializing LLM for summarization...") + try: + llm = llm_cls( + adapter_instance_id=llm_adapter_id, + tool=shim, + usage_kwargs={**usage_kwargs}, + ) + from executor.executors.answer_prompt import ( + AnswerPromptService as answer_prompt_svc, + ) + + shim.stream_log("Running document summarization...") + summary = answer_prompt_svc.run_completion(llm=llm, prompt=prompt) + logger.info("Summarization completed: run_id=%s", context.run_id) + shim.stream_log("Summarization completed") + return ExecutionResult( + success=True, + data={"data": summary}, + ) + except Exception as e: + logger.error("Summarization failed: error=%s", str(e)) + status_code = getattr(e, "status_code", None) or 500 + raise LegacyExecutorError( + message=f"Error during summarization: {e}", + code=status_code, + ) from e diff --git a/workers/executor/executors/plugins/__init__.py b/workers/executor/executors/plugins/__init__.py new file mode 100644 index 0000000000..b730ff12b6 --- /dev/null +++ b/workers/executor/executors/plugins/__init__.py @@ -0,0 +1,3 @@ +from executor.executors.plugins.loader import ExecutorPluginLoader + +__all__ = ["ExecutorPluginLoader"] diff --git a/workers/executor/executors/plugins/loader.py b/workers/executor/executors/plugins/loader.py new file mode 100644 index 0000000000..3f2a54c92a --- /dev/null +++ 
b/workers/executor/executors/plugins/loader.py @@ -0,0 +1,81 @@ +"""Entry-point-based discovery for cloud plugins and executors. + +Two entry point groups are used: + +- ``unstract.executor.plugins`` + Utility plugins (highlight-data, challenge, evaluation). + Loaded lazily on first ``get()`` call and cached. + +- ``unstract.executor.executors`` + Executor classes that self-register via ``@ExecutorRegistry.register``. + Loaded eagerly at worker startup from ``executors/__init__.py``. +""" + +import logging + +logger = logging.getLogger(__name__) + + +class ExecutorPluginLoader: + """Discovers cloud plugins and executors via setuptools entry points.""" + + _plugins: dict[str, type] | None = None + + @classmethod + def get(cls, name: str) -> type | None: + """Get a plugin class by name. Returns None if not installed.""" + if cls._plugins is None: + cls._discover_plugins() + if cls._plugins is None: + return None + return cls._plugins.get(name) + + @classmethod + def discover_executors(cls) -> list[str]: + """Load cloud executor classes via entry points. + + Importing each entry point's class triggers + ``@ExecutorRegistry.register``. Called once at worker startup. + + Returns: + List of discovered executor entry point names. 
+ """ + from importlib.metadata import entry_points + + discovered: list[str] = [] + eps = entry_points(group="unstract.executor.executors") + for ep in eps: + try: + ep.load() # import triggers @ExecutorRegistry.register + discovered.append(ep.name) + logger.info("Loaded cloud executor: %s", ep.name) + except Exception: + logger.warning( + "Failed to load cloud executor: %s", + ep.name, + exc_info=True, + ) + return discovered + + @classmethod + def _discover_plugins(cls) -> None: + """Discover utility plugins from entry points (lazy, first use).""" + from importlib.metadata import entry_points + + cls._plugins = {} + eps = entry_points(group="unstract.executor.plugins") + for ep in eps: + try: + cls._plugins[ep.name] = ep.load() + logger.info("Loaded executor plugin: %s", ep.name) + except Exception: + logger.warning( + "Failed to load executor plugin: %s", + ep.name, + exc_info=True, + ) + + @classmethod + def clear(cls) -> None: + """Reset cached state. Intended for tests only.""" + cls._plugins = None diff --git a/workers/executor/executors/plugins/protocols.py b/workers/executor/executors/plugins/protocols.py new file mode 100644 index 0000000000..fb4d676b37 --- /dev/null +++ b/workers/executor/executors/plugins/protocols.py @@ -0,0 +1,51 @@ +"""Protocol classes defining contracts for cloud executor plugins. + +Cloud plugins must satisfy these protocols. The OSS repo never imports +cloud code — only these protocols and ``ExecutorPluginLoader.get(name)`` +are used to interact with plugins. +""" + +from typing import Any, Protocol, runtime_checkable + + +@runtime_checkable +class HighlightDataProtocol(Protocol): + """Cross-cutting: source attribution from LLMWhisperer metadata. + + Matches the cloud ``HighlightData`` plugin constructor which + accepts ``enable_word_confidence`` (not ``execution_source``). + The filesystem instance is determined by the caller and passed in. 
+ """ + + def __init__( + self, + file_path: str, + fs_instance: Any = None, + enable_word_confidence: bool = False, + **kwargs: Any, + ) -> None: ... + + def run( + self, + response: Any = None, + is_json: bool = False, + original_text: str = "", + **kwargs: Any, + ) -> dict: ... + + @staticmethod + def extract_word_confidence(original_text: str, is_json: bool = False) -> dict: ... + + +@runtime_checkable +class ChallengeProtocol(Protocol): + """Legacy executor: quality verification with a second LLM.""" + + def run(self) -> None: ... + + +@runtime_checkable +class EvaluationProtocol(Protocol): + """Legacy executor: prompt evaluation.""" + + def run(self, **kwargs: Any) -> dict: ... diff --git a/workers/executor/executors/plugins/text_processor.py b/workers/executor/executors/plugins/text_processor.py new file mode 100644 index 0000000000..472d9dc828 --- /dev/null +++ b/workers/executor/executors/plugins/text_processor.py @@ -0,0 +1,19 @@ +"""Pure-function text utilities used by the highlight-data plugin.""" + + +def add_hex_line_numbers(text: str) -> str: + """Add hex line numbers to extracted text for coordinate tracking. + + Each line is prefixed with ``0x: `` where ```` is the + zero-based line index. The hex width auto-adjusts to the total + number of lines. + + Args: + text: Multi-line string to number. + + Returns: + The same text with hex line-number prefixes. + """ + lines = text.split("\n") + hex_width = max(len(hex(len(lines))) - 2, 1) + return "\n".join(f"0x{i:0{hex_width}X}: {line}" for i, line in enumerate(lines)) diff --git a/workers/executor/executors/postprocessor.py b/workers/executor/executors/postprocessor.py new file mode 100644 index 0000000000..bf14a56698 --- /dev/null +++ b/workers/executor/executors/postprocessor.py @@ -0,0 +1,119 @@ +"""Webhook postprocessor for structured output. + +Copied from prompt-service/.../helpers/postprocessor.py — already Flask-free. 
+""" + +import json +import logging +from typing import Any + +import requests + +logger = logging.getLogger(__name__) + + +def _validate_structured_output(data: Any) -> bool: + """Validate that structured output is a dict or list.""" + return isinstance(data, (dict, list)) + + +def _validate_highlight_data(updated_data: Any, original_data: Any) -> Any: + """Validate highlight data and return appropriate value.""" + if ( + updated_data is not None + and updated_data != original_data + and not isinstance(updated_data, list) + ): + logger.warning( + "Ignoring webhook highlight_data due to invalid type (expected list)" + ) + return original_data + return updated_data + + +def _process_successful_response( + response_data: dict, parsed_data: dict, highlight_data: list | None +) -> tuple[dict[str, Any], list | None]: + """Process successful webhook response.""" + if "structured_output" not in response_data: + logger.warning("Response missing 'structured_output' key") + return parsed_data, highlight_data + + updated_parsed_data = response_data["structured_output"] + + if not _validate_structured_output(updated_parsed_data): + logger.warning("Ignoring postprocessing due to invalid structured_output type") + return parsed_data, highlight_data + + updated_highlight_data = response_data.get("highlight_data", highlight_data) + updated_highlight_data = _validate_highlight_data( + updated_highlight_data, highlight_data + ) + + return updated_parsed_data, updated_highlight_data + + +def _make_webhook_request( + webhook_url: str, payload: dict, timeout: float +) -> tuple[dict[str, Any], list | None] | None: + """Make webhook request and return processed response or None on failure.""" + try: + response = requests.post( + webhook_url, + json=payload, + timeout=timeout, + headers={"Content-Type": "application/json"}, + allow_redirects=False, # Prevent redirect-based SSRF + ) + + if response.status_code != 200: + logger.warning( + f"Postprocessing server returned status code: 
{response.status_code}" + ) + return None + + return response.json() + + except json.JSONDecodeError as e: + logger.warning(f"Invalid JSON response from postprocessing server: {e}") + except requests.exceptions.Timeout: + logger.warning(f"Postprocessing server request timed out after {timeout}s") + except requests.exceptions.RequestException as e: + logger.warning(f"Postprocessing server request failed: {e}") + except Exception as e: + logger.warning(f"Unexpected error during postprocessing: {e}") + + return None + + +def postprocess_data( + parsed_data: dict[str, Any], + webhook_enabled: bool = False, + webhook_url: str | None = None, + timeout: float = 2.0, + highlight_data: list | None = None, +) -> tuple[dict[str, Any], list | None]: + """Post-process parsed data by sending it to an external server. + + Args: + parsed_data: The parsed data to be post-processed + webhook_enabled: Whether webhook postprocessing is enabled + webhook_url: URL endpoint for the webhook + timeout: Request timeout in seconds (default: 2.0) + highlight_data: Highlight data from metadata to send to webhook + + Returns: + tuple: (postprocessed_data, updated_highlight_data) + """ + if not webhook_enabled or not webhook_url: + return parsed_data, highlight_data + + payload = {"structured_output": parsed_data} + if highlight_data is not None: + payload["highlight_data"] = highlight_data + + response_data = _make_webhook_request(webhook_url, payload, timeout) + if response_data is None: + return parsed_data, highlight_data + + return _process_successful_response(response_data, parsed_data, highlight_data) diff --git a/workers/executor/executors/retrieval.py b/workers/executor/executors/retrieval.py new file mode 100644 index 0000000000..3b4cd1da0a --- /dev/null +++ b/workers/executor/executors/retrieval.py @@ -0,0 +1,113 @@ +"""Retrieval service — factory for retriever strategies. + +Lazy-imports retriever classes to avoid llama_index/protobuf conflicts +at test-collection time. 
Same pattern as _get_indexing_deps() in Phase 2C. +""" + +import datetime +import logging +from typing import Any + +from executor.executors.constants import RetrievalStrategy + +logger = logging.getLogger(__name__) + + +class RetrievalService: + @staticmethod + def _get_retriever_map() -> dict: + """Lazy-import all retriever classes. + + Returns dict mapping strategy string to class. + Wrapped in a method so tests can mock it. + """ + from executor.executors.retrievers.automerging import AutomergingRetriever + from executor.executors.retrievers.fusion import FusionRetriever + from executor.executors.retrievers.keyword_table import KeywordTableRetriever + from executor.executors.retrievers.recursive import RecursiveRetrieval + from executor.executors.retrievers.router import RouterRetriever + from executor.executors.retrievers.simple import SimpleRetriever + from executor.executors.retrievers.subquestion import SubquestionRetriever + + return { + RetrievalStrategy.SIMPLE.value: SimpleRetriever, + RetrievalStrategy.SUBQUESTION.value: SubquestionRetriever, + RetrievalStrategy.FUSION.value: FusionRetriever, + RetrievalStrategy.RECURSIVE.value: RecursiveRetrieval, + RetrievalStrategy.ROUTER.value: RouterRetriever, + RetrievalStrategy.KEYWORD_TABLE.value: KeywordTableRetriever, + RetrievalStrategy.AUTOMERGING.value: AutomergingRetriever, + } + + @staticmethod + def run_retrieval( + output: dict[str, Any], + doc_id: str, + llm: Any, + vector_db: Any, + retrieval_type: str, + context_retrieval_metrics: dict[str, Any] | None = None, + ) -> list[str]: + """Factory: instantiate and execute the retriever for the given strategy.""" + from executor.executors.constants import PromptServiceConstants as PSKeys + + prompt = output[PSKeys.PROMPTX] + top_k = output[PSKeys.SIMILARITY_TOP_K] + prompt_key = output.get(PSKeys.NAME, "") + start = datetime.datetime.now() + + retriever_map = RetrievalService._get_retriever_map() + retriever_class = retriever_map.get(retrieval_type) + if not 
retriever_class: + raise ValueError(f"Unknown retrieval type: {retrieval_type}") + + retriever = retriever_class( + vector_db=vector_db, + doc_id=doc_id, + prompt=prompt, + top_k=top_k, + llm=llm, + ) + context = retriever.retrieve() + + elapsed = (datetime.datetime.now() - start).total_seconds() + if context_retrieval_metrics is not None: + context_retrieval_metrics[prompt_key] = {"time_taken(s)": elapsed} + + logger.info( + "[Retrieval] prompt='%s' doc_id=%s strategy='%s' top_k=%d " + "chunks=%d time=%.3fs", + prompt_key, + doc_id, + retrieval_type, + top_k, + len(context), + elapsed, + ) + return list(context) + + @staticmethod + def retrieve_complete_context( + execution_source: str, + file_path: str, + context_retrieval_metrics: dict[str, Any] | None = None, + prompt_key: str = "", + ) -> list[str]: + """Load full file content for chunk_size=0 retrieval.""" + from executor.executors.file_utils import FileUtils + + fs = FileUtils.get_fs_instance(execution_source=execution_source) + start = datetime.datetime.now() + content = fs.read(path=file_path, mode="r") + elapsed = (datetime.datetime.now() - start).total_seconds() + + if context_retrieval_metrics is not None: + context_retrieval_metrics[prompt_key] = {"time_taken(s)": elapsed} + + logger.info( + "[Retrieval] prompt='%s' complete_context chars=%d time=%.3fs", + prompt_key, + len(content), + elapsed, + ) + return [content] diff --git a/workers/executor/executors/retrievers/__init__.py b/workers/executor/executors/retrievers/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/workers/executor/executors/retrievers/automerging.py b/workers/executor/executors/retrievers/automerging.py new file mode 100644 index 0000000000..7df911f44f --- /dev/null +++ b/workers/executor/executors/retrievers/automerging.py @@ -0,0 +1,101 @@ +import logging + +from executor.executors.exceptions import RetrievalError +from executor.executors.retrievers.base_retriever import BaseRetriever +from 
llama_index.core import VectorStoreIndex +from llama_index.core.retrievers import AutoMergingRetriever as LlamaAutoMergingRetriever +from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters + +logger = logging.getLogger(__name__) + + +class AutomergingRetriever(BaseRetriever): + """Automerging retrieval using LlamaIndex's native AutoMergingRetriever. + + This retriever merges smaller chunks into larger ones when the smaller chunks + don't contain enough information, providing better context for answers. + """ + + def retrieve(self) -> set[str]: + """Retrieve text chunks using LlamaIndex's native AutoMergingRetriever. + + Returns: + set[str]: A set of text chunks retrieved from the database. + """ + try: + logger.info( + f"Retrieving chunks for {self.doc_id} using LlamaIndex AutoMergingRetriever." + ) + + # Get the vector store index + vector_store_index: VectorStoreIndex = self.vector_db.get_vector_store_index() + + # Create base vector retriever with metadata filters + base_retriever = vector_store_index.as_retriever( + similarity_top_k=self.top_k, + filters=MetadataFilters( + filters=[ + ExactMatchFilter(key="doc_id", value=self.doc_id), + ], + ), + ) + + # Try to use native AutoMergingRetriever + try: + # Create AutoMergingRetriever with the base retriever + auto_merging_retriever = LlamaAutoMergingRetriever( + base_retriever, + storage_context=self.vector_db.get_storage_context() + if hasattr(self.vector_db, "get_storage_context") + else None, + verbose=False, + ) + + # Retrieve nodes using auto-merging + nodes = auto_merging_retriever.retrieve(self.prompt) + + except Exception as e: + logger.error( + "AutoMergingRetriever failed: %s: %s", + type(e).__name__, + e, + exc_info=True, + ) + raise RetrievalError( + f"AutoMergingRetriever failed: {type(e).__name__}: {e}" + ) from e + + # Extract unique text chunks + chunks: set[str] = set() + for node in nodes: + if node.score > 0: + chunks.add(node.get_content()) + else: + logger.info( + f"Node 
score is less than 0. " + f"Ignored: {node.node_id} with score {node.score}" + ) + + logger.info( + f"Successfully retrieved {len(chunks)} chunks using AutoMergingRetriever." + ) + return chunks + + except (ValueError, AttributeError, KeyError, ImportError) as e: + logger.error( + "Error during auto-merging retrieval for %s: %s: %s", + self.doc_id, + type(e).__name__, + e, + exc_info=True, + ) + raise RetrievalError(f"{type(e).__name__}: {e}") from e + except Exception as e: + logger.error( + "Unexpected error during auto-merging retrieval for %s: %s: %s", + self.doc_id, + type(e).__name__, + e, + exc_info=True, + ) + raise RetrievalError(f"Unexpected error: {type(e).__name__}: {e}") from e diff --git a/workers/executor/executors/retrievers/base_retriever.py b/workers/executor/executors/retrievers/base_retriever.py new file mode 100644 index 0000000000..7c3094ef56 --- /dev/null +++ b/workers/executor/executors/retrievers/base_retriever.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from unstract.sdk1.llm import LLM + from unstract.sdk1.vector_db import VectorDB + +from executor.executors.retrievers.retriever_llm import RetrieverLLM + + +class BaseRetriever: + def __init__( + self, + vector_db: VectorDB, + prompt: str, + doc_id: str, + top_k: int, + llm: LLM | None = None, + ): + """Initialize the Retrieval class. + + Args: + vector_db (VectorDB): The vector database instance. + prompt (str): The query prompt. + doc_id (str): Document identifier for query context. + top_k (int): Number of top results to retrieve. + """ + self.vector_db = vector_db + self.prompt = prompt + self.doc_id = doc_id + self.top_k = top_k + self._llm: LLM | None = llm + self._retriever_llm: RetrieverLLM | None = None + + @property + def llm(self) -> RetrieverLLM | None: + """Return a llama-index compatible LLM, lazily created on first access. 
+ + Avoids the cost of RetrieverLLM construction for retrievers that + never use the LLM (Simple, Automerging, Recursive). + """ + if self._llm is None: + return None + if self._retriever_llm is None: + self._retriever_llm = RetrieverLLM(llm=self._llm) + return self._retriever_llm + + def require_llm(self) -> RetrieverLLM: + """Return the llama-index LLM or raise if not configured. + + Call this in retrievers that need an LLM (KeywordTable, Fusion, + Subquestion) to fail early with a clear message instead of + letting llama-index silently fall back to its default OpenAI LLM. + """ + llm = self.llm + if llm is None: + raise ValueError( + f"{type(self).__name__} requires an LLM. " + "Pass llm= when constructing the retriever." + ) + return llm + + @staticmethod + def retrieve() -> set[str]: + return set() diff --git a/workers/executor/executors/retrievers/fusion.py b/workers/executor/executors/retrievers/fusion.py new file mode 100644 index 0000000000..7449ddb8e6 --- /dev/null +++ b/workers/executor/executors/retrievers/fusion.py @@ -0,0 +1,104 @@ +import logging + +from executor.executors.exceptions import RetrievalError +from executor.executors.retrievers.base_retriever import BaseRetriever +from llama_index.core import VectorStoreIndex +from llama_index.core.retrievers import QueryFusionRetriever +from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters + +logger = logging.getLogger(__name__) + + +class FusionRetriever(BaseRetriever): + """Fusion retrieval class using LlamaIndex's native QueryFusionRetriever. + + This technique generates multiple query variations and combines results + using reciprocal rank fusion for improved relevance. + """ + + def retrieve(self) -> set[str]: + """Retrieve text chunks using LlamaIndex's QueryFusionRetriever. + + Returns: + set[str]: A set of text chunks retrieved from the database. 
+ """ + try: + llm = self.require_llm() + logger.info( + f"Retrieving chunks for {self.doc_id} using LlamaIndex QueryFusionRetriever." + ) + + # Get the vector store index + vector_store_index: VectorStoreIndex = self.vector_db.get_vector_store_index() + + # Create multiple retrievers with different parameters for true fusion + filters = MetadataFilters( + filters=[ + ExactMatchFilter(key="doc_id", value=self.doc_id), + ], + ) + + # Retriever 1: Standard similarity search + retriever_1 = vector_store_index.as_retriever( + similarity_top_k=self.top_k, + filters=filters, + ) + + # Retriever 2: Broader search with more candidates + retriever_2 = vector_store_index.as_retriever( + similarity_top_k=self.top_k * 2, + filters=filters, + ) + + # Retriever 3: Focused search with fewer candidates + retriever_3 = vector_store_index.as_retriever( + similarity_top_k=max(1, self.top_k // 2), + filters=filters, + ) + + # Create LlamaIndex QueryFusionRetriever with multiple retrievers + fusion_retriever = QueryFusionRetriever( + [retriever_1, retriever_2, retriever_3], # Multiple retrievers for fusion + similarity_top_k=self.top_k, + num_queries=4, # Generate multiple query variations + mode="simple", # Use simple fusion mode (reciprocal rank fusion) + use_async=False, + verbose=True, + llm=llm, + ) + + # Retrieve nodes using fusion technique + nodes = fusion_retriever.retrieve(self.prompt) + + # Extract unique text chunks + chunks: set[str] = set() + for node in nodes: + if node.score > 0: + chunks.add(node.get_content()) + else: + logger.info( + f"Node score is less than 0. 
" + f"Ignored: {node.node_id} with score {node.score}" + ) + + logger.info(f"Successfully retrieved {len(chunks)} chunks using fusion.") + return chunks + + except (ValueError, AttributeError, KeyError, ImportError) as e: + logger.error( + "Error during fusion retrieval for %s: %s: %s", + self.doc_id, + type(e).__name__, + e, + exc_info=True, + ) + raise RetrievalError(f"{type(e).__name__}: {e}") from e + except Exception as e: + logger.error( + "Unexpected error during fusion retrieval for %s: %s: %s", + self.doc_id, + type(e).__name__, + e, + exc_info=True, + ) + raise RetrievalError(f"Unexpected error: {type(e).__name__}: {e}") from e diff --git a/workers/executor/executors/retrievers/keyword_table.py b/workers/executor/executors/retrievers/keyword_table.py new file mode 100644 index 0000000000..912b2fb7f8 --- /dev/null +++ b/workers/executor/executors/retrievers/keyword_table.py @@ -0,0 +1,89 @@ +import logging + +from executor.executors.exceptions import RetrievalError +from executor.executors.retrievers.base_retriever import BaseRetriever +from llama_index.core import VectorStoreIndex +from llama_index.core.indices.keyword_table import KeywordTableIndex +from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters + +logger = logging.getLogger(__name__) + + +class KeywordTableRetriever(BaseRetriever): + """Keyword table retrieval using LlamaIndex's native KeywordTableIndex.""" + + def retrieve(self) -> set[str]: + """Retrieve text chunks using LlamaIndex's native KeywordTableIndex. + + Returns: + set[str]: A set of text chunks retrieved from the database. + """ + try: + llm = self.require_llm() + logger.info( + f"Retrieving chunks for {self.doc_id} using LlamaIndex KeywordTableIndex." 
+ ) + + # Get documents from vector index for keyword indexing + vector_store_index: VectorStoreIndex = self.vector_db.get_vector_store_index() + + # Get all nodes for the document + all_retriever = vector_store_index.as_retriever( + similarity_top_k=1000, # Get all nodes + filters=MetadataFilters( + filters=[ + ExactMatchFilter(key="doc_id", value=self.doc_id), + ], + ), + ) + + # Retrieve all nodes to build keyword index + all_nodes = all_retriever.retrieve(" ") + + if not all_nodes: + logger.warning(f"No nodes found for doc_id: {self.doc_id}") + return set() + + # Create KeywordTableIndex from nodes using our provided LLM + keyword_index = KeywordTableIndex( + nodes=[node.node for node in all_nodes], + show_progress=True, + llm=llm, + ) + + # Create retriever from keyword index + keyword_retriever = keyword_index.as_retriever( + similarity_top_k=self.top_k, + ) + + # Retrieve nodes using keyword matching + nodes = keyword_retriever.retrieve(self.prompt) + + # Extract unique text chunks + chunks: set[str] = set() + for node in nodes: + chunks.add(node.get_content()) + + logger.info( + f"Successfully retrieved {len(chunks)} chunks using KeywordTableIndex." 
+ ) + return chunks + + except (ValueError, AttributeError, KeyError, ImportError) as e: + logger.error( + "Error during keyword retrieval for %s: %s: %s", + self.doc_id, + type(e).__name__, + e, + exc_info=True, + ) + raise RetrievalError(f"{type(e).__name__}: {e}") from e + except Exception as e: + logger.error( + "Unexpected error during keyword retrieval for %s: %s: %s", + self.doc_id, + type(e).__name__, + e, + exc_info=True, + ) + raise RetrievalError(f"Unexpected error: {type(e).__name__}: {e}") from e diff --git a/workers/executor/executors/retrievers/recursive.py b/workers/executor/executors/retrievers/recursive.py new file mode 100644 index 0000000000..0ad09a6b78 --- /dev/null +++ b/workers/executor/executors/retrievers/recursive.py @@ -0,0 +1,86 @@ +import logging + +from executor.executors.exceptions import RetrievalError +from executor.executors.retrievers.base_retriever import BaseRetriever +from llama_index.core import VectorStoreIndex +from llama_index.core.retrievers import RecursiveRetriever +from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters + +logger = logging.getLogger(__name__) + + +class RecursiveRetrieval(BaseRetriever): + """Recursive retrieval using LlamaIndex's native RecursiveRetriever. + + This retriever performs recursive retrieval by breaking down queries + and refining results through multiple retrieval steps. + """ + + def retrieve(self) -> set[str]: + """Retrieve text chunks using LlamaIndex's native RecursiveRetriever. + + Returns: + set[str]: A set of text chunks retrieved from the database. + """ + try: + logger.info( + f"Retrieving chunks for {self.doc_id} using LlamaIndex RecursiveRetriever." 
+ ) + + # Get the vector store index + vector_store_index: VectorStoreIndex = self.vector_db.get_vector_store_index() + + # Create base retriever with metadata filters + base_retriever = vector_store_index.as_retriever( + similarity_top_k=self.top_k, + filters=MetadataFilters( + filters=[ + ExactMatchFilter(key="doc_id", value=self.doc_id), + ], + ), + ) + + # Create RecursiveRetriever + recursive_retriever = RecursiveRetriever( + "vector", # root retriever key + retriever_dict={"vector": base_retriever}, + verbose=True, + ) + + # Retrieve nodes using RecursiveRetriever + nodes = recursive_retriever.retrieve(self.prompt) + + # Extract unique text chunks + chunks: set[str] = set() + for node in nodes: + if node.score > 0: + chunks.add(node.get_content()) + else: + logger.info( + f"Node score is less than 0. " + f"Ignored: {node.node_id} with score {node.score}" + ) + + logger.info( + f"Successfully retrieved {len(chunks)} chunks using RecursiveRetriever." + ) + return chunks + + except (ValueError, AttributeError, KeyError, ImportError) as e: + logger.error( + "Error during recursive retrieval for %s: %s: %s", + self.doc_id, + type(e).__name__, + e, + exc_info=True, + ) + raise RetrievalError(f"{type(e).__name__}: {e}") from e + except Exception as e: + logger.error( + "Unexpected error during recursive retrieval for %s: %s: %s", + self.doc_id, + type(e).__name__, + e, + exc_info=True, + ) + raise RetrievalError(f"Unexpected error: {type(e).__name__}: {e}") from e diff --git a/workers/executor/executors/retrievers/retriever_llm.py b/workers/executor/executors/retrievers/retriever_llm.py new file mode 100644 index 0000000000..c2038f9181 --- /dev/null +++ b/workers/executor/executors/retrievers/retriever_llm.py @@ -0,0 +1,126 @@ +from collections.abc import Sequence +from typing import Any + +from llama_index.core.base.llms.types import ( + ChatMessage, + ChatResponse, + ChatResponseAsyncGen, + ChatResponseGen, + CompletionResponse, + CompletionResponseAsyncGen, + 
CompletionResponseGen, + LLMMetadata, + MessageRole, +) +from llama_index.core.llms.llm import LLM as LlamaIndexBaseLLM # noqa: N811 +from pydantic import PrivateAttr + +from unstract.sdk1.llm import LLM, LLMCompat + + +class RetrieverLLM(LlamaIndexBaseLLM): + """Bridges SDK1's LLMCompat with llama-index's LLM for retriever use. + + Llama-index's ``resolve_llm()`` asserts ``isinstance(llm, LLM)`` + where ``LLM`` is ``llama_index.core.llms.llm.LLM``. Since SDK1's + ``LLMCompat`` is a plain class without llama-index inheritance, + it fails this check. + + ``RetrieverLLM`` inherits from llama-index's ``LLM`` base class + (passing the isinstance check) and delegates all LLM calls to an + internal ``LLMCompat`` instance. + """ + + _compat: LLMCompat = PrivateAttr() + + def __init__(self, llm: LLM, **kwargs: Any) -> None: # noqa: ANN401 + """Initialize with an SDK1 LLM instance.""" + super().__init__(**kwargs) + self._compat = LLMCompat.from_llm(llm) + + @property + def metadata(self) -> LLMMetadata: + return LLMMetadata( + is_chat_model=True, + model_name=self._compat.get_model_name(), + ) + + # ── Sync ───────────────────────────────────────────────────────────────── + + def chat( + self, + messages: Sequence[ChatMessage], + **kwargs: Any, # noqa: ANN401 + ) -> ChatResponse: + result = self._compat.chat(messages, **kwargs) + return ChatResponse( + message=ChatMessage( + role=MessageRole.ASSISTANT, + content=result.message.content, + ), + raw=result.raw, + ) + + def complete( + self, + prompt: str, + formatted: bool = False, + **kwargs: Any, # noqa: ANN401 + ) -> CompletionResponse: + result = self._compat.complete(prompt, formatted=formatted, **kwargs) + return CompletionResponse(text=result.text, raw=result.raw) + + def stream_chat( + self, + messages: Sequence[ChatMessage], + **kwargs: Any, # noqa: ANN401 + ) -> ChatResponseGen: + raise NotImplementedError("stream_chat is not supported.") + + def stream_complete( + self, + prompt: str, + formatted: bool = False, + 
class RouterRetriever(BaseRetriever):
    """Retriever that lets an LLM choose between several query engines.

    Three ``QueryEngineTool`` entries are built over the same vector store
    index.  All of them are filtered to ``self.doc_id``; they differ only in
    ``similarity_top_k`` (``top_k``, ``top_k * 2``, ``top_k * 3``) and in the
    description shown to the selector.  A ``RouterQueryEngine`` driven by an
    ``LLMSingleSelector`` answers ``self.prompt`` and the response's source
    nodes are returned as text chunks.
    """

    def _create_metadata_filters(self):
        """Build the filter set that restricts results to ``self.doc_id``."""
        doc_filter = ExactMatchFilter(key="doc_id", value=self.doc_id)
        return MetadataFilters(filters=[doc_filter])

    def _create_base_query_engine(self, vector_store_index, filters, llm):
        """Build the default query engine, using ``self.top_k`` results."""
        return vector_store_index.as_query_engine(
            llm=llm,
            filters=filters,
            similarity_top_k=self.top_k,
        )

    def _add_keyword_search_tool(
        self, query_engine_tools, vector_store_index, filters, llm
    ):
        """Append the ``keyword_search`` tool (``top_k * 2`` results).

        Best effort: any exception is logged at debug level and the tool is
        simply not added.
        """
        try:
            engine = vector_store_index.as_query_engine(
                llm=llm,
                filters=filters,
                similarity_top_k=self.top_k * 2,
            )
            tool_metadata = ToolMetadata(
                name="keyword_search",
                description=(
                    "Best for finding specific terms, names, numbers, dates, "
                    "or exact phrases. Use when looking for precise matches."
                ),
            )
            query_engine_tools.append(
                QueryEngineTool(query_engine=engine, metadata=tool_metadata)
            )
        except Exception as e:
            logger.debug(f"Could not create keyword search engine: {e}")

    def _add_broad_search_tool(
        self, query_engine_tools, vector_store_index, filters, llm
    ):
        """Append the ``broad_search`` tool (``top_k * 3`` results).

        Best effort: any exception is logged at debug level and the tool is
        simply not added.
        """
        try:
            engine = vector_store_index.as_query_engine(
                llm=llm,
                filters=filters,
                similarity_top_k=self.top_k * 3,
            )
            tool_metadata = ToolMetadata(
                name="broad_search",
                description=(
                    "Useful for general questions, exploratory queries, "
                    "or when you need comprehensive information on a topic."
                ),
            )
            query_engine_tools.append(
                QueryEngineTool(query_engine=engine, metadata=tool_metadata)
            )
        except Exception as e:
            logger.debug(f"Could not create broad search engine: {e}")

    def _extract_chunks_from_response(self, response):
        """Collect the text of every source node whose score is above zero.

        Responses without a ``source_nodes`` attribute yield an empty set.
        """
        chunks: set[str] = set()
        if not hasattr(response, "source_nodes"):
            return chunks

        for source_node in response.source_nodes:
            if not source_node.score > 0:
                logger.info(
                    f"Node score is less than 0. "
                    f"Ignored: {source_node.node_id} with score {source_node.score}"
                )
                continue
            chunks.add(source_node.get_content())
        return chunks

    def retrieve(self) -> set[str]:
        """Route ``self.prompt`` through a ``RouterQueryEngine``.

        Returns:
            set[str]: Text of the source nodes of the router's response.

        Raises:
            RetrievalError: Wraps any exception raised while building the
                engines or running the query.
        """
        try:
            logger.info(
                f"Retrieving chunks for {self.doc_id} using LlamaIndex RouterQueryEngine."
            )

            llm = self.require_llm()

            index: VectorStoreIndex = self.vector_db.get_vector_store_index()
            doc_filters = self._create_metadata_filters()
            base_engine = self._create_base_query_engine(index, doc_filters, llm)

            # The plain vector tool is always present; the other two are
            # added on a best-effort basis by the helpers below.
            vector_tool = QueryEngineTool(
                query_engine=base_engine,
                metadata=ToolMetadata(
                    name="vector_search",
                    description=(
                        "Useful for semantic similarity search, conceptual questions, "
                        "and finding information based on meaning and context."
                    ),
                ),
            )
            tools = [vector_tool]
            self._add_keyword_search_tool(tools, index, doc_filters, llm)
            self._add_broad_search_tool(tools, index, doc_filters, llm)

            selector = LLMSingleSelector.from_defaults(llm=llm)
            router = RouterQueryEngine.from_defaults(
                selector=selector,
                query_engine_tools=tools,
                verbose=True,
                llm=llm,
            )

            chunks = self._extract_chunks_from_response(router.query(self.prompt))

            logger.info(f"Successfully retrieved {len(chunks)} chunks using router.")
            return chunks

        except (ValueError, AttributeError, KeyError, ImportError) as e:
            logger.error(
                "Error during router retrieval for %s: %s: %s",
                self.doc_id,
                type(e).__name__,
                e,
                exc_info=True,
            )
            raise RetrievalError(f"{type(e).__name__}: {e}") from e
        except Exception as e:
            logger.error(
                "Unexpected error during router retrieval for %s: %s: %s",
                self.doc_id,
                type(e).__name__,
                e,
                exc_info=True,
            )
            raise RetrievalError(f"Unexpected error: {type(e).__name__}: {e}") from e
class SubquestionRetriever(BaseRetriever):
    """Retrieve chunks by running the prompt through a SubQuestionQueryEngine.

    A single query-engine tool is built over the vector store index and handed
    to LlamaIndex's ``SubQuestionQueryEngine`` together with an
    ``LLMQuestionGenerator``.
    """

    def retrieve(self) -> set[str]:
        """Query the vector DB with ``self.prompt`` and collect node texts.

        Returns:
            set[str]: The ``text`` of every source node in the response.

        Raises:
            RetrievalError: Wraps any exception raised while building the
                engines or running the query.
        """
        try:
            llm = self.require_llm()
            logger.info("Initialising vector query engine...")
            # NOTE(review): no doc_id MetadataFilters are passed here, unlike
            # the other retrievers in this package - confirm that is intended.
            index = self.vector_db.get_vector_store_index()
            base_engine = index.as_query_engine(llm=llm, similarity_top_k=self.top_k)
            logger.info(
                f"Retrieving chunks for {self.doc_id} using SubQuestionQueryEngine."
            )
            doc_tool = QueryEngineTool(
                query_engine=base_engine,
                metadata=ToolMetadata(
                    name=self.doc_id, description=f"Nodes for {self.doc_id}"
                ),
            )
            bundle = QueryBundle(query_str=self.prompt)

            sub_question_engine = SubQuestionQueryEngine.from_defaults(
                query_engine_tools=[doc_tool],
                question_gen=LLMQuestionGenerator.from_defaults(llm=llm),
                use_async=True,
                llm=llm,
            )

            response = sub_question_engine.query(str_or_query_bundle=bundle)

            chunks: set[str] = set()
            for source_node in response.source_nodes:
                chunks.add(source_node.text)
            logger.info(f"Successfully retrieved {len(chunks)} chunks.")
            return chunks

        except (ValueError, AttributeError, KeyError, ImportError) as e:
            logger.error(
                "Error during retrieving chunks %s: %s: %s",
                self.doc_id,
                type(e).__name__,
                e,
                exc_info=True,
            )
            raise RetrievalError(f"{type(e).__name__}: {e}") from e
        except Exception as e:
            logger.error(
                "Unexpected error during retrieving chunks %s: %s: %s",
                self.doc_id,
                type(e).__name__,
                e,
                exc_info=True,
            )
            raise RetrievalError(f"Unexpected error: {type(e).__name__}: {e}") from e
class UsageHelper:
    """Static helpers for pushing usage records and formatting cost values."""

    @staticmethod
    def push_usage_data(
        event_type: str,
        kwargs: dict[str, Any],
        platform_api_key: str,
        token_counter: Any = None,
        model_name: str = "",
    ) -> bool:
        """Validate the arguments and forward them to ``Audit().push_usage_data``.

        Args:
            event_type: Type of usage event (e.g. "llm", "embedding").
            kwargs: Context dict; must be a non-empty ``dict``.
            platform_api_key: Platform API key; must be a non-empty ``str``.
            token_counter: Token counter passed through to the audit call.
            model_name: Name of the model used.

        Returns:
            True when the audit call completed, False when validation failed
            or any exception was raised (the exception is logged, not
            propagated).
        """
        kwargs_ok = kwargs and isinstance(kwargs, dict)
        if not kwargs_ok:
            logger.error("Invalid kwargs provided to push_usage_data")
            return False

        api_key_ok = platform_api_key and isinstance(platform_api_key, str)
        if not api_key_ok:
            logger.error("Invalid platform_api_key provided to push_usage_data")
            return False

        try:
            # Imported lazily, inside the guarded region, so an import failure
            # is reported like any other push failure.
            from unstract.sdk1.audit import Audit

            logger.debug(
                "Pushing usage data for event_type=%s model=%s",
                event_type,
                model_name,
            )

            audit = Audit()
            audit.push_usage_data(
                platform_api_key=platform_api_key,
                token_counter=token_counter,
                model_name=model_name,
                event_type=event_type,
                kwargs=kwargs,
            )

            logger.info("Successfully pushed usage data for %s", model_name)
            return True
        except Exception:
            logger.exception("Error pushing usage data")
            return False

    @staticmethod
    def format_float_positional(value: float, precision: int = 10) -> str:
        """Render ``value`` in fixed-point notation without trailing zeros.

        The number is formatted with ``precision`` decimals; trailing zeros
        and a then-dangling decimal point are removed.
        """
        text: str = format(value, f".{precision}f")
        if "." not in text:
            return text
        return text.rstrip("0").rstrip(".")
# ---------------------------------------------------------------------------
# VariableReplacementHelper — low-level replacement logic
# ---------------------------------------------------------------------------


class VariableReplacementHelper:
    """Stateless helpers that substitute ``{{variable}}`` markers in prompts."""

    @staticmethod
    def replace_static_variable(
        prompt: str, structured_output: dict[str, Any], variable: str
    ) -> str:
        """Replace ``{{variable}}`` with its value from ``structured_output``.

        The prompt is returned unchanged when the variable is missing from
        ``structured_output`` or its value is falsy.
        """
        output_value = VariableReplacementHelper.check_static_variable_run_status(
            structure_output=structured_output, variable=variable
        )
        if not output_value:
            return prompt
        static_variable_marker_string = "".join(["{{", variable, "}}"])
        return VariableReplacementHelper.replace_generic_string_value(
            prompt=prompt, variable=static_variable_marker_string, value=output_value
        )

    @staticmethod
    def check_static_variable_run_status(
        structure_output: dict[str, Any], variable: str
    ) -> Any:
        """Return ``structure_output[variable]``, or ``None`` if it is absent.

        A missing key is logged as a warning rather than raised.
        """
        try:
            return structure_output[variable]
        except KeyError:
            logger.warning(
                "Prompt with %s is not executed yet. Unable to replace the variable",
                variable,
            )
            return None

    @staticmethod
    def replace_generic_string_value(prompt: str, variable: str, value: Any) -> str:
        """Replace every occurrence of ``variable`` in ``prompt`` with ``value``.

        Non-string values are serialized with ``handle_json_and_str_types``.
        """
        formatted_value: str = value
        if not isinstance(value, str):
            formatted_value = VariableReplacementHelper.handle_json_and_str_types(value)
        return prompt.replace(variable, formatted_value)

    @staticmethod
    def handle_json_and_str_types(value: Any) -> str:
        """Serialize ``value`` as JSON, falling back to ``str(value)``.

        ``json.dumps`` raises ``TypeError`` for objects it cannot serialize
        and ``ValueError`` for e.g. circular references; both trigger the
        fallback.
        """
        try:
            return json.dumps(value)
        except (TypeError, ValueError):
            return str(value)

    @staticmethod
    def identify_variable_type(variable: str) -> VariableType:
        """Classify ``variable`` as CUSTOM_DATA, DYNAMIC or STATIC.

        The custom-data pattern is checked first, then the dynamic URL
        pattern; anything else is STATIC.
        """
        if re.search(VariableConstants.CUSTOM_DATA_VARIABLE_REGEX, variable):
            return VariableType.CUSTOM_DATA

        if re.search(VariableConstants.DYNAMIC_VARIABLE_URL_REGEX, variable):
            return VariableType.DYNAMIC

        return VariableType.STATIC

    @staticmethod
    def replace_dynamic_variable(
        prompt: str, variable: str, structured_output: dict[str, Any]
    ) -> str:
        """Replace ``{{variable}}`` with the response of an HTTP POST.

        The URL and the name of the data variable are extracted from
        ``variable``; the data variable's value from ``structured_output`` is
        posted to the URL.  The prompt is returned unchanged when that value
        is missing or falsy.
        """
        # Callers are expected to pass variables already classified as
        # DYNAMIC, so the URL pattern is known to match.
        url = re.search(VariableConstants.DYNAMIC_VARIABLE_URL_REGEX, variable).group(0)
        data = re.findall(VariableConstants.DYNAMIC_VARIABLE_DATA_REGEX, variable)[0]
        output_value = VariableReplacementHelper.check_static_variable_run_status(
            structure_output=structured_output, variable=data
        )
        if not output_value:
            return prompt
        api_response: Any = VariableReplacementHelper.fetch_dynamic_variable_value(
            url=url, data=output_value
        )
        formatted_api_response: str = VariableReplacementHelper.handle_json_and_str_types(
            api_response
        )
        static_variable_marker_string = "".join(["{{", variable, "}}"])
        return VariableReplacementHelper.replace_generic_string_value(
            prompt=prompt,
            variable=static_variable_marker_string,
            value=formatted_api_response,
        )

    @staticmethod
    def replace_custom_data_variable(
        prompt: str,
        variable: str,
        custom_data: dict[str, Any],
        is_ide: bool = True,
    ) -> str:
        """Replace ``{{variable}}`` with a value looked up in ``custom_data``.

        The dotted path captured by the custom-data pattern is walked key by
        key through ``custom_data``.

        Raises:
            CustomDataError: If ``variable`` does not match the pattern,
                ``custom_data`` is empty, or the path cannot be resolved.
        """
        custom_data_match = re.search(
            VariableConstants.CUSTOM_DATA_VARIABLE_REGEX, variable
        )
        if not custom_data_match:
            error_msg = "Invalid variable format."
            logger.error("%s: %s", error_msg, variable)
            raise CustomDataError(variable=variable, reason=error_msg, is_ide=is_ide)

        path_str = custom_data_match.group(1)
        path_parts = path_str.split(".")

        if not custom_data:
            error_msg = "Custom data is not configured."
            logger.error(error_msg)
            raise CustomDataError(variable=path_str, reason=error_msg, is_ide=is_ide)

        try:
            value = custom_data
            for part in path_parts:
                value = value[part]
        except (KeyError, TypeError) as e:
            error_msg = f"Key '{path_str}' not found in custom data."
            logger.error(error_msg)
            raise CustomDataError(
                variable=path_str, reason=error_msg, is_ide=is_ide
            ) from e

        variable_marker_string = "".join(["{{", variable, "}}"])
        return VariableReplacementHelper.replace_generic_string_value(
            prompt=prompt,
            variable=variable_marker_string,
            value=value,
        )

    @staticmethod
    @lru_cache(maxsize=128)
    def _extract_variables_cached(prompt_text: str) -> tuple[str, ...]:
        """Return all variable names found in ``prompt_text`` (memoized)."""
        return tuple(re.findall(VariableConstants.VARIABLE_REGEX, prompt_text))

    @staticmethod
    def extract_variables_from_prompt(prompt_text: str) -> list[str]:
        """Return the variables found in ``prompt_text`` as a fresh list."""
        return list(VariableReplacementHelper._extract_variables_cached(prompt_text))

    @staticmethod
    def fetch_dynamic_variable_value(url: str, data: str) -> Any:
        """POST ``data`` as ``text/plain`` to ``url`` and return the response.

        The body is decoded as JSON when the response's media type is
        ``application/json`` (parameters such as ``charset`` are ignored);
        otherwise the response text is returned.

        Raises:
            LegacyExecutorError: On any ``requests`` failure, carrying the
                HTTP status code when one is available, else 500.
        """
        headers = {"Content-Type": "text/plain"}
        try:
            response = pyrequests.post(url, data=data, headers=headers, timeout=30)
            response.raise_for_status()
            # Compare only the media type: servers commonly send
            # "application/json; charset=utf-8".
            content_type = response.headers.get("content-type") or ""
            media_type = content_type.split(";", 1)[0].strip().lower()
            if media_type == "application/json":
                return response.json()
            return response.text
        except RequestException as e:
            logger.error("HTTP request error fetching dynamic variable: %s", e)
            status_code = None
            if getattr(e, "response", None) is not None:
                status_code = getattr(e.response, "status_code", None)
            raise LegacyExecutorError(
                message=f"HTTP POST to {url} failed: {e!s}",
                code=status_code or 500,
            ) from e


# ---------------------------------------------------------------------------
# VariableReplacementService — high-level orchestration
# ---------------------------------------------------------------------------


class VariableReplacementService:
    """Entry points that apply all variable replacements to a prompt."""

    @staticmethod
    def is_variables_present(prompt_text: str) -> bool:
        """Return True when ``prompt_text`` contains at least one variable."""
        return bool(VariableReplacementHelper.extract_variables_from_prompt(prompt_text))

    @staticmethod
    def replace_variables_in_prompt(
        prompt: dict[str, Any],
        structured_output: dict[str, Any],
        prompt_name: str,
        tool_id: str = "",
        log_events_id: str = "",
        doc_name: str = "",
        custom_data: dict[str, Any] | None = None,
        is_ide: bool = True,
    ) -> str:
        """Return the prompt text with every variable replaced.

        Values are taken from the prompt's own variable map when it has one,
        otherwise from ``structured_output``.  ``log_events_id`` and
        ``doc_name`` are accepted but not used by this function.
        """
        from executor.executors.constants import PromptServiceConstants as PSKeys

        logger.info("[%s] Replacing variables in prompt: %s", tool_id, prompt_name)

        prompt_text = prompt[PSKeys.PROMPT]
        # Only the lookup is guarded, so a KeyError raised during replacement
        # is not mistaken for a missing variable map (which would rerun the
        # replacement, including any dynamic HTTP calls).
        try:
            variable_map = prompt[PSKeys.VARIABLE_MAP]
        except KeyError:
            variable_map = structured_output
        return VariableReplacementService._execute_variable_replacement(
            prompt_text=prompt_text,
            variable_map=variable_map,
            custom_data=custom_data,
            is_ide=is_ide,
        )

    @staticmethod
    def _execute_variable_replacement(
        prompt_text: str,
        variable_map: dict[str, Any],
        custom_data: dict[str, Any] | None = None,
        is_ide: bool = True,
    ) -> str:
        """Replace each variable found in ``prompt_text`` according to its type."""
        variables: list[str] = VariableReplacementHelper.extract_variables_from_prompt(
            prompt_text=prompt_text
        )
        for variable in variables:
            variable_type = VariableReplacementHelper.identify_variable_type(
                variable=variable
            )
            if variable_type == VariableType.STATIC:
                prompt_text = VariableReplacementHelper.replace_static_variable(
                    prompt=prompt_text,
                    structured_output=variable_map,
                    variable=variable,
                )
            elif variable_type == VariableType.DYNAMIC:
                prompt_text = VariableReplacementHelper.replace_dynamic_variable(
                    prompt=prompt_text,
                    variable=variable,
                    structured_output=variable_map,
                )
            elif variable_type == VariableType.CUSTOM_DATA:
                prompt_text = VariableReplacementHelper.replace_custom_data_variable(
                    prompt=prompt_text,
                    variable=variable,
                    custom_data=custom_data or {},
                    is_ide=is_ide,
                )
        return prompt_text
@shared_task(
    bind=True,
    name=TaskName.EXECUTE_EXTRACTION,
    autoretry_for=(ConnectionError, TimeoutError, OSError),
    retry_backoff=True,
    retry_backoff_max=60,
    max_retries=3,
    retry_jitter=True,
)
def execute_extraction(self, execution_context_dict: dict) -> dict:
    """Run one extraction operation through ``ExecutionOrchestrator``.

    The incoming dict is deserialized into an ``ExecutionContext``, a log
    correlation dict is attached to it as ``_log_component``, and the
    orchestrator's result is returned in serialized form.

    Args:
        execution_context_dict: Serialized ExecutionContext.

    Returns:
        Serialized ExecutionResult dict; a failure result when the context
        cannot be deserialized.
    """
    logger.info(
        "Received execute_extraction task: "
        "celery_task_id=%s request_id=%s executor=%s "
        "operation=%s execution_source=%s run_id=%s",
        self.request.id,
        execution_context_dict.get("request_id", ""),
        execution_context_dict.get("executor_name"),
        execution_context_dict.get("operation"),
        execution_context_dict.get("execution_source"),
        execution_context_dict.get("run_id"),
    )

    try:
        context = ExecutionContext.from_dict(execution_context_dict)
    except (KeyError, ValueError) as exc:
        logger.error("Invalid execution context: %s", exc, exc_info=True)
        failure = ExecutionResult.failure(error=f"Invalid execution context: {exc}")
        return failure.to_dict()

    # Component dict used for log correlation when streaming to the
    # frontend.  It is attached as a transient attribute (not serialized) and
    # stays empty when the context carries no log_events_id.
    log_component: dict = {}
    if context.log_events_id:
        params = context.executor_params
        # Compound operations keep the tool id and the document name in
        # nested parameter dicts; everything else has them at the top level.
        if context.operation == "ide_index":
            tool_source = params.get("index_params", {})
            extract_params = params.get("extract_params", {})
            name_source = extract_params.get("usage_kwargs", {})
            name_key = "file_name"
        elif context.operation == "structure_pipeline":
            tool_source = params.get("answer_params", {})
            name_source = params.get("pipeline_options", {})
            name_key = "source_file_name"
        else:
            tool_source = params
            name_source = params
            name_key = "file_name"
        log_component = {
            "tool_id": tool_source.get("tool_id", ""),
            "run_id": context.run_id,
            "doc_name": str(name_source.get(name_key, "")),
            "operation": context.operation,
        }
    context._log_component = log_component

    result = ExecutionOrchestrator().execute(context)

    logger.info(
        "execute_extraction complete: celery_task_id=%s request_id=%s success=%s",
        self.request.id,
        context.request_id,
        result.success,
    )

    return result.to_dict()
import logging

from shared.enums.worker_enums import WorkerType
from shared.infrastructure.config.builder import WorkerBuilder
from shared.infrastructure.config.registry import WorkerRegistry
from shared.infrastructure.logging import WorkerLogger

# Setup worker
logger = WorkerLogger.setup(WorkerType.EXECUTOR)
app, config = WorkerBuilder.build_celery_app(WorkerType.EXECUTOR)

# Suppress Celery trace logging of task return values.
# The trace logger prints the full result dict on task success, which
# can contain sensitive customer data (extracted text, summaries, etc.).
logging.getLogger("celery.app.trace").setLevel(logging.WARNING)


def check_executor_health():
    """Report the executor worker's health for the worker registry.

    Returns:
        A ``HealthCheckResult`` with status ``HEALTHY`` and the list/count of
        registered executors, or with status ``DEGRADED`` and the error text
        when listing the executors raises.
    """
    # Imported inside the function rather than at module import time.
    from shared.infrastructure.monitoring.health import (
        HealthCheckResult,
        HealthStatus,
    )

    try:
        from unstract.sdk1.execution.registry import (
            ExecutorRegistry,
        )

        executors = ExecutorRegistry.list_executors()

        return HealthCheckResult(
            name="executor_health",
            status=HealthStatus.HEALTHY,
            message="Executor worker is healthy",
            details={
                "worker_type": "executor",
                "registered_executors": executors,
                "executor_count": len(executors),
                # Queue name is hard-coded here, not read from config.
                "queues": ["celery_executor_legacy"],
            },
        )

    except Exception as e:
        # Any failure (including the import above) is reported as DEGRADED
        # instead of being raised to the caller.
        return HealthCheckResult(
            name="executor_health",
            status=HealthStatus.DEGRADED,
            message=f"Health check failed: {e}",
            details={"error": str(e)},
        )


# Register health check
WorkerRegistry.register_health_check(
    WorkerType.EXECUTOR,
    "executor_health",
    check_executor_health,
)


@app.task(bind=True)
def healthcheck(self):
    """Health check task for monitoring systems.

    Returns a static "healthy" payload with the Celery task id and the
    worker name (``"executor-worker"`` when ``config`` is falsy).
    """
    return {
        "status": "healthy",
        "worker_type": "executor",
        "task_id": self.request.id,
        "worker_name": (config.worker_name if config else "executor-worker"),
    }


# Import tasks so shared_task definitions bind to this app.
# Import executors to trigger @ExecutorRegistry.register at import time.
+import executor.executors # noqa: E402, F401 +import executor.tasks # noqa: E402, F401 diff --git a/workers/file_processing/__init__.py b/workers/file_processing/__init__.py index b3f8b74a97..b2b8ece391 100644 --- a/workers/file_processing/__init__.py +++ b/workers/file_processing/__init__.py @@ -4,6 +4,7 @@ direct Django ORM access, implementing the hybrid approach for tool execution. """ +from .structure_tool_task import execute_structure_tool from .tasks import ( process_file_batch, process_file_batch_api, @@ -13,6 +14,7 @@ __all__ = [ "celery_app", + "execute_structure_tool", "process_file_batch", "process_file_batch_api", "process_file_batch_resilient", diff --git a/workers/file_processing/structure_tool_task.py b/workers/file_processing/structure_tool_task.py new file mode 100644 index 0000000000..75bfb29d95 --- /dev/null +++ b/workers/file_processing/structure_tool_task.py @@ -0,0 +1,674 @@ +"""Structure tool Celery task — Phase 3 of executor migration. + +Replaces the Docker-container-based StructureTool.run() with a Celery +task that runs in the file_processing worker. Instead of PromptTool +HTTP calls to prompt-service, it uses ExecutionDispatcher to send +operations to the executor worker via Celery. 
+ +Before (Docker-based): + File Processing Worker → WorkflowExecutionService → ToolSandbox + → Docker container → StructureTool.run() → PromptTool (HTTP) → prompt-service + +After (Celery-based): + File Processing Worker → WorkerWorkflowExecutionService + → execute_structure_tool task → ExecutionDispatcher + → executor worker → LegacyExecutor +""" + +import json +import logging +import os +import time +from pathlib import Path +from typing import Any + +from file_processing.worker import app +from shared.enums.task_enums import TaskName + +from unstract.sdk1.constants import ToolEnv, UsageKwargs +from unstract.sdk1.execution.context import ExecutionContext +from unstract.sdk1.execution.dispatcher import ExecutionDispatcher +from unstract.sdk1.execution.result import ExecutionResult + +logger = logging.getLogger(__name__) + +# Timeout for executor worker calls (seconds). +# Reads from EXECUTOR_RESULT_TIMEOUT env, defaults to 3600. +EXECUTOR_TIMEOUT = int(os.environ.get("EXECUTOR_RESULT_TIMEOUT", 3600)) + + +# ----------------------------------------------------------------------- +# Constants mirrored from tools/structure/src/constants.py +# These are the keys used in tool_metadata and payload dicts. 
+# ----------------------------------------------------------------------- + + +class _SK: + """SettingsKeys subset needed by the structure tool task.""" + + PROMPT_REGISTRY_ID = "prompt_registry_id" + TOOL_METADATA = "tool_metadata" + TOOL_ID = "tool_id" + OUTPUTS = "outputs" + TOOL_SETTINGS = "tool_settings" + NAME = "name" + ACTIVE = "active" + PROMPT = "prompt" + CHUNK_SIZE = "chunk-size" + CHUNK_OVERLAP = "chunk-overlap" + VECTOR_DB = "vector-db" + EMBEDDING = "embedding" + X2TEXT_ADAPTER = "x2text_adapter" + LLM = "llm" + CHALLENGE_LLM = "challenge_llm" + ENABLE_CHALLENGE = "enable_challenge" + ENABLE_SINGLE_PASS_EXTRACTION = "enable_single_pass_extraction" + SUMMARIZE_AS_SOURCE = "summarize_as_source" + ENABLE_HIGHLIGHT = "enable_highlight" + ENABLE_WORD_CONFIDENCE = "enable_word_confidence" + SUMMARIZE_PROMPT = "summarize_prompt" + TABLE_SETTINGS = "table_settings" + INPUT_FILE = "input_file" + IS_DIRECTORY_MODE = "is_directory_mode" + RUN_ID = "run_id" + EXECUTION_ID = "execution_id" + FILE_HASH = "file_hash" + FILE_NAME = "file_name" + FILE_PATH = "file_path" + EXECUTION_SOURCE = "execution_source" + TOOL = "tool" + EXTRACT = "EXTRACT" + SUMMARIZE = "SUMMARIZE" + METADATA = "metadata" + METRICS = "metrics" + INDEXING = "indexing" + OUTPUT = "output" + CONTEXT = "context" + DATA = "data" + LLM_ADAPTER_INSTANCE_ID = "llm_adapter_instance_id" + PROMPT_KEYS = "prompt_keys" + LLM_PROFILE_ID = "llm_profile_id" + CUSTOM_DATA = "custom_data" + SINGLE_PASS_EXTRACTION_MODE = "single_pass_extraction_mode" + CHALLENGE_LLM_ADAPTER_ID = "challenge_llm_adapter_id" + + +# ----------------------------------------------------------------------- +# Standalone helper functions (extracted from StructureTool methods) +# ----------------------------------------------------------------------- + + +def _apply_profile_overrides(tool_metadata: dict, profile_data: dict) -> list[str]: + """Apply profile overrides to tool metadata. 
+ + Standalone version of StructureTool._apply_profile_overrides. + """ + changes: list[str] = [] + + profile_to_tool_mapping = { + "chunk_overlap": "chunk-overlap", + "chunk_size": "chunk-size", + "embedding_model_id": "embedding", + "llm_id": "llm", + "similarity_top_k": "similarity-top-k", + "vector_store_id": "vector-db", + "x2text_id": "x2text_adapter", + "retrieval_strategy": "retrieval-strategy", + } + + if "tool_settings" in tool_metadata: + changes.extend( + _override_section( + tool_metadata["tool_settings"], + profile_data, + profile_to_tool_mapping, + "tool_settings", + ) + ) + + if "outputs" in tool_metadata: + for i, output in enumerate(tool_metadata["outputs"]): + output_name = output.get("name", f"output_{i}") + changes.extend( + _override_section( + output, + profile_data, + profile_to_tool_mapping, + f"output[{output_name}]", + ) + ) + + return changes + + +def _override_section( + section: dict, + profile_data: dict, + mapping: dict, + section_name: str = "section", +) -> list[str]: + """Override values in a section using profile data.""" + changes: list[str] = [] + for profile_key, section_key in mapping.items(): + if profile_key in profile_data and section_key in section: + old_value = section[section_key] + new_value = profile_data[profile_key] + if old_value != new_value: + section[section_key] = new_value + change_desc = f"{section_name}.{section_key}: {old_value} -> {new_value}" + changes.append(change_desc) + logger.info("Overrode %s", change_desc) + return changes + + +def _should_skip_extraction_for_smart_table( + outputs: list[dict[str, Any]], +) -> bool: + """Check if extraction and indexing should be skipped for smart table. + + Standalone version of StructureTool._should_skip_extraction_for_smart_table. 
+ """ + for output in outputs: + if _SK.TABLE_SETTINGS not in output: + continue + prompt = output.get(_SK.PROMPT, "") + if not prompt or not isinstance(prompt, str): + continue + try: + schema_data = json.loads(prompt) + except ValueError as e: + logger.warning("Failed to parse prompt as JSON for smart table: %s", e) + continue + if isinstance(schema_data, dict) and schema_data: + return True + return False + + +# ----------------------------------------------------------------------- +# Main Celery task +# ----------------------------------------------------------------------- + + +@app.task(bind=True, name=str(TaskName.EXECUTE_STRUCTURE_TOOL)) +def execute_structure_tool(self, params: dict) -> dict: + """Execute structure tool as a Celery task. + + Replicates StructureTool.run() from tools/structure/src/main.py + but uses ExecutionDispatcher instead of PromptTool HTTP calls. + + Args: + params: Dict with keys described in the Phase 3 plan. + + Returns: + Dict with {"success": bool, "data": dict, "error": str|None}. + """ + try: + return _execute_structure_tool_impl(params) + except Exception as e: + logger.error("Structure tool task failed: %s", e, exc_info=True) + return ExecutionResult.failure(error=f"Structure tool failed: {e}").to_dict() + + +def _execute_structure_tool_impl(params: dict) -> dict: + """Implementation of the structure tool pipeline. + + Separated from the task function for testability. + + Phase 5E: Uses a single ``structure_pipeline`` dispatch instead of + 3 sequential ``dispatcher.dispatch()`` calls. The executor worker + handles the full extract → summarize → index → answer_prompt + pipeline internally, freeing the file_processing worker slot. 
+ """ + # ---- Unpack params ---- + organization_id = params["organization_id"] + execution_id = params.get("execution_id", "") + file_execution_id = params["file_execution_id"] + tool_instance_metadata = params["tool_instance_metadata"] + platform_service_api_key = params["platform_service_api_key"] + input_file_path = params["input_file_path"] + output_dir_path = params["output_dir_path"] + source_file_name = params["source_file_name"] + execution_data_dir = params["execution_data_dir"] + file_hash = params.get("file_hash", "") + exec_metadata = params.get("exec_metadata", {}) + + # ---- Step 1: Setup ---- + from executor.executor_tool_shim import ExecutorToolShim + + shim = ExecutorToolShim(platform_api_key=platform_service_api_key) + + platform_helper = _create_platform_helper(shim, file_execution_id) + dispatcher = ExecutionDispatcher(celery_app=app) + fs = _get_file_storage() + + # ---- Step 2: Fetch tool metadata ---- + prompt_registry_id = tool_instance_metadata.get(_SK.PROMPT_REGISTRY_ID, "") + logger.info("Fetching exported tool with UUID '%s'", prompt_registry_id) + + tool_metadata, is_agentic = _fetch_tool_metadata(platform_helper, prompt_registry_id) + + # ---- Route agentic vs regular ---- + if is_agentic: + return _run_agentic_extraction( + tool_metadata=tool_metadata, + input_file_path=input_file_path, + output_dir_path=output_dir_path, + tool_instance_metadata=tool_instance_metadata, + dispatcher=dispatcher, + shim=shim, + file_execution_id=file_execution_id, + organization_id=organization_id, + source_file_name=source_file_name, + fs=fs, + execution_data_dir=execution_data_dir, + ) + + # ---- Step 3: Profile overrides ---- + _handle_profile_overrides(exec_metadata, platform_helper, tool_metadata) + + # ---- Extract settings from tool_metadata ---- + settings = tool_instance_metadata + is_challenge_enabled = settings.get(_SK.ENABLE_CHALLENGE, False) + is_summarization_enabled = settings.get(_SK.SUMMARIZE_AS_SOURCE, False) + is_single_pass_enabled = 
settings.get(_SK.SINGLE_PASS_EXTRACTION_MODE, False) + challenge_llm = settings.get(_SK.CHALLENGE_LLM_ADAPTER_ID, "") + is_highlight_enabled = settings.get(_SK.ENABLE_HIGHLIGHT, False) + is_word_confidence_enabled = settings.get(_SK.ENABLE_WORD_CONFIDENCE, False) + logger.info( + "HIGHLIGHT_DEBUG structure_tool: is_highlight_enabled=%s " + "is_word_confidence_enabled=%s from settings keys=%s", + is_highlight_enabled, + is_word_confidence_enabled, + list(settings.keys()), + ) + + tool_id = tool_metadata[_SK.TOOL_ID] + tool_settings = tool_metadata[_SK.TOOL_SETTINGS] + outputs = tool_metadata[_SK.OUTPUTS] + + # Inject workflow-level settings into tool_settings + tool_settings[_SK.CHALLENGE_LLM] = challenge_llm + tool_settings[_SK.ENABLE_CHALLENGE] = is_challenge_enabled + tool_settings[_SK.ENABLE_SINGLE_PASS_EXTRACTION] = is_single_pass_enabled + tool_settings[_SK.SUMMARIZE_AS_SOURCE] = is_summarization_enabled + tool_settings[_SK.ENABLE_HIGHLIGHT] = is_highlight_enabled + tool_settings[_SK.ENABLE_WORD_CONFIDENCE] = is_word_confidence_enabled + + _, file_name = os.path.split(input_file_path) + if is_summarization_enabled: + file_name = _SK.SUMMARIZE + + execution_run_data_folder = Path(execution_data_dir) + extracted_input_file = str(execution_run_data_folder / _SK.EXTRACT) + + # ---- Step 4: Smart table detection ---- + skip_extraction_and_indexing = _should_skip_extraction_for_smart_table(outputs) + if skip_extraction_and_indexing: + logger.info( + "Skipping extraction and indexing for Excel table with valid JSON schema" + ) + + # ---- Step 5: Build pipeline params ---- + usage_kwargs: dict[Any, Any] = {} + if not skip_extraction_and_indexing: + usage_kwargs[UsageKwargs.RUN_ID] = file_execution_id + usage_kwargs[UsageKwargs.FILE_NAME] = source_file_name + usage_kwargs[UsageKwargs.EXECUTION_ID] = execution_id + + custom_data = exec_metadata.get(_SK.CUSTOM_DATA, {}) + answer_params = { + _SK.RUN_ID: file_execution_id, + _SK.EXECUTION_ID: execution_id, + 
_SK.TOOL_SETTINGS: tool_settings, + _SK.OUTPUTS: outputs, + _SK.TOOL_ID: tool_id, + _SK.FILE_HASH: file_hash, + _SK.FILE_NAME: file_name, + _SK.FILE_PATH: extracted_input_file, + _SK.EXECUTION_SOURCE: _SK.TOOL, + _SK.CUSTOM_DATA: custom_data, + "PLATFORM_SERVICE_API_KEY": platform_service_api_key, + } + + extract_params = { + "x2text_instance_id": tool_settings[_SK.X2TEXT_ADAPTER], + "file_path": input_file_path, + "enable_highlight": is_highlight_enabled, + "output_file_path": str(execution_run_data_folder / _SK.EXTRACT), + "platform_api_key": platform_service_api_key, + "usage_kwargs": usage_kwargs, + "tags": exec_metadata.get("tags"), + "tool_execution_metadata": exec_metadata, + "execution_data_dir": str(execution_run_data_folder), + } + + index_template = { + "tool_id": tool_id, + "file_hash": file_hash, + "is_highlight_enabled": is_highlight_enabled, + "platform_api_key": platform_service_api_key, + "extracted_file_path": extracted_input_file, + } + + pipeline_options = { + "skip_extraction_and_indexing": skip_extraction_and_indexing, + "is_summarization_enabled": is_summarization_enabled, + "is_single_pass_enabled": is_single_pass_enabled, + "input_file_path": input_file_path, + "source_file_name": source_file_name, + } + + # Build summarize params if enabled + summarize_params = None + if is_summarization_enabled: + prompt_keys = [o[_SK.NAME] for o in outputs] + summarize_params = { + "llm_adapter_instance_id": tool_settings[_SK.LLM], + "summarize_prompt": tool_settings.get(_SK.SUMMARIZE_PROMPT, ""), + "extract_file_path": str(execution_run_data_folder / _SK.EXTRACT), + "summarize_file_path": str(execution_run_data_folder / _SK.SUMMARIZE), + "platform_api_key": platform_service_api_key, + "prompt_keys": prompt_keys, + } + + # ---- Step 6: Single dispatch to executor ---- + logger.info( + "Dispatching structure_pipeline: tool_id=%s " + "skip_extract=%s summarize=%s single_pass=%s", + tool_id, + skip_extraction_and_indexing, + is_summarization_enabled, + 
is_single_pass_enabled, + ) + + pipeline_ctx = ExecutionContext( + executor_name="legacy", + operation="structure_pipeline", + run_id=file_execution_id, + execution_source="tool", + organization_id=organization_id, + request_id=file_execution_id, + executor_params={ + "extract_params": extract_params, + "index_template": index_template, + "answer_params": answer_params, + "pipeline_options": pipeline_options, + "summarize_params": summarize_params, + }, + ) + pipeline_start = time.monotonic() + pipeline_result = dispatcher.dispatch(pipeline_ctx, timeout=EXECUTOR_TIMEOUT) + pipeline_elapsed = time.monotonic() - pipeline_start + + if not pipeline_result.success: + return pipeline_result.to_dict() + + structured_output = pipeline_result.data + + # ---- Step 7: Write output files ---- + # (metadata/metrics merging already done by executor pipeline) + try: + output_path = Path(output_dir_path) / f"{Path(source_file_name).stem}.json" + logger.info("Writing output to %s", output_path) + fs.json_dump(path=output_path, data=structured_output) + + # Overwrite INFILE with JSON output (matches Docker-based tool behavior). + # The destination connector reads from INFILE and checks MIME type — + # if we don't overwrite it, INFILE still has the original PDF. 
+ logger.info("Overwriting INFILE with structured output: %s", input_file_path) + fs.json_dump(path=input_file_path, data=structured_output) + + logger.info("Output written successfully to workflow storage") + except (OSError, json.JSONDecodeError) as e: + return ExecutionResult.failure(error=f"Error writing output file: {e}").to_dict() + + # Write tool result + tool_metadata to METADATA.json + # (destination connector reads output_type from tool_metadata) + _write_tool_result(fs, execution_data_dir, structured_output, pipeline_elapsed) + + return ExecutionResult(success=True, data=structured_output).to_dict() + + +# ----------------------------------------------------------------------- +# Helper functions for the pipeline steps +# ----------------------------------------------------------------------- + + +def _create_platform_helper(shim, request_id: str): + """Create PlatformHelper using env vars for host/port.""" + from unstract.sdk1.platform import PlatformHelper + + return PlatformHelper( + tool=shim, + platform_host=os.environ.get(ToolEnv.PLATFORM_HOST, ""), + platform_port=os.environ.get(ToolEnv.PLATFORM_PORT, ""), + request_id=request_id, + ) + + +def _get_file_storage(): + """Get workflow execution file storage instance.""" + from unstract.filesystem import FileStorageType, FileSystem + + return FileSystem(FileStorageType.WORKFLOW_EXECUTION).get_file_storage() + + +def _fetch_tool_metadata(platform_helper, prompt_registry_id: str) -> tuple[dict, bool]: + """Fetch tool metadata from platform, trying prompt studio then agentic. + + Returns: + Tuple of (tool_metadata dict, is_agentic bool). + + Raises: + RuntimeError: If neither registry returns valid metadata. 
+ """ + exported_tool = None + try: + exported_tool = platform_helper.get_prompt_studio_tool( + prompt_registry_id=prompt_registry_id + ) + except Exception as e: + logger.info("Not found as prompt studio project, trying agentic: %s", e) + + if exported_tool and _SK.TOOL_METADATA in exported_tool: + tool_metadata = exported_tool[_SK.TOOL_METADATA] + tool_metadata["is_agentic"] = False + return tool_metadata, False + + # Try agentic registry + try: + agentic_tool = platform_helper.get_agentic_studio_tool( + agentic_registry_id=prompt_registry_id + ) + if not agentic_tool or _SK.TOOL_METADATA not in agentic_tool: + raise RuntimeError( + f"Registry returned empty response for {prompt_registry_id}" + ) + tool_metadata = agentic_tool[_SK.TOOL_METADATA] + tool_metadata["is_agentic"] = True + logger.info( + "Retrieved agentic project: %s", + tool_metadata.get("name", prompt_registry_id), + ) + return tool_metadata, True + except Exception as agentic_error: + raise RuntimeError( + f"Error fetching project from both registries " + f"for ID '{prompt_registry_id}': {agentic_error}" + ) from agentic_error + + +def _handle_profile_overrides( + exec_metadata: dict, platform_helper, tool_metadata: dict +) -> None: + """Apply LLM profile overrides if configured.""" + llm_profile_id = exec_metadata.get(_SK.LLM_PROFILE_ID) + if not llm_profile_id: + return + + try: + llm_profile = platform_helper.get_llm_profile(llm_profile_id) + if llm_profile: + profile_name = llm_profile.get("profile_name", llm_profile_id) + logger.info( + "Applying profile overrides from profile: %s", + profile_name, + ) + changes = _apply_profile_overrides(tool_metadata, llm_profile) + if changes: + logger.info( + "Profile overrides applied. 
Changes: %s", + "; ".join(changes), + ) + else: + logger.info("Profile overrides applied - no changes needed") + except Exception as e: + raise RuntimeError(f"Error applying profile overrides: {e}") from e + + +def _run_agentic_extraction( + tool_metadata: dict, + input_file_path: str, + output_dir_path: str, + tool_instance_metadata: dict, + dispatcher: ExecutionDispatcher, + shim: Any, + file_execution_id: str, + organization_id: str, + source_file_name: str, + fs: Any, + execution_data_dir: str = "", +) -> dict: + """Execute agentic extraction pipeline via dispatcher. + + Unpacks metadata, extracts document text via X2Text, then dispatches + with flat executor_params matching what AgenticPromptStudioExecutor + expects (adapter_instance_id, document_text, etc.). + """ + from unstract.sdk1.x2txt import X2Text + + # 1. Unpack agentic project metadata (matches registry_helper export format) + adapter_config = tool_metadata.get("adapter_config", {}) + prompt_text = tool_metadata.get("prompt_text", "") + json_schema = tool_metadata.get("json_schema", {}) + enable_highlight = tool_instance_metadata.get( + "enable_highlight", + tool_metadata.get("enable_highlight", False), + ) + + # 2. Get adapter IDs: workflow UI overrides → exported defaults + # (mirrors tools/structure/src/main.py) + extractor_llm = tool_instance_metadata.get( + "extractor_llm_adapter_id", adapter_config.get("extractor_llm", "") + ) + llmwhisperer = tool_instance_metadata.get( + "llmwhisperer_adapter_id", adapter_config.get("llmwhisperer", "") + ) + platform_service_api_key = shim.platform_api_key + + # 3. 
Extract text from document using X2Text/LLMWhisperer + x2text = X2Text(tool=shim, adapter_instance_id=llmwhisperer) + extraction_result = x2text.process( + input_file_path=input_file_path, + enable_highlight=enable_highlight, + fs=fs, + ) + document_text = extraction_result.extracted_text + + # Parse json_schema if stored as string + if isinstance(json_schema, str): + json_schema = json.loads(json_schema) + + # 4. Dispatch with flat executor_params matching executor expectations + start_time = time.monotonic() + agentic_ctx = ExecutionContext( + executor_name="agentic", + operation="agentic_extract", + run_id=file_execution_id, + execution_source="tool", + organization_id=organization_id, + request_id=file_execution_id, + executor_params={ + "document_id": file_execution_id, + "document_text": document_text, + "prompt_text": prompt_text, + "schema": json_schema, + "adapter_instance_id": extractor_llm, + "PLATFORM_SERVICE_API_KEY": platform_service_api_key, + "include_source_refs": enable_highlight, + }, + ) + agentic_result = dispatcher.dispatch(agentic_ctx, timeout=EXECUTOR_TIMEOUT) + + if not agentic_result.success: + return agentic_result.to_dict() + + structured_output = agentic_result.data + elapsed = time.monotonic() - start_time + + # Write output files (matches regular pipeline path) + try: + output_path = Path(output_dir_path) / f"{Path(source_file_name).stem}.json" + logger.info("Writing agentic output to %s", output_path) + fs.json_dump(path=output_path, data=structured_output) + + # Overwrite INFILE with JSON output so destination connector reads JSON, not PDF + logger.info("Overwriting INFILE with agentic output: %s", input_file_path) + fs.json_dump(path=input_file_path, data=structured_output) + except Exception as e: + return ExecutionResult.failure( + error=f"Error writing agentic output: {e}" + ).to_dict() + + # Write tool result + tool_metadata to METADATA.json + _write_tool_result(fs, execution_data_dir, structured_output, elapsed) + + return 
ExecutionResult(success=True, data=structured_output).to_dict() + + +def _write_tool_result( + fs: Any, execution_data_dir: str, _data: dict, elapsed_time: float = 0.0 +) -> None: + """Write tool result and tool_metadata to METADATA.json. + + Matches BaseTool._update_exec_metadata(): + - tool_metadata: list of dicts with tool_name, output_type, elapsed_time + (destination connector reads output_type from here) + - total_elapsed_time: cumulative elapsed time + """ + try: + metadata_path = Path(execution_data_dir) / "METADATA.json" + + # Read existing metadata if present + existing: dict = {} + if fs.exists(metadata_path): + try: + existing_raw = fs.read(path=metadata_path, mode="r") + if existing_raw: + existing = json.loads(existing_raw) + except Exception: + pass + + # Add tool_metadata (matches BaseTool._update_exec_metadata) + # The destination connector reads output_type from tool_metadata[-1] + tool_meta_entry = { + "tool_name": "structure_tool", + "output_type": "JSON", + "elapsed_time": elapsed_time, + } + if "tool_metadata" not in existing: + existing["tool_metadata"] = [tool_meta_entry] + else: + existing["tool_metadata"].append(tool_meta_entry) + + existing["total_elapsed_time"] = ( + existing.get("total_elapsed_time", 0.0) + elapsed_time + ) + + fs.write( + path=metadata_path, + mode="w", + data=json.dumps(existing, indent=2), + ) + except Exception as e: + logger.warning("Failed to write tool result to METADATA.json: %s", e) diff --git a/workers/ide_callback/__init__.py b/workers/ide_callback/__init__.py new file mode 100644 index 0000000000..12d0f95d3e --- /dev/null +++ b/workers/ide_callback/__init__.py @@ -0,0 +1,5 @@ +"""IDE Callback Worker + +Handles post-execution callbacks for Prompt Studio IDE operations. +Replaces the 4 callback tasks that previously ran on the Django backend image. 
+""" diff --git a/workers/ide_callback/tasks.py b/workers/ide_callback/tasks.py new file mode 100644 index 0000000000..e352da830d --- /dev/null +++ b/workers/ide_callback/tasks.py @@ -0,0 +1,451 @@ +"""IDE Callback Worker Tasks + +Post-execution callbacks for Prompt Studio IDE operations. +These tasks run on the workers image (no Django) and use InternalAPIClient +to persist state through the backend's internal API endpoints. + +Task names are preserved exactly to maintain Celery routing compatibility. +""" + +import json +import logging +import time +import uuid +from datetime import date, datetime +from typing import Any + +from celery import current_app as app +from shared.clients.prompt_studio_client import PromptStudioAPIClient + +logger = logging.getLogger(__name__) + +PROMPT_STUDIO_RESULT_EVENT = "prompt_studio_result" + +# WebSocket emission endpoint (relative to internal API base) +_EMIT_WEBSOCKET_ENDPOINT = "emit-websocket/" + + +class _SafeEncoder(json.JSONEncoder): + """JSON encoder that converts uuid.UUID and datetime objects to strings.""" + + def default(self, obj: Any) -> Any: + if isinstance(obj, uuid.UUID): + return str(obj) + if isinstance(obj, (datetime, date)): + return obj.isoformat() + return super().default(obj) + + +def _json_safe(data: Any) -> Any: + """Round-trip through JSON to convert non-serializable types.""" + return json.loads(json.dumps(data, cls=_SafeEncoder)) + + +def _get_api_client() -> PromptStudioAPIClient: + """Create a PromptStudioAPIClient for internal API calls.""" + return PromptStudioAPIClient() + + +def _emit_websocket( + api_client: PromptStudioAPIClient, + room: str, + event: str, + data: dict[str, Any], +) -> None: + """Emit a WebSocket event via the backend's internal emit-websocket endpoint.""" + try: + payload = {"room": room, "event": event, "data": data} + api_client.post(_EMIT_WEBSOCKET_ENDPOINT, data=payload) + except Exception as e: + logger.error("Failed to emit WebSocket event: %s", e) + + +def 
_emit_event( + api_client: PromptStudioAPIClient, + log_events_id: str, + task_id: str, + operation: str, + tool_id: str = "", + extra: dict[str, Any] | None = None, + **event_fields: Any, +) -> None: + """Push a Socket.IO event (success or failure) to the frontend. + + Common fields (task_id, operation, tool_id) are always included. + Pass ``status="completed", result=...`` for success events, or + ``status="failed", error=...`` for failure events via *event_fields*. + """ + payload: dict[str, Any] = { + "task_id": task_id, + "operation": operation, + "tool_id": tool_id, + **event_fields, + } + if extra: + payload.update(extra) + _emit_websocket( + api_client, + room=log_events_id, + event=PROMPT_STUDIO_RESULT_EVENT, + data=_json_safe(payload), + ) + + +def _get_task_error(failed_task_id: str, default: str) -> str: + """Retrieve the error message from a failed Celery task's result backend.""" + try: + from celery.result import AsyncResult + + res = AsyncResult(failed_task_id, app=app) + if res.result: + return str(res.result) + except Exception: + pass + return default + + +# ------------------------------------------------------------------ +# IDE Callback Tasks +# +# These are fire-and-forget callbacks invoked by Celery link/link_error +# after the executor worker finishes. They run on the ide_callback queue +# and use InternalAPIClient for ORM persistence. +# ------------------------------------------------------------------ + + +@app.task(name="ide_index_complete") +def ide_index_complete( + result_dict: dict[str, Any], + callback_kwargs: dict[str, Any] | None = None, +) -> dict[str, Any]: + """Celery link callback after successful ide_index execution. + + Performs post-indexing bookkeeping via internal API and pushes + a socket event to the frontend. 
+ """ + cb = callback_kwargs or {} + log_events_id = cb.get("log_events_id", "") + org_id = cb.get("org_id", "") + user_id = cb.get("user_id", "") + document_id = cb.get("document_id", "") + doc_id_key = cb.get("doc_id_key", "") + profile_manager_id = cb.get("profile_manager_id") + executor_task_id = cb.get("executor_task_id", "") + tool_id = cb.get("tool_id", "") + + api = _get_api_client() + + try: + # Check executor-level failure + if not result_dict.get("success", False): + error_msg = result_dict.get("error", "Unknown executor error") + logger.error("ide_index executor reported failure: %s", error_msg) + api.remove_document_indexing( + org_id=org_id, + user_id=user_id, + doc_id_key=doc_id_key, + organization_id=org_id, + ) + _emit_event( + api, + log_events_id, + executor_task_id, + "index_document", + tool_id=tool_id, + extra={"document_id": document_id}, + status="failed", + error=error_msg, + ) + return {"status": "failed", "error": error_msg} + + doc_id = result_dict.get("data", {}).get("doc_id", doc_id_key) + + # Mark document as indexed in cache + api.mark_document_indexed( + org_id=org_id, + user_id=user_id, + doc_id_key=doc_id_key, + doc_id=doc_id, + organization_id=org_id, + ) + + # Update index manager ORM record + if profile_manager_id: + try: + api.update_index_manager( + document_id=document_id, + profile_manager_id=profile_manager_id, + doc_id=doc_id, + organization_id=org_id, + ) + except Exception: + logger.warning( + "Failed to update index manager for profile %s; " + "primary indexing succeeded.", + profile_manager_id, + ) + + # Handle summary index tracking via backend endpoint + # (requires PromptIdeBaseTool + IndexingUtils which need Django ORM) + summary_profile_id = cb.get("summary_profile_id", "") + summarize_file_path = cb.get("summarize_file_path", "") + + if summary_profile_id and summarize_file_path: + try: + resp = api.get_summary_index_key( + summary_profile_id=summary_profile_id, + summarize_file_path=summarize_file_path, + 
org_id=org_id, + organization_id=org_id, + ) + if resp.get("success"): + summarize_doc_id = resp["data"]["doc_id"] + api.update_index_manager( + document_id=document_id, + profile_manager_id=summary_profile_id, + doc_id=summarize_doc_id, + is_summary=True, + organization_id=org_id, + ) + except Exception: + logger.exception( + "Failed to update summary index manager for document %s; " + "primary indexing succeeded.", + document_id, + ) + + result: dict[str, Any] = { + "message": "Document indexed successfully.", + "document_id": document_id, + } + _emit_event( + api, + log_events_id, + executor_task_id, + "index_document", + tool_id=tool_id, + status="completed", + result=result, + ) + return result + + except Exception as e: + logger.exception("ide_index_complete callback failed") + _emit_event( + api, + log_events_id, + executor_task_id, + "index_document", + tool_id=tool_id, + extra={"document_id": document_id}, + status="failed", + error=str(e), + ) + raise + + +@app.task(name="ide_index_error") +def ide_index_error( + failed_task_id: str, + callback_kwargs: dict[str, Any] | None = None, +) -> None: + """Celery link_error callback when an ide_index task fails. + + Cleans up the indexing-in-progress flag and pushes an error socket event. 
@app.task(name="ide_prompt_complete")
def ide_prompt_complete(
    result_dict: dict[str, Any],
    callback_kwargs: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Celery link callback run after an answer_prompt / single_pass task.

    Stores the prompt outputs through the internal API client and then
    emits a status event describing the outcome.

    Args:
        result_dict: Executor result. ``success`` decides which path is
            taken; ``error`` is read on failure, ``data["output"]`` and
            ``data["metadata"]`` on success.
        callback_kwargs: Optional context forwarded from the dispatcher
            (ids, operation name, dispatch time, HubSpot details). Missing
            keys fall back to the defaults used below.

    Returns:
        ``{"status": "failed", "error": ...}`` when the executor reported a
        failure, otherwise ``{"status": "completed", "operation": ...}``.

    Raises:
        Exception: Anything raised while persisting or emitting is logged,
            reported through a ``failed`` event and then re-raised.
    """
    cb = callback_kwargs or {}
    log_events_id = cb.get("log_events_id", "")
    executor_task_id = cb.get("executor_task_id", "")
    tool_id = cb.get("tool_id", "")
    operation = cb.get("operation", "fetch_response")
    org_id = cb.get("org_id", "")
    run_id = cb.get("run_id", "")
    document_id = cb.get("document_id", "")
    prompt_ids = cb.get("prompt_ids", [])
    profile_manager_id = cb.get("profile_manager_id")
    is_single_pass = cb.get("is_single_pass", False)
    dispatch_time = cb.get("dispatch_time", 0)

    api = _get_api_client()

    def build_extra() -> dict[str, Any]:
        # A fresh dict per event, so no payload is shared between emissions.
        return {
            "prompt_ids": prompt_ids,
            "document_id": document_id,
            "profile_manager_id": profile_manager_id,
        }

    def push_event(extra: dict[str, Any], **fields: Any) -> None:
        # Every event shares the same routing arguments; only the payload
        # and the status-specific fields differ.
        _emit_event(
            api,
            log_events_id,
            executor_task_id,
            operation,
            tool_id=tool_id,
            extra=extra,
            **fields,
        )

    try:
        # Executor-level failure: report it and stop without persisting.
        if not result_dict.get("success", False):
            failure_reason = result_dict.get("error", "Unknown executor error")
            logger.error("ide_prompt executor reported failure: %s", failure_reason)
            push_event(build_extra(), status="failed", error=failure_reason)
            return {"status": "failed", "error": failure_reason}

        payload = result_dict.get("data", {})
        outputs = _json_safe(payload.get("output", {}))
        metadata = _json_safe(payload.get("metadata", {}))

        # Log only the output keys (or the type name), never the values.
        if isinstance(outputs, dict):
            output_summary = list(outputs.keys())
        else:
            output_summary = type(outputs).__name__
        logger.info(
            "ide_prompt_complete: operation=%s output_keys=%s prompt_ids=%s "
            "doc=%s profile=%s",
            operation,
            output_summary,
            prompt_ids,
            document_id,
            profile_manager_id,
        )

        # Persist outputs via internal API
        api_reply = api.update_prompt_output(
            run_id=run_id,
            prompt_ids=prompt_ids,
            outputs=outputs,
            document_id=document_id,
            is_single_pass_extract=is_single_pass,
            metadata=metadata,
            profile_manager_id=profile_manager_id,
            organization_id=org_id,
        )
        persisted = []
        if api_reply.get("success"):
            persisted = api_reply.get("data", [])

        # HubSpot notification is best-effort: a failure is only logged.
        hubspot_user_id = cb.get("hubspot_user_id")
        if hubspot_user_id:
            try:
                api.notify_hubspot(
                    user_id=hubspot_user_id,
                    event_name="PROMPT_RUN",
                    is_first_for_org=cb.get("is_first_prompt_run", False),
                    action_label="prompt run",
                    organization_id=org_id,
                )
            except Exception:
                logger.warning("Failed to send HubSpot PROMPT_RUN event", exc_info=True)

        completed_extra = build_extra()
        if dispatch_time:
            completed_extra["elapsed"] = int(time.time() - dispatch_time)
        else:
            completed_extra["elapsed"] = 0
        push_event(completed_extra, status="completed", result=persisted)

        # Return minimal status to avoid logging sensitive extracted data
        return {"status": "completed", "operation": operation}

    except Exception as exc:
        logger.exception("ide_prompt_complete callback failed")
        push_event(build_extra(), status="failed", error=str(exc))
        raise
+ """ + cb = callback_kwargs or {} + log_events_id = cb.get("log_events_id", "") + operation = cb.get("operation", "fetch_response") + executor_task_id = cb.get("executor_task_id", "") + tool_id = cb.get("tool_id", "") + + api = _get_api_client() + + try: + error_msg = _get_task_error(failed_task_id, default="Prompt execution failed") + + _emit_event( + api, + log_events_id, + executor_task_id, + operation, + tool_id=tool_id, + extra={ + "prompt_ids": cb.get("prompt_ids", []), + "document_id": cb.get("document_id", ""), + "profile_manager_id": cb.get("profile_manager_id"), + }, + status="failed", + error=error_msg, + ) + except Exception: + logger.exception("ide_prompt_error callback failed") diff --git a/workers/run-worker-docker.sh b/workers/run-worker-docker.sh index cdf7e9538d..10e9c32196 100755 --- a/workers/run-worker-docker.sh +++ b/workers/run-worker-docker.sh @@ -22,6 +22,9 @@ WORKERS_DIR="/app" # Default environment file ENV_FILE="/app/.env" +# Worker type constant for the executor worker +readonly EXECUTOR_WORKER_TYPE="executor" + # Available core workers (OSS) declare -A WORKERS=( ["api"]="api_deployment" @@ -35,6 +38,7 @@ declare -A WORKERS=( ["log-consumer"]="log_consumer" ["scheduler"]="scheduler" ["schedule"]="scheduler" + ["${EXECUTOR_WORKER_TYPE}"]="${EXECUTOR_WORKER_TYPE}" ["all"]="all" ) @@ -51,6 +55,7 @@ declare -A WORKER_QUEUES=( ["notification"]="notifications,notifications_webhook,notifications_email,notifications_sms,notifications_priority" ["log_consumer"]="celery_log_task_queue" ["scheduler"]="scheduler" + ["${EXECUTOR_WORKER_TYPE}"]="celery_executor_legacy" ) # Worker health ports @@ -62,6 +67,7 @@ declare -A WORKER_HEALTH_PORTS=( ["log_consumer"]="8084" ["notification"]="8085" ["scheduler"]="8087" + ["${EXECUTOR_WORKER_TYPE}"]="8088" ) # Function to print colored output @@ -196,6 +202,7 @@ detect_worker_type_from_args() { *"notifications"*) echo "notification" ;; *"celery_log_task_queue"*) echo "log_consumer" ;; *"scheduler"*) echo 
"scheduler" ;; + *"${EXECUTOR_WORKER_TYPE}"*) echo "${EXECUTOR_WORKER_TYPE}" ;; *"celery"*) echo "general" ;; *) echo "general" ;; # fallback esac @@ -259,6 +266,9 @@ run_worker() { "scheduler") queues="${CELERY_QUEUES_SCHEDULER:-$queues}" ;; + "${EXECUTOR_WORKER_TYPE}") + queues="${CELERY_QUEUES_EXECUTOR:-$queues}" + ;; esac # Get health port @@ -294,6 +304,10 @@ run_worker() { export SCHEDULER_HEALTH_PORT="${health_port}" export SCHEDULER_METRICS_PORT="${health_port}" ;; + "${EXECUTOR_WORKER_TYPE}") + export EXECUTOR_HEALTH_PORT="${health_port}" + export EXECUTOR_METRICS_PORT="${health_port}" + ;; *) # Default for pluggable workers local worker_type_upper=$(echo "$worker_type" | tr '[:lower:]' '[:upper:]' | tr '-' '_') @@ -326,6 +340,9 @@ run_worker() { "scheduler") concurrency="${WORKER_SCHEDULER_CONCURRENCY:-2}" ;; + "${EXECUTOR_WORKER_TYPE}") + concurrency="${WORKER_EXECUTOR_CONCURRENCY:-2}" + ;; *) # Default for pluggable workers or unknown types local worker_type_upper=$(echo "$worker_type" | tr '[:lower:]' '[:upper:]' | tr '-' '_') @@ -534,6 +551,10 @@ if [[ "$1" == *"celery"* ]] || [[ "$1" == *".venv"* ]]; then export SCHEDULER_HEALTH_PORT="8087" export SCHEDULER_METRICS_PORT="8087" ;; + "${EXECUTOR_WORKER_TYPE}") + export EXECUTOR_HEALTH_PORT="8088" + export EXECUTOR_METRICS_PORT="8088" + ;; *) # Default for pluggable workers - use dynamic port from WORKER_HEALTH_PORTS health_port="${WORKER_HEALTH_PORTS[$WORKER_TYPE]:-8090}" diff --git a/workers/run-worker.sh b/workers/run-worker.sh index 152a72d859..27d9fc8893 100755 --- a/workers/run-worker.sh +++ b/workers/run-worker.sh @@ -21,6 +21,9 @@ WORKERS_DIR="$SCRIPT_DIR" # Default environment file ENV_FILE="$WORKERS_DIR/.env" +# Worker type constant for the executor worker +readonly EXECUTOR_WORKER_TYPE="executor" + # Available workers declare -A WORKERS=( ["api"]="api-deployment" @@ -37,6 +40,7 @@ declare -A WORKERS=( ["notify"]="notification" ["scheduler"]="scheduler" ["schedule"]="scheduler" + 
["${EXECUTOR_WORKER_TYPE}"]="${EXECUTOR_WORKER_TYPE}" ["all"]="all" ) @@ -52,6 +56,7 @@ declare -A WORKER_QUEUES=( ["log_consumer"]="celery_log_task_queue" ["notification"]="notifications,notifications_webhook,notifications_email,notifications_sms,notifications_priority" ["scheduler"]="scheduler" + ["${EXECUTOR_WORKER_TYPE}"]="celery_executor_legacy" ) # Worker health ports @@ -63,6 +68,7 @@ declare -A WORKER_HEALTH_PORTS=( ["log_consumer"]="8084" ["notification"]="8085" ["scheduler"]="8087" + ["${EXECUTOR_WORKER_TYPE}"]="8088" ) # Function to display usage @@ -80,6 +86,7 @@ WORKER_TYPE: log, log-consumer Run log consumer worker notification, notify Run notification worker scheduler, schedule Run scheduler worker (scheduled pipeline tasks) + executor Run executor worker (extraction execution tasks) all Run all workers (in separate processes, includes auto-discovered pluggable workers) Note: Pluggable workers in pluggable_worker/ directory are automatically discovered and can be run by name. 
@@ -147,6 +154,7 @@ HEALTH CHECKS: - Log Consumer: http://localhost:8084/health - Notification: http://localhost:8085/health - Scheduler: http://localhost:8087/health + - Executor: http://localhost:8088/health - Pluggable workers: http://localhost:8090+/health (auto-assigned ports) EOF @@ -301,7 +309,7 @@ show_status() { print_status $BLUE "Worker Status:" echo "==============" - local workers_to_check="api-deployment general file_processing callback log_consumer notification scheduler" + local workers_to_check="api-deployment general file_processing callback log_consumer notification scheduler executor" # Add discovered pluggable workers if [[ ${#PLUGGABLE_WORKERS[@]} -gt 0 ]]; then @@ -405,6 +413,9 @@ run_worker() { "scheduler") export SCHEDULER_HEALTH_PORT="$health_port" ;; + "${EXECUTOR_WORKER_TYPE}") + export EXECUTOR_HEALTH_PORT="$health_port" + ;; *) # Handle pluggable workers dynamically if [[ -n "${PLUGGABLE_WORKERS[$worker_type]:-}" ]]; then @@ -478,6 +489,9 @@ run_worker() { "scheduler") cmd_args+=("--concurrency=2") ;; + "${EXECUTOR_WORKER_TYPE}") + cmd_args+=("--concurrency=2") + ;; *) # Default for pluggable and other workers if [[ -n "${PLUGGABLE_WORKERS[$worker_type]:-}" ]]; then @@ -525,7 +539,7 @@ run_all_workers() { print_status $GREEN "Starting all workers..." 
# Define core workers - local core_workers="api-deployment general file_processing callback log_consumer notification scheduler" + local core_workers="api-deployment general file_processing callback log_consumer notification scheduler executor" # Add discovered pluggable workers if [[ ${#PLUGGABLE_WORKERS[@]} -gt 0 ]]; then diff --git a/workers/sample.env b/workers/sample.env index 22af85255c..5c0bce2d4b 100644 --- a/workers/sample.env +++ b/workers/sample.env @@ -199,6 +199,14 @@ SCHEDULER_WORKER_NAME=scheduler-worker SCHEDULER_HEALTH_PORT=8087 SCHEDULER_AUTOSCALE=2,1 +# Executor Worker +EXECUTOR_WORKER_NAME=executor-worker +EXECUTOR_HEALTH_PORT=8088 +EXECUTOR_AUTOSCALE=2,1 +EXECUTOR_RESULT_TIMEOUT=3600 +EXECUTOR_TASK_TIME_LIMIT=3600 +EXECUTOR_TASK_SOFT_TIME_LIMIT=3300 + # Notification Worker NOTIFICATION_WORKER_NAME=notification-worker NOTIFICATION_HEALTH_PORT=8085 diff --git a/workers/shared/clients/__init__.py b/workers/shared/clients/__init__.py index 59af325b99..a4f79902a4 100644 --- a/workers/shared/clients/__init__.py +++ b/workers/shared/clients/__init__.py @@ -23,6 +23,7 @@ # Manual review client - use null client as default, plugin registry handles dynamic loading from .manual_review_stub import ManualReviewNullClient as ManualReviewAPIClient from .organization_client import OrganizationAPIClient +from .prompt_studio_client import PromptStudioAPIClient from .tool_client import ToolAPIClient from .usage_client import UsageAPIClient from .webhook_client import WebhookAPIClient @@ -32,6 +33,7 @@ "BaseAPIClient", "ExecutionAPIClient", "FileAPIClient", + "PromptStudioAPIClient", "UsageAPIClient", "ManualReviewAPIClient", "WebhookAPIClient", diff --git a/workers/shared/clients/prompt_studio_client.py b/workers/shared/clients/prompt_studio_client.py new file mode 100644 index 0000000000..da2b81f8a7 --- /dev/null +++ b/workers/shared/clients/prompt_studio_client.py @@ -0,0 +1,157 @@ +"""Prompt Studio API Client for IDE Callback Operations + +Specialized API 
class PromptStudioAPIClient(BaseAPIClient):
    """Client for the backend's internal prompt studio endpoints.

    Each method assembles a payload (or URL) from its arguments and sends it
    through the inherited ``post`` / ``get`` helpers; the backend performs
    the actual Django ORM operations. The helper's return value is passed
    back to the caller unchanged.
    """

    def update_prompt_output(
        self,
        run_id: str,
        prompt_ids: list[str],
        outputs: dict[str, Any],
        document_id: str,
        is_single_pass_extract: bool,
        metadata: dict[str, Any],
        profile_manager_id: str | None = None,
        organization_id: str | None = None,
    ) -> dict[str, Any]:
        """Persist prompt execution output via OutputManagerHelper.

        Returns:
            Backend response with serialized output data.
        """
        return self.post(
            _OUTPUT_ENDPOINT,
            data={
                "run_id": run_id,
                "prompt_ids": prompt_ids,
                "outputs": outputs,
                "document_id": document_id,
                "is_single_pass_extract": is_single_pass_extract,
                "profile_manager_id": profile_manager_id,
                "metadata": metadata,
            },
            organization_id=organization_id,
        )

    def update_index_manager(
        self,
        document_id: str,
        profile_manager_id: str,
        doc_id: str,
        is_summary: bool = False,
        organization_id: str | None = None,
    ) -> dict[str, Any]:
        """Update IndexManager after successful indexing."""
        return self.post(
            _INDEX_ENDPOINT,
            data={
                "document_id": document_id,
                "profile_manager_id": profile_manager_id,
                "doc_id": doc_id,
                "is_summary": is_summary,
            },
            organization_id=organization_id,
        )

    def mark_document_indexed(
        self,
        org_id: str,
        user_id: str,
        doc_id_key: str,
        doc_id: str,
        organization_id: str | None = None,
    ) -> dict[str, Any]:
        """Mark a document as indexed in the cache.

        Posts a ``mark_indexed`` action to the indexing-status endpoint.
        """
        return self.post(
            _INDEXING_STATUS_ENDPOINT,
            data={
                "action": "mark_indexed",
                "org_id": org_id,
                "user_id": user_id,
                "doc_id_key": doc_id_key,
                "doc_id": doc_id,
            },
            organization_id=organization_id,
        )

    def remove_document_indexing(
        self,
        org_id: str,
        user_id: str,
        doc_id_key: str,
        organization_id: str | None = None,
    ) -> dict[str, Any]:
        """Remove the document indexing flag from cache.

        Posts a ``remove`` action to the indexing-status endpoint.
        """
        return self.post(
            _INDEXING_STATUS_ENDPOINT,
            data={
                "action": "remove",
                "org_id": org_id,
                "user_id": user_id,
                "doc_id_key": doc_id_key,
            },
            organization_id=organization_id,
        )

    def get_profile(
        self,
        profile_id: str,
        organization_id: str | None = None,
    ) -> dict[str, Any]:
        """Get profile manager details (adapter IDs, chunk settings)."""
        return self.get(
            _PROFILE_ENDPOINT.format(profile_id=profile_id),
            organization_id=organization_id,
        )

    def notify_hubspot(
        self,
        user_id: str,
        event_name: str,
        is_first_for_org: bool = False,
        action_label: str = "",
        organization_id: str | None = None,
    ) -> dict[str, Any]:
        """Fire a HubSpot event notification."""
        return self.post(
            _HUBSPOT_ENDPOINT,
            data={
                "user_id": user_id,
                "event_name": event_name,
                "is_first_for_org": is_first_for_org,
                "action_label": action_label,
            },
            organization_id=organization_id,
        )

    def get_summary_index_key(
        self,
        summary_profile_id: str,
        summarize_file_path: str,
        org_id: str,
        organization_id: str | None = None,
    ) -> dict[str, Any]:
        """Compute summary doc_id hash server-side.

        The computation requires PromptIdeBaseTool + IndexingUtils which
        depend on Django ORM and are only available on the backend image,
        so the inputs are posted to the backend instead.
        """
        return self.post(
            _SUMMARY_INDEX_KEY_ENDPOINT,
            data={
                "summary_profile_id": summary_profile_id,
                "summarize_file_path": summarize_file_path,
                "org_id": org_id,
            },
            organization_id=organization_id,
        )
a/workers/shared/enums/worker_enums_base.py +++ b/workers/shared/enums/worker_enums_base.py @@ -23,6 +23,8 @@ class WorkerType(str, Enum): NOTIFICATION = "notification" LOG_CONSUMER = "log_consumer" SCHEDULER = "scheduler" + EXECUTOR = "executor" + IDE_CALLBACK = "ide_callback" @classmethod def from_directory_name(cls, name: str) -> "WorkerType": @@ -110,6 +112,8 @@ def to_health_port(self) -> int: WorkerType.NOTIFICATION: 8085, WorkerType.LOG_CONSUMER: 8086, WorkerType.SCHEDULER: 8087, + WorkerType.EXECUTOR: 8088, + WorkerType.IDE_CALLBACK: 8089, } return port_mapping.get(self, 8080) @@ -147,6 +151,14 @@ class QueueName(str, Enum): # Scheduler queue SCHEDULER = "scheduler" + # Executor queue — queue-per-executor naming convention. + # The dispatcher derives queue names as ``celery_executor_{executor_name}``. + # The "legacy" executor is the default OSS executor. + EXECUTOR = "celery_executor_legacy" + + # IDE callback queue (prompt studio post-execution callbacks) + IDE_CALLBACK = "ide_callback" + def to_env_var_name(self) -> str: """Convert queue name to environment variable name. 
diff --git a/workers/shared/infrastructure/config/registry.py b/workers/shared/infrastructure/config/registry.py index 37ad1c08b9..b1967ba628 100644 --- a/workers/shared/infrastructure/config/registry.py +++ b/workers/shared/infrastructure/config/registry.py @@ -64,6 +64,12 @@ class WorkerRegistry: WorkerType.SCHEDULER: WorkerQueueConfig( primary_queue=QueueName.SCHEDULER, additional_queues=[QueueName.GENERAL] ), + WorkerType.EXECUTOR: WorkerQueueConfig( + primary_queue=QueueName.EXECUTOR, + ), + WorkerType.IDE_CALLBACK: WorkerQueueConfig( + primary_queue=QueueName.IDE_CALLBACK, + ), } # Pluggable worker configurations loaded dynamically @@ -134,6 +140,23 @@ class WorkerRegistry: TaskRoute("scheduler.tasks.*", QueueName.SCHEDULER), ], ), + WorkerType.EXECUTOR: WorkerTaskRouting( + worker_type=WorkerType.EXECUTOR, + routes=[ + TaskRoute("execute_extraction", QueueName.EXECUTOR), + TaskRoute("executor.tasks.*", QueueName.EXECUTOR), + ], + ), + WorkerType.IDE_CALLBACK: WorkerTaskRouting( + worker_type=WorkerType.IDE_CALLBACK, + routes=[ + TaskRoute("ide_index_complete", QueueName.IDE_CALLBACK), + TaskRoute("ide_index_error", QueueName.IDE_CALLBACK), + TaskRoute("ide_prompt_complete", QueueName.IDE_CALLBACK), + TaskRoute("ide_prompt_error", QueueName.IDE_CALLBACK), + TaskRoute("ide_callback.tasks.*", QueueName.IDE_CALLBACK), + ], + ), } # Pluggable worker task routes loaded dynamically @@ -171,6 +194,12 @@ class WorkerRegistry: WorkerType.SCHEDULER: { "log_level": "INFO", }, + WorkerType.EXECUTOR: { + "log_level": "INFO", + }, + WorkerType.IDE_CALLBACK: { + "log_level": "INFO", + }, } # Pluggable worker logging configs loaded dynamically diff --git a/workers/shared/workflow/execution/service.py b/workers/shared/workflow/execution/service.py index e38e372a91..0f375846ae 100644 --- a/workers/shared/workflow/execution/service.py +++ b/workers/shared/workflow/execution/service.py @@ -971,17 +971,105 @@ def _prepare_workflow_input_file( def _build_and_execute_workflow( 
def _is_structure_tool_workflow(
    self, execution_service: WorkflowExecutionService
) -> bool:
    """Check if workflow uses the structure tool.

    Compares the base image name (last path component without tag or
    digest) to handle registry prefixes like gcr.io/project/tool-structure
    vs the default unstract/tool-structure.

    The expected image comes from the ``STRUCTURE_TOOL_IMAGE_NAME``
    environment variable, defaulting to ``unstract/tool-structure``.

    Args:
        execution_service: Service whose ``tool_instances`` are inspected;
            each instance's ``image_name`` is compared with the expected
            image.

    Returns:
        True as soon as one tool instance matches the structure tool image
        (exact string match or same base name), otherwise False.
    """

    def base_name(image: str) -> str:
        # Isolate the last path component BEFORE stripping the tag. An
        # image reference may carry a registry port
        # ("localhost:5000/unstract/tool-structure:1.0"), and cutting at the
        # first ":" would reduce every image from that registry to
        # "localhost", making unrelated tools compare equal.
        repository = image.rsplit("/", 1)[-1]
        # Drop a digest ("name@sha256:...") first, then the tag ("name:tag").
        return repository.split("@", 1)[0].split(":", 1)[0]

    structure_image = os.environ.get(
        "STRUCTURE_TOOL_IMAGE_NAME", "unstract/tool-structure"
    )
    structure_base = base_name(structure_image)
    for ti in execution_service.tool_instances:
        ti_name = str(ti.image_name) if ti.image_name else ""
        if not ti_name:
            # Instances without an image name can never match.
            continue
        if ti_name == structure_image or base_name(ti_name) == structure_base:
            logger.info(
                "Detected structure tool workflow "
                f"(image={ti_name}, expected={structure_image})"
            )
            return True
    return False
os.path.basename(file_handler.source_file) + if file_handler.source_file + else file_name + ), + "execution_data_dir": str(file_handler.file_execution_dir), + "messaging_channel": getattr(execution_service, "messaging_channel", ""), + "file_hash": metadata.get("source_hash", ""), + "exec_metadata": metadata, + } + + # Call synchronously (same process, in-band) + result = _execute_structure_tool(params) + + if not result.get("success"): + raise RuntimeError( + f"Structure tool failed: {result.get('error', 'Unknown error')}" + ) + def _extract_source_connector_details( self, source_config: dict[str, Any] | None ) -> tuple[str | None, dict[str, Any]]: diff --git a/workers/tests/__init__.py b/workers/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/workers/tests/conftest.py b/workers/tests/conftest.py new file mode 100644 index 0000000000..084a8ef88c --- /dev/null +++ b/workers/tests/conftest.py @@ -0,0 +1,14 @@ +"""Shared fixtures for workers tests. + +Environment variables are loaded from .env.test at module level +BEFORE any shared package imports. This is required because +shared/constants/api_endpoints.py raises ValueError at import +time if INTERNAL_API_BASE_URL is not set. +""" + +from pathlib import Path + +from dotenv import load_dotenv + +_env_test = Path(__file__).resolve().parent.parent / ".env.test" +load_dotenv(_env_test) diff --git a/workers/tests/test_answer_prompt.py b/workers/tests/test_answer_prompt.py new file mode 100644 index 0000000000..ebe1675d3d --- /dev/null +++ b/workers/tests/test_answer_prompt.py @@ -0,0 +1,860 @@ +"""Tests for the answer_prompt pipeline (Phase 2E). + +Tests the _handle_answer_prompt method, AnswerPromptService, +VariableReplacementService, and type conversion logic. +All heavy dependencies (LLM, VectorDB, etc.) are mocked. 
def _make_prompt(
    name: str = "field_a",
    prompt: str = "What is the revenue?",
    output_type: str = "text",
    chunk_size: int = 512,
    chunk_overlap: int = 128,
    retrieval_strategy: str = "simple",
    llm_id: str = "llm-1",
    embedding_id: str = "emb-1",
    vector_db_id: str = "vdb-1",
    x2text_id: str = "x2t-1",
    similarity_top_k: int = 5,
):
    """Return a minimal prompt definition dict keyed by ``PSKeys`` constants.

    Every argument has a default so a test only overrides the fields it
    cares about (for example ``output_type="number"``).
    """
    # Pairs are listed in the order the keys should appear in the dict.
    fields = [
        (PSKeys.NAME, name),
        (PSKeys.PROMPT, prompt),
        (PSKeys.TYPE, output_type),
        (PSKeys.CHUNK_SIZE, chunk_size),
        (PSKeys.CHUNK_OVERLAP, chunk_overlap),
        (PSKeys.RETRIEVAL_STRATEGY, retrieval_strategy),
        (PSKeys.LLM, llm_id),
        (PSKeys.EMBEDDING, embedding_id),
        (PSKeys.VECTOR_DB, vector_db_id),
        (PSKeys.X2TEXT_ADAPTER, x2text_id),
        (PSKeys.SIMILARITY_TOP_K, similarity_top_k),
    ]
    return dict(fields)
def _mock_llm():
    """Build a ``MagicMock`` LLM with canned return values.

    ``complete()`` returns a result dict whose response text is
    ``"test answer"``; tests that need a different answer overwrite
    ``complete.return_value`` or ``complete.side_effect`` on the returned
    mock. ``get_usage_reason()`` and ``get_metrics()`` return fixed values.
    """
    canned_result = {
        PSKeys.RESPONSE: MagicMock(text="test answer"),
        PSKeys.HIGHLIGHT_DATA: [],
        PSKeys.CONFIDENCE_DATA: None,
        PSKeys.WORD_CONFIDENCE_DATA: None,
        PSKeys.LINE_NUMBERS: [],
        PSKeys.WHISPER_HASH: "",
    }
    llm = MagicMock(name="llm")
    # Dotted keys configure the return values of the child mocks.
    llm.configure_mock(
        **{
            "complete.return_value": canned_result,
            "get_usage_reason.return_value": "extraction",
            "get_metrics.return_value": {"tokens": 100},
        }
    )
    return llm
class TestHandleAnswerPromptText:
    """Tests for TEXT type prompts."""

    @staticmethod
    def _answer(mock_shim_cls, mock_deps, llm):
        """Wire the patched dependencies to ``llm`` and run the handler.

        Must be called from inside a test whose ``@patch`` decorators are
        active, so the executor sees the mocked deps and tool shim. Runs
        ``_handle_answer_prompt`` on the default context and returns its
        result.
        """
        from executor.executors.legacy_executor import LegacyExecutor

        mock_deps.return_value = _mock_deps(llm)
        mock_shim_cls.return_value = MagicMock()
        return LegacyExecutor()._handle_answer_prompt(_make_context())

    @patch(
        "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps"
    )
    @patch("executor.executors.legacy_executor.ExecutorToolShim")
    def test_text_prompt_returns_success(self, mock_shim_cls, mock_deps):
        """Simple TEXT prompt returns success with structured output."""
        outcome = self._answer(mock_shim_cls, mock_deps, _mock_llm())

        assert outcome.success is True
        assert PSKeys.OUTPUT in outcome.data
        assert PSKeys.METADATA in outcome.data
        assert PSKeys.METRICS in outcome.data
        assert "field_a" in outcome.data[PSKeys.OUTPUT]

    @patch(
        "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps"
    )
    @patch("executor.executors.legacy_executor.ExecutorToolShim")
    def test_text_prompt_answer_stored(self, mock_shim_cls, mock_deps):
        """The LLM answer is stored in structured_output."""
        outcome = self._answer(mock_shim_cls, mock_deps, _mock_llm())

        assert outcome.data[PSKeys.OUTPUT]["field_a"] == "test answer"

    @patch(
        "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps"
    )
    @patch("executor.executors.legacy_executor.ExecutorToolShim")
    def test_trailing_newline_stripped(self, mock_shim_cls, mock_deps):
        """Trailing newlines are stripped from text answers."""
        llm = _mock_llm()
        # Replace the canned answer with one that ends in a newline.
        llm.complete.return_value = {
            PSKeys.RESPONSE: MagicMock(text="answer with trailing\n"),
            PSKeys.HIGHLIGHT_DATA: [],
            PSKeys.CONFIDENCE_DATA: None,
            PSKeys.WORD_CONFIDENCE_DATA: None,
            PSKeys.LINE_NUMBERS: [],
            PSKeys.WHISPER_HASH: "",
        }

        outcome = self._answer(mock_shim_cls, mock_deps, llm)

        assert outcome.data[PSKeys.OUTPUT]["field_a"] == "answer with trailing"
+ response1 = MagicMock() + response1.text = "revenue is $42.5M" + response2 = MagicMock() + response2.text = "42500000" + llm.complete.side_effect = [ + {PSKeys.RESPONSE: response1, PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], PSKeys.WHISPER_HASH: ""}, + {PSKeys.RESPONSE: response2, PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], PSKeys.WHISPER_HASH: ""}, + ] + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + ctx = _make_context(prompts=[_make_prompt(output_type="number")]) + result = executor._handle_answer_prompt(ctx) + + assert result.data[PSKeys.OUTPUT]["field_a"] == pytest.approx(42500000.0) + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_number_na_returns_none(self, mock_shim_cls, mock_deps): + """NUMBER type with NA answer returns None.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + response = MagicMock() + response.text = "NA" + llm.complete.return_value = { + PSKeys.RESPONSE: response, PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], PSKeys.WHISPER_HASH: "", + } + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + ctx = _make_context(prompts=[_make_prompt(output_type="number")]) + result = executor._handle_answer_prompt(ctx) + + # NA → sanitized to None + assert result.data[PSKeys.OUTPUT]["field_a"] is None + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_boolean_yes(self, mock_shim_cls, mock_deps): + """BOOLEAN type converts 'yes' to 
True.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + response1 = MagicMock() + response1.text = "The document confirms it" + response2 = MagicMock() + response2.text = "yes" + llm.complete.side_effect = [ + {PSKeys.RESPONSE: response1, PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], PSKeys.WHISPER_HASH: ""}, + {PSKeys.RESPONSE: response2, PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], PSKeys.WHISPER_HASH: ""}, + ] + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + ctx = _make_context(prompts=[_make_prompt(output_type="boolean")]) + result = executor._handle_answer_prompt(ctx) + + assert result.data[PSKeys.OUTPUT]["field_a"] is True + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_boolean_no(self, mock_shim_cls, mock_deps): + """BOOLEAN type converts 'no' to False.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + response1 = MagicMock() + response1.text = "not confirmed" + response2 = MagicMock() + response2.text = "no" + llm.complete.side_effect = [ + {PSKeys.RESPONSE: response1, PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], PSKeys.WHISPER_HASH: ""}, + {PSKeys.RESPONSE: response2, PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], PSKeys.WHISPER_HASH: ""}, + ] + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + ctx = _make_context(prompts=[_make_prompt(output_type="boolean")]) + result = executor._handle_answer_prompt(ctx) + + assert 
result.data[PSKeys.OUTPUT]["field_a"] is False + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_email_type(self, mock_shim_cls, mock_deps): + """EMAIL type extracts email address.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + response1 = MagicMock() + response1.text = "Contact: user@example.com" + response2 = MagicMock() + response2.text = "user@example.com" + llm.complete.side_effect = [ + {PSKeys.RESPONSE: response1, PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], PSKeys.WHISPER_HASH: ""}, + {PSKeys.RESPONSE: response2, PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], PSKeys.WHISPER_HASH: ""}, + ] + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + ctx = _make_context(prompts=[_make_prompt(output_type="email")]) + result = executor._handle_answer_prompt(ctx) + + assert result.data[PSKeys.OUTPUT]["field_a"] == "user@example.com" + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_date_type(self, mock_shim_cls, mock_deps): + """DATE type extracts date in ISO format.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + response1 = MagicMock() + response1.text = "The date is January 15, 2024" + response2 = MagicMock() + response2.text = "2024-01-15" + llm.complete.side_effect = [ + {PSKeys.RESPONSE: response1, PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], PSKeys.WHISPER_HASH: ""}, + {PSKeys.RESPONSE: response2, PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, 
PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], PSKeys.WHISPER_HASH: ""}, + ] + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + ctx = _make_context(prompts=[_make_prompt(output_type="date")]) + result = executor._handle_answer_prompt(ctx) + + assert result.data[PSKeys.OUTPUT]["field_a"] == "2024-01-15" + + +class TestHandleAnswerPromptJSON: + """Tests for JSON type handling.""" + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_json_parsed(self, mock_shim_cls, mock_deps): + """JSON type parses valid JSON from answer.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + response = MagicMock() + response.text = '{"key": "value"}' + llm.complete.return_value = { + PSKeys.RESPONSE: response, PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], PSKeys.WHISPER_HASH: "", + } + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + ctx = _make_context(prompts=[_make_prompt(output_type="json")]) + result = executor._handle_answer_prompt(ctx) + + assert result.data[PSKeys.OUTPUT]["field_a"] == {"key": "value"} + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_json_na_returns_none(self, mock_shim_cls, mock_deps): + """JSON type with NA answer returns None.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + response = MagicMock() + response.text = "NA" + llm.complete.return_value = { + PSKeys.RESPONSE: response, PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], PSKeys.WHISPER_HASH: "", + } + 
mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + ctx = _make_context(prompts=[_make_prompt(output_type="json")]) + result = executor._handle_answer_prompt(ctx) + + assert result.data[PSKeys.OUTPUT]["field_a"] is None + + +class TestHandleAnswerPromptRetrieval: + """Tests for retrieval integration.""" + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_chunked_retrieval_uses_run_retrieval( + self, mock_shim_cls, mock_deps + ): + """chunk_size > 0 uses RetrievalService.run_retrieval.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + deps = _mock_deps(llm) + _, retrieval_svc, *_ = deps + mock_deps.return_value = deps + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + ctx = _make_context( + prompts=[_make_prompt(chunk_size=512)] + ) + result = executor._handle_answer_prompt(ctx) + + retrieval_svc.run_retrieval.assert_called_once() + assert result.success is True + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_complete_context_for_chunk_zero( + self, mock_shim_cls, mock_deps + ): + """chunk_size=0 uses RetrievalService.retrieve_complete_context.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + deps = _mock_deps(llm) + _, retrieval_svc, *_ = deps + mock_deps.return_value = deps + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + ctx = _make_context( + prompts=[_make_prompt(chunk_size=0)] + ) + result = executor._handle_answer_prompt(ctx) + + retrieval_svc.retrieve_complete_context.assert_called_once() + assert result.success is True + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + 
@patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_context_stored_in_metadata(self, mock_shim_cls, mock_deps): + """Retrieved context is stored in metadata.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + result = executor._handle_answer_prompt(_make_context()) + + metadata = result.data[PSKeys.METADATA] + assert "field_a" in metadata[PSKeys.CONTEXT] + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_invalid_strategy_skips_retrieval( + self, mock_shim_cls, mock_deps + ): + """Invalid retrieval strategy skips retrieval, answer stays NA.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + ctx = _make_context( + prompts=[_make_prompt(retrieval_strategy="nonexistent")] + ) + result = executor._handle_answer_prompt(ctx) + + # Answer stays "NA" which gets sanitized to None + assert result.data[PSKeys.OUTPUT]["field_a"] is None + + +class TestHandleAnswerPromptMultiPrompt: + """Tests for multi-prompt processing.""" + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_multiple_prompts(self, mock_shim_cls, mock_deps): + """Multiple prompts are all processed.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + prompts = [ + _make_prompt(name="revenue"), + _make_prompt(name="date_signed", output_type="text"), + ] + executor = LegacyExecutor() + ctx = _make_context(prompts=prompts) + result = 
executor._handle_answer_prompt(ctx) + + output = result.data[PSKeys.OUTPUT] + assert "revenue" in output + assert "date_signed" in output + + +class TestHandleAnswerPromptErrors: + """Tests for error handling.""" + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_table_type_raises_error(self, mock_shim_cls, mock_deps): + """TABLE type raises LegacyExecutorError (plugins not available).""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + ctx = _make_context( + prompts=[_make_prompt(output_type="table")] + ) + # TABLE raises LegacyExecutorError which is caught by execute() + result = executor.execute(ctx) + assert result.success is False + assert "TABLE" in result.error + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_line_item_type_raises_error(self, mock_shim_cls, mock_deps): + """LINE_ITEM type raises LegacyExecutorError.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + ctx = _make_context( + prompts=[_make_prompt(output_type="line-item")] + ) + result = executor.execute(ctx) + assert result.success is False + assert "LINE_ITEM" in result.error + + +class TestHandleAnswerPromptMetrics: + """Tests for metrics collection.""" + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_metrics_collected(self, mock_shim_cls, mock_deps): + """Metrics include context_retrieval and LLM metrics.""" + from executor.executors.legacy_executor 
import LegacyExecutor + + llm = _mock_llm() + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + result = executor._handle_answer_prompt(_make_context()) + + metrics = result.data[PSKeys.METRICS] + assert "field_a" in metrics + assert "context_retrieval" in metrics["field_a"] + assert "extraction_llm" in metrics["field_a"] + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_vectordb_closed(self, mock_shim_cls, mock_deps): + """VectorDB is closed after processing.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + deps = _mock_deps(llm) + mock_deps.return_value = deps + _, _, _, _, _, _, vector_db_cls = deps + vdb_instance = MagicMock() + vector_db_cls.return_value = vdb_instance + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + executor._handle_answer_prompt(_make_context()) + + vdb_instance.close.assert_called_once() + + +class TestNullSanitization: + """Tests for _sanitize_null_values.""" + + def test_na_string_becomes_none(self): + """Top-level 'NA' string → None.""" + from executor.executors.legacy_executor import LegacyExecutor + + output = {"field": "NA"} + result = LegacyExecutor._sanitize_null_values(output) + assert result["field"] is None + + def test_na_case_insensitive(self): + """'na' (lowercase) → None.""" + from executor.executors.legacy_executor import LegacyExecutor + + output = {"field": "na"} + result = LegacyExecutor._sanitize_null_values(output) + assert result["field"] is None + + def test_nested_list_na(self): + """NA in nested list items → None.""" + from executor.executors.legacy_executor import LegacyExecutor + + output = {"field": ["value", "NA", "other"]} + result = LegacyExecutor._sanitize_null_values(output) + assert result["field"] == ["value", None, "other"] + + def 
test_nested_dict_in_list_na(self): + """NA in dicts inside lists → None.""" + from executor.executors.legacy_executor import LegacyExecutor + + output = {"field": [{"a": "NA", "b": "ok"}]} + result = LegacyExecutor._sanitize_null_values(output) + assert result["field"] == [{"a": None, "b": "ok"}] + + def test_nested_dict_na(self): + """NA in nested dict values → None.""" + from executor.executors.legacy_executor import LegacyExecutor + + output = {"field": {"a": "NA", "b": "ok"}} + result = LegacyExecutor._sanitize_null_values(output) + assert result["field"] == {"a": None, "b": "ok"} + + def test_non_na_values_untouched(self): + """Non-NA values are not modified.""" + from executor.executors.legacy_executor import LegacyExecutor + + output = {"field": "hello", "num": 42, "flag": True} + result = LegacyExecutor._sanitize_null_values(output) + assert result == {"field": "hello", "num": 42, "flag": True} + + +class TestAnswerPromptServiceUnit: + """Unit tests for AnswerPromptService methods.""" + + def test_extract_variable_replaces_percent_vars(self): + """Replace %var% references in prompt text.""" + from executor.executors.answer_prompt import AnswerPromptService + + structured = {"field_a": "42"} + output = {"prompt": "Original: %field_a%"} + result = AnswerPromptService.extract_variable( + structured, ["field_a"], output, "Value is %field_a%" + ) + assert result == "Value is 42" + + def test_extract_variable_missing_raises(self): + """Missing variable raises ValueError.""" + from executor.executors.answer_prompt import AnswerPromptService + + output = {"prompt": "test"} + with pytest.raises(ValueError, match="not found"): + AnswerPromptService.extract_variable( + {}, ["missing_var"], output, "Value is %missing_var%" + ) + + def test_construct_prompt_includes_all_parts(self): + """Constructed prompt includes preamble, prompt, postamble, context.""" + from executor.executors.answer_prompt import AnswerPromptService + + result = 
AnswerPromptService.construct_prompt( + preamble="You are a helpful assistant", + prompt="What is the revenue?", + postamble="Be precise", + grammar_list=[], + context="Revenue was $1M", + platform_postamble="", + word_confidence_postamble="", + ) + assert "You are a helpful assistant" in result + assert "What is the revenue?" in result + assert "Be precise" in result + assert "Revenue was $1M" in result + assert "Answer:" in result + + def test_construct_prompt_with_grammar(self): + """Grammar list adds synonym notes.""" + from executor.executors.answer_prompt import AnswerPromptService + + result = AnswerPromptService.construct_prompt( + preamble="", + prompt="Find the amount", + postamble="", + grammar_list=[{"word": "amount", "synonyms": ["sum", "total"]}], + context="test", + platform_postamble="", + word_confidence_postamble="", + ) + assert "amount" in result + assert "sum, total" in result + + +class TestVariableReplacementService: + """Tests for the VariableReplacementService.""" + + def test_is_variables_present_true(self): + """Detects {{variables}} in text.""" + from executor.executors.variable_replacement import ( + VariableReplacementService, + ) + + assert VariableReplacementService.is_variables_present( + "Hello {{name}}" + ) is True + + def test_is_variables_present_false(self): + """Returns False when no variables present.""" + from executor.executors.variable_replacement import ( + VariableReplacementService, + ) + + assert VariableReplacementService.is_variables_present( + "Hello world" + ) is False + + def test_replace_static_variable(self): + """Static variable {{var}} is replaced with structured output value.""" + from executor.executors.variable_replacement import ( + VariableReplacementHelper, + ) + + result = VariableReplacementHelper.replace_static_variable( + prompt="Total is {{revenue}}", + structured_output={"revenue": "$1M"}, + variable="revenue", + ) + assert result == "Total is $1M" + + def test_custom_data_variable(self): + 
"""Custom data variable {{custom_data.key}} is replaced.""" + from executor.executors.variable_replacement import ( + VariableReplacementHelper, + ) + + result = VariableReplacementHelper.replace_custom_data_variable( + prompt="Company: {{custom_data.company_name}}", + variable="custom_data.company_name", + custom_data={"company_name": "Acme Inc"}, + ) + assert result == "Company: Acme Inc" + + def test_custom_data_missing_raises(self): + """Missing custom data key raises CustomDataError.""" + from executor.executors.exceptions import CustomDataError + from executor.executors.variable_replacement import ( + VariableReplacementHelper, + ) + + with pytest.raises(CustomDataError): + VariableReplacementHelper.replace_custom_data_variable( + prompt="{{custom_data.missing}}", + variable="custom_data.missing", + custom_data={"other": "value"}, + ) diff --git a/workers/tests/test_executor_sanity.py b/workers/tests/test_executor_sanity.py new file mode 100644 index 0000000000..8f0c10927a --- /dev/null +++ b/workers/tests/test_executor_sanity.py @@ -0,0 +1,288 @@ +"""Phase 1 Sanity Check — Executor worker integration tests. + +These tests verify the full executor chain works end-to-end. + +Verifies: +1. Worker enums and registry configuration +2. ExecutorToolShim works from workers venv +3. NoOpExecutor registers and executes via orchestrator +4. Celery task wiring (execute_extraction task logic) +5. Full dispatch -> task -> orchestrator -> executor round-trip +6. 
Retry configuration on the task +""" + +import pytest +from unstract.sdk1.execution.context import ExecutionContext +from unstract.sdk1.execution.executor import BaseExecutor +from unstract.sdk1.execution.orchestrator import ExecutionOrchestrator +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + + +@pytest.fixture(autouse=True) +def _clean_registry(): + """Ensure a clean executor registry for every test.""" + ExecutorRegistry.clear() + yield + ExecutorRegistry.clear() + + +def _make_context(**overrides): + defaults = { + "executor_name": "noop", + "operation": "extract", + "run_id": "run-sanity-001", + "execution_source": "tool", + "organization_id": "org-test", + "request_id": "req-sanity-001", + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + +def _register_noop(): + """Register a NoOpExecutor for testing.""" + + @ExecutorRegistry.register + class NoOpExecutor(BaseExecutor): + @property + def name(self): + return "noop" + + def execute(self, context): + return ExecutionResult( + success=True, + data={"echo": context.operation, "run_id": context.run_id}, + metadata={"executor": self.name}, + ) + + +# --- 1. 
Worker enums and registry --- + + +class TestWorkerEnumsAndRegistry: + """Verify executor is properly registered in worker infrastructure.""" + + def test_worker_type_executor_exists(self): + from shared.enums.worker_enums import WorkerType + + assert WorkerType.EXECUTOR.value == "executor" + + def test_queue_name_executor_exists(self): + from shared.enums.worker_enums import QueueName + + assert QueueName.EXECUTOR.value == "celery_executor_legacy" + + def test_task_name_execute_extraction_exists(self): + from shared.enums.task_enums import TaskName + + assert TaskName.EXECUTE_EXTRACTION.value == "execute_extraction" + + def test_health_port_is_8088(self): + from shared.enums.worker_enums import WorkerType + + assert WorkerType.EXECUTOR.to_health_port() == 8088 + + def test_worker_registry_has_executor_config(self): + from shared.enums.worker_enums import WorkerType + from shared.infrastructure.config.registry import WorkerRegistry + + config = WorkerRegistry.get_queue_config(WorkerType.EXECUTOR) + assert "celery_executor_legacy" in config.all_queues() + + def test_task_routing_includes_execute_extraction(self): + from shared.enums.worker_enums import WorkerType + from shared.infrastructure.config.registry import WorkerRegistry + + routing = WorkerRegistry.get_task_routing(WorkerType.EXECUTOR) + patterns = [r.pattern for r in routing.routes] + assert "execute_extraction" in patterns + + +# --- 2. 
ExecutorToolShim --- + + +class TestExecutorToolShim: + """Verify the real ExecutorToolShim works in the workers venv.""" + + def test_import(self): + from executor.executor_tool_shim import ExecutorToolShim + + shim = ExecutorToolShim(platform_api_key="sk-test") + assert shim.platform_api_key == "sk-test" + + def test_platform_key_returned(self): + from executor.executor_tool_shim import ExecutorToolShim + + shim = ExecutorToolShim(platform_api_key="sk-real-key") + assert shim.get_env_or_die("PLATFORM_SERVICE_API_KEY") == "sk-real-key" + + def test_env_var_from_environ(self, monkeypatch): + from executor.executor_tool_shim import ExecutorToolShim + + monkeypatch.setenv("TEST_SHIM_VAR", "hello") + shim = ExecutorToolShim(platform_api_key="sk-test") + assert shim.get_env_or_die("TEST_SHIM_VAR") == "hello" + + def test_missing_var_raises(self): + from executor.executor_tool_shim import ExecutorToolShim + from unstract.sdk1.exceptions import SdkError + + shim = ExecutorToolShim(platform_api_key="sk-test") + with pytest.raises(SdkError, match="NONEXISTENT"): + shim.get_env_or_die("NONEXISTENT") + + def test_stream_log_does_not_print_json(self, capsys): + """stream_log routes to logging, not stdout JSON.""" + from executor.executor_tool_shim import ExecutorToolShim + + shim = ExecutorToolShim(platform_api_key="sk-test") + shim.stream_log("test message") + captured = capsys.readouterr() + # Should NOT produce JSON on stdout (that's the old protocol) + assert '"type": "LOG"' not in captured.out + + def test_stream_error_raises_sdk_error(self): + from executor.executor_tool_shim import ExecutorToolShim + from unstract.sdk1.exceptions import SdkError + + shim = ExecutorToolShim(platform_api_key="sk-test") + with pytest.raises(SdkError, match="boom"): + shim.stream_error_and_exit("boom") + + +# --- 3. 
NoOpExecutor via Orchestrator --- + + +class TestNoOpExecutorOrchestrator: + """Verify a NoOpExecutor works through the orchestrator.""" + + def test_noop_executor_round_trip(self): + _register_noop() + + ctx = _make_context(operation="extract") + orchestrator = ExecutionOrchestrator() + result = orchestrator.execute(ctx) + + assert result.success is True + assert result.data == {"echo": "extract", "run_id": "run-sanity-001"} + + def test_unknown_executor_fails_gracefully(self): + orchestrator = ExecutionOrchestrator() + ctx = _make_context(executor_name="nonexistent") + result = orchestrator.execute(ctx) + + assert result.success is False + assert "nonexistent" in result.error + + +# --- 4 & 5. Full chain with Celery eager mode --- +# +# executor/worker.py imports executor/tasks.py which defines +# execute_extraction as a shared_task. We import the real app, +# configure it for eager mode, and exercise the actual task. + + +@pytest.fixture +def eager_app(): + """Configure the real executor Celery app for eager-mode testing.""" + from executor.worker import app + + original = { + "task_always_eager": app.conf.task_always_eager, + "task_eager_propagates": app.conf.task_eager_propagates, + "result_backend": app.conf.result_backend, + } + + app.conf.update( + task_always_eager=True, + task_eager_propagates=False, + result_backend="cache+memory://", + ) + + yield app + + app.conf.update(original) + + +class TestCeleryTaskWiring: + """Verify the execute_extraction task configuration.""" + + def test_task_is_registered(self, eager_app): + assert "execute_extraction" in eager_app.tasks + + def test_task_has_retry_config(self, eager_app): + task = eager_app.tasks["execute_extraction"] + assert task.max_retries == 3 + assert ConnectionError in task.autoretry_for + assert TimeoutError in task.autoretry_for + assert OSError in task.autoretry_for + + def test_task_retry_backoff_enabled(self, eager_app): + task = eager_app.tasks["execute_extraction"] + assert task.retry_backoff 
is True + assert task.retry_jitter is True + + +class TestFullChainEager: + """End-to-end test using Celery's eager mode. + + task_always_eager=True makes tasks execute inline in the + calling process — full chain without a broker. + """ + + def _run_task(self, eager_app, context_dict): + """Run execute_extraction task via task.apply() (eager-safe).""" + task = eager_app.tasks["execute_extraction"] + result = task.apply(args=[context_dict]) + return result.get() + + def test_eager_dispatch_round_trip(self, eager_app): + """Execute task inline, verify result comes back.""" + _register_noop() + + ctx = _make_context(operation="answer_prompt", run_id="run-eager") + result_dict = self._run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + assert result.data["echo"] == "answer_prompt" + assert result.data["run_id"] == "run-eager" + assert result.metadata.get("executor") == "noop" + + def test_eager_dispatch_invalid_context(self, eager_app): + """Invalid context dict returns failure result (not exception).""" + result_dict = self._run_task(eager_app, {"bad": "data"}) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is False + assert "Invalid execution context" in result.error + + def test_eager_dispatch_unknown_executor(self, eager_app): + """Unknown executor returns failure (no unhandled exceptions).""" + ctx = _make_context(executor_name="does_not_exist") + result_dict = self._run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is False + assert "does_not_exist" in result.error + + def test_result_serialization_round_trip(self, eager_app): + """Verify ExecutionResult survives Celery serialization.""" + _register_noop() + + ctx = _make_context( + operation="single_pass_extraction", + executor_params={"schema": {"name": "str", "age": "int"}}, + ) + result_dict = self._run_task(eager_app, ctx.to_dict()) + + # Verify the 
raw dict is JSON-compatible + import json + + serialized = json.dumps(result_dict) + deserialized = json.loads(serialized) + + result = ExecutionResult.from_dict(deserialized) + assert result.success is True + assert result.data["echo"] == "single_pass_extraction" diff --git a/workers/tests/test_ide_callback.py b/workers/tests/test_ide_callback.py new file mode 100644 index 0000000000..a95a4371d8 --- /dev/null +++ b/workers/tests/test_ide_callback.py @@ -0,0 +1,649 @@ +"""Unit tests for IDE Callback Worker tasks. + +Tests all 4 callback tasks (ide_index_complete, ide_index_error, +ide_prompt_complete, ide_prompt_error) by mocking the PromptStudioAPIClient +and verifying correct API calls, websocket emissions, return values, +and error handling. + +Tasks are called as plain functions (bypassing Celery task machinery) +since we're testing callback logic, not Celery routing. +""" + +import time +from unittest.mock import MagicMock, call, patch + +import pytest + +# Patch targets +_PATCH_GET_CLIENT = "ide_callback.tasks._get_api_client" +_PATCH_EMIT_WS = "ide_callback.tasks._emit_websocket" +_PATCH_ASYNC_RESULT = "celery.result.AsyncResult" + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def mock_api(): + """Return a mocked PromptStudioAPIClient with default success responses.""" + api = MagicMock() + api.post.return_value = {"success": True} + api.get.return_value = {"success": True, "data": {}} + api.mark_document_indexed.return_value = {"success": True} + api.remove_document_indexing.return_value = {"success": True} + api.update_index_manager.return_value = {"success": True} + api.update_prompt_output.return_value = {"success": True, "data": [{"id": "out1"}]} + api.notify_hubspot.return_value = {"success": True} + api.get_summary_index_key.return_value = { + "success": True, + "data": {"doc_id": "summary-doc-id-hash"}, + 
} + return api + + +@pytest.fixture +def base_index_kwargs(): + """Standard callback_kwargs for index tasks.""" + return { + "log_events_id": "room-123", + "org_id": "org-1", + "user_id": "user-1", + "document_id": "doc-1", + "doc_id_key": "key-abc", + "profile_manager_id": "profile-1", + "executor_task_id": "task-1", + "tool_id": "tool-1", + } + + +@pytest.fixture +def base_prompt_kwargs(): + """Standard callback_kwargs for prompt tasks.""" + return { + "log_events_id": "room-456", + "org_id": "org-1", + "operation": "fetch_response", + "run_id": "run-1", + "document_id": "doc-1", + "prompt_ids": ["p1", "p2"], + "profile_manager_id": "profile-1", + "is_single_pass": False, + "executor_task_id": "task-2", + "tool_id": "tool-2", + "dispatch_time": 0, + } + + +@pytest.fixture +def success_result(): + """Standard executor success result dict.""" + return { + "success": True, + "data": {"doc_id": "computed-doc-id"}, + } + + +@pytest.fixture +def failure_result(): + """Standard executor failure result dict.""" + return { + "success": False, + "error": "Executor blew up", + } + + +# --------------------------------------------------------------------------- +# TestIdeIndexComplete +# --------------------------------------------------------------------------- + + +class TestIdeIndexComplete: + """Tests for ide_index_complete task.""" + + def _call(self, result_dict, callback_kwargs=None): + from ide_callback.tasks import ide_index_complete + + return ide_index_complete(result_dict, callback_kwargs) + + @patch(_PATCH_EMIT_WS) + @patch(_PATCH_GET_CLIENT) + def test_success_path(self, mock_get_client, mock_emit_ws, mock_api, base_index_kwargs, success_result): + mock_get_client.return_value = mock_api + + result = self._call(success_result, base_index_kwargs) + + assert result["message"] == "Document indexed successfully." 
+ assert result["document_id"] == "doc-1" + + # mark_document_indexed called with correct args + mock_api.mark_document_indexed.assert_called_once_with( + org_id="org-1", user_id="user-1", doc_id_key="key-abc", + doc_id="computed-doc-id", organization_id="org-1", + ) + + # update_index_manager called for primary profile + mock_api.update_index_manager.assert_called_once_with( + document_id="doc-1", profile_manager_id="profile-1", + doc_id="computed-doc-id", organization_id="org-1", + ) + + # websocket emitted with success + mock_emit_ws.assert_called_once() + ws_call = mock_emit_ws.call_args + assert ws_call[1]["room"] == "room-123" + assert ws_call[1]["event"] == "prompt_studio_result" + ws_data = ws_call[1]["data"] + assert ws_data["status"] == "completed" + assert ws_data["operation"] == "index_document" + + @patch(_PATCH_EMIT_WS) + @patch(_PATCH_GET_CLIENT) + def test_executor_failure(self, mock_get_client, mock_emit_ws, mock_api, base_index_kwargs, failure_result): + mock_get_client.return_value = mock_api + + result = self._call(failure_result, base_index_kwargs) + + assert result["status"] == "failed" + assert result["error"] == "Executor blew up" + + # Should clean up indexing flag + mock_api.remove_document_indexing.assert_called_once() + + # Should NOT mark as indexed + mock_api.mark_document_indexed.assert_not_called() + + # Should emit error websocket + mock_emit_ws.assert_called_once() + ws_data = mock_emit_ws.call_args[1]["data"] + assert ws_data["status"] == "failed" + assert ws_data["error"] == "Executor blew up" + + @patch(_PATCH_EMIT_WS) + @patch(_PATCH_GET_CLIENT) + def test_no_profile_manager(self, mock_get_client, mock_emit_ws, mock_api, base_index_kwargs, success_result): + """When profile_manager_id is None, skip update_index_manager.""" + mock_get_client.return_value = mock_api + base_index_kwargs["profile_manager_id"] = None + + result = self._call(success_result, base_index_kwargs) + + assert result["document_id"] == "doc-1" + 
mock_api.mark_document_indexed.assert_called_once() + mock_api.update_index_manager.assert_not_called() + + @patch(_PATCH_EMIT_WS) + @patch(_PATCH_GET_CLIENT) + def test_summary_indexing(self, mock_get_client, mock_emit_ws, mock_api, base_index_kwargs, success_result): + """Summary profile triggers get_summary_index_key + update_index_manager.""" + mock_get_client.return_value = mock_api + base_index_kwargs["summary_profile_id"] = "summary-prof-1" + base_index_kwargs["summarize_file_path"] = "/path/to/summary.txt" + + result = self._call(success_result, base_index_kwargs) + + assert result["document_id"] == "doc-1" + + # summary index key fetched via backend endpoint + mock_api.get_summary_index_key.assert_called_once_with( + summary_profile_id="summary-prof-1", + summarize_file_path="/path/to/summary.txt", + org_id="org-1", + organization_id="org-1", + ) + + # update_index_manager called twice: primary + summary + assert mock_api.update_index_manager.call_count == 2 + summary_call = mock_api.update_index_manager.call_args_list[1] + assert summary_call == call( + document_id="doc-1", + profile_manager_id="summary-prof-1", + doc_id="summary-doc-id-hash", + is_summary=True, + organization_id="org-1", + ) + + @patch(_PATCH_EMIT_WS) + @patch(_PATCH_GET_CLIENT) + def test_summary_indexing_failure_non_fatal(self, mock_get_client, mock_emit_ws, mock_api, base_index_kwargs, success_result): + """Summary index failure doesn't prevent success return.""" + mock_get_client.return_value = mock_api + base_index_kwargs["summary_profile_id"] = "summary-prof-1" + base_index_kwargs["summarize_file_path"] = "/path/to/summary.txt" + mock_api.get_summary_index_key.side_effect = Exception("backend down") + + result = self._call(success_result, base_index_kwargs) + + # Primary indexing still succeeds + assert result["message"] == "Document indexed successfully." 
+ mock_api.mark_document_indexed.assert_called_once() + + @patch(_PATCH_EMIT_WS) + @patch(_PATCH_GET_CLIENT) + def test_doc_id_falls_back_to_key(self, mock_get_client, mock_emit_ws, mock_api, base_index_kwargs): + """When result has no doc_id, falls back to doc_id_key.""" + mock_get_client.return_value = mock_api + result_dict = {"success": True, "data": {}} + + result = self._call(result_dict, base_index_kwargs) + + assert result["document_id"] == "doc-1" + mock_api.mark_document_indexed.assert_called_once_with( + org_id="org-1", user_id="user-1", doc_id_key="key-abc", + doc_id="key-abc", organization_id="org-1", + ) + + @patch(_PATCH_EMIT_WS) + @patch(_PATCH_GET_CLIENT) + def test_exception_emits_error_and_reraises(self, mock_get_client, mock_emit_ws, mock_api, base_index_kwargs, success_result): + """Unexpected exception emits error websocket and re-raises.""" + mock_get_client.return_value = mock_api + mock_api.mark_document_indexed.side_effect = RuntimeError("DB down") + + with pytest.raises(RuntimeError, match="DB down"): + self._call(success_result, base_index_kwargs) + + mock_emit_ws.assert_called() + last_ws = mock_emit_ws.call_args_list[-1] + assert last_ws[1]["data"]["status"] == "failed" + + @patch(_PATCH_EMIT_WS) + @patch(_PATCH_GET_CLIENT) + def test_none_callback_kwargs(self, mock_get_client, mock_emit_ws, mock_api, success_result): + """Passing None for callback_kwargs uses defaults without crashing.""" + mock_get_client.return_value = mock_api + + result = self._call(success_result, None) + + assert result["document_id"] == "" + mock_api.mark_document_indexed.assert_called_once() + + +# --------------------------------------------------------------------------- +# TestIdeIndexError +# --------------------------------------------------------------------------- + + +class TestIdeIndexError: + """Tests for ide_index_error task.""" + + def _call(self, failed_task_id, callback_kwargs=None): + from ide_callback.tasks import ide_index_error + + return 
ide_index_error(failed_task_id, callback_kwargs) + + @patch(_PATCH_EMIT_WS) + @patch(_PATCH_GET_CLIENT) + def test_error_with_result(self, mock_get_client, mock_emit_ws, mock_api, base_index_kwargs): + """Retrieves error message from AsyncResult when available.""" + mock_get_client.return_value = mock_api + + mock_async_result = MagicMock() + mock_async_result.result = ValueError("Index OOM") + + with patch(_PATCH_ASYNC_RESULT, return_value=mock_async_result): + self._call("failed-task-id-1", base_index_kwargs) + + mock_api.remove_document_indexing.assert_called_once_with( + org_id="org-1", user_id="user-1", doc_id_key="key-abc", + organization_id="org-1", + ) + + mock_emit_ws.assert_called_once() + ws_data = mock_emit_ws.call_args[1]["data"] + assert ws_data["status"] == "failed" + assert "Index OOM" in ws_data["error"] + + @patch(_PATCH_EMIT_WS) + @patch(_PATCH_GET_CLIENT) + def test_error_without_result(self, mock_get_client, mock_emit_ws, mock_api, base_index_kwargs): + """Falls back to default error message when AsyncResult has no result.""" + mock_get_client.return_value = mock_api + + mock_async_result = MagicMock() + mock_async_result.result = None + + with patch(_PATCH_ASYNC_RESULT, return_value=mock_async_result): + self._call("failed-task-id-2", base_index_kwargs) + + mock_emit_ws.assert_called_once() + ws_data = mock_emit_ws.call_args[1]["data"] + assert ws_data["error"] == "Indexing failed" + + @patch(_PATCH_EMIT_WS) + @patch(_PATCH_GET_CLIENT) + def test_no_doc_id_key_skips_cleanup(self, mock_get_client, mock_emit_ws, mock_api, base_index_kwargs): + """When doc_id_key is empty, skip remove_document_indexing.""" + mock_get_client.return_value = mock_api + base_index_kwargs["doc_id_key"] = "" + + mock_async_result = MagicMock() + mock_async_result.result = None + with patch(_PATCH_ASYNC_RESULT, return_value=mock_async_result): + self._call("failed-task-id-3", base_index_kwargs) + + mock_api.remove_document_indexing.assert_not_called() + 
mock_emit_ws.assert_called_once() + + @patch(_PATCH_EMIT_WS) + @patch(_PATCH_GET_CLIENT) + def test_exception_does_not_crash(self, mock_get_client, mock_emit_ws, mock_api, base_index_kwargs): + """Exception in callback body is caught and logged, not re-raised.""" + mock_get_client.return_value = mock_api + mock_api.remove_document_indexing.side_effect = RuntimeError("oops") + + mock_async_result = MagicMock() + mock_async_result.result = None + with patch(_PATCH_ASYNC_RESULT, return_value=mock_async_result): + # Should not raise + self._call("failed-task-id-4", base_index_kwargs) + + +# --------------------------------------------------------------------------- +# TestIdePromptComplete +# --------------------------------------------------------------------------- + + +class TestIdePromptComplete: + """Tests for ide_prompt_complete task.""" + + def _call(self, result_dict, callback_kwargs=None): + from ide_callback.tasks import ide_prompt_complete + + return ide_prompt_complete(result_dict, callback_kwargs) + + def _make_result(self, output=None, metadata=None): + return { + "success": True, + "data": { + "output": output or {"p1": "answer1"}, + "metadata": metadata or {}, + }, + } + + @patch(_PATCH_EMIT_WS) + @patch(_PATCH_GET_CLIENT) + def test_success_path(self, mock_get_client, mock_emit_ws, mock_api, base_prompt_kwargs): + mock_get_client.return_value = mock_api + + result = self._call(self._make_result(), base_prompt_kwargs) + + assert result["status"] == "completed" + assert result["operation"] == "fetch_response" + + mock_api.update_prompt_output.assert_called_once_with( + run_id="run-1", + prompt_ids=["p1", "p2"], + outputs={"p1": "answer1"}, + document_id="doc-1", + is_single_pass_extract=False, + metadata={}, + profile_manager_id="profile-1", + organization_id="org-1", + ) + + mock_emit_ws.assert_called_once() + ws_data = mock_emit_ws.call_args[1]["data"] + assert ws_data["status"] == "completed" + assert ws_data["operation"] == "fetch_response" + assert 
ws_data["prompt_ids"] == ["p1", "p2"] + + @patch(_PATCH_EMIT_WS) + @patch(_PATCH_GET_CLIENT) + def test_executor_failure(self, mock_get_client, mock_emit_ws, mock_api, base_prompt_kwargs, failure_result): + mock_get_client.return_value = mock_api + + result = self._call(failure_result, base_prompt_kwargs) + + assert result["status"] == "failed" + mock_api.update_prompt_output.assert_not_called() + + mock_emit_ws.assert_called_once() + ws_data = mock_emit_ws.call_args[1]["data"] + assert ws_data["status"] == "failed" + assert ws_data["error"] == "Executor blew up" + + @patch(_PATCH_EMIT_WS) + @patch(_PATCH_GET_CLIENT) + def test_single_pass(self, mock_get_client, mock_emit_ws, mock_api, base_prompt_kwargs): + mock_get_client.return_value = mock_api + base_prompt_kwargs["is_single_pass"] = True + + self._call(self._make_result(), base_prompt_kwargs) + + call_kwargs = mock_api.update_prompt_output.call_args[1] + assert call_kwargs["is_single_pass_extract"] is True + + @patch(_PATCH_EMIT_WS) + @patch(_PATCH_GET_CLIENT) + def test_hubspot_event(self, mock_get_client, mock_emit_ws, mock_api, base_prompt_kwargs): + mock_get_client.return_value = mock_api + base_prompt_kwargs["hubspot_user_id"] = "hubspot-42" + base_prompt_kwargs["is_first_prompt_run"] = True + + self._call(self._make_result(), base_prompt_kwargs) + + mock_api.notify_hubspot.assert_called_once_with( + user_id="hubspot-42", + event_name="PROMPT_RUN", + is_first_for_org=True, + action_label="prompt run", + organization_id="org-1", + ) + + @patch(_PATCH_EMIT_WS) + @patch(_PATCH_GET_CLIENT) + def test_hubspot_failure_non_fatal(self, mock_get_client, mock_emit_ws, mock_api, base_prompt_kwargs): + """HubSpot notification failure doesn't prevent success.""" + mock_get_client.return_value = mock_api + base_prompt_kwargs["hubspot_user_id"] = "hubspot-42" + mock_api.notify_hubspot.side_effect = Exception("HubSpot down") + + result = self._call(self._make_result(), base_prompt_kwargs) + + assert result["status"] == 
"completed" + + @patch(_PATCH_EMIT_WS) + @patch(_PATCH_GET_CLIENT) + def test_elapsed_time_computed(self, mock_get_client, mock_emit_ws, mock_api, base_prompt_kwargs): + """Elapsed time is computed from dispatch_time when provided.""" + mock_get_client.return_value = mock_api + base_prompt_kwargs["dispatch_time"] = time.time() - 5 + + self._call(self._make_result(), base_prompt_kwargs) + + mock_emit_ws.assert_called_once() + ws_data = mock_emit_ws.call_args[1]["data"] + assert ws_data["elapsed"] >= 4 # Allow slight timing variance + + @patch(_PATCH_EMIT_WS) + @patch(_PATCH_GET_CLIENT) + def test_exception_emits_error_and_reraises(self, mock_get_client, mock_emit_ws, mock_api, base_prompt_kwargs): + mock_get_client.return_value = mock_api + mock_api.update_prompt_output.side_effect = RuntimeError("Network") + + with pytest.raises(RuntimeError, match="Network"): + self._call(self._make_result(), base_prompt_kwargs) + + # At least one error emission + assert any( + c[1].get("data", {}).get("status") == "failed" + for c in mock_emit_ws.call_args_list + ) + + @patch(_PATCH_EMIT_WS) + @patch(_PATCH_GET_CLIENT) + def test_output_api_failure_returns_empty_response(self, mock_get_client, mock_emit_ws, mock_api, base_prompt_kwargs): + """When update_prompt_output returns success=False, response is [].""" + mock_get_client.return_value = mock_api + mock_api.update_prompt_output.return_value = {"success": False} + + result = self._call(self._make_result(), base_prompt_kwargs) + + assert result["status"] == "completed" + # The emitted result should be the empty list + ws_data = mock_emit_ws.call_args[1]["data"] + assert ws_data["result"] == [] + + +# --------------------------------------------------------------------------- +# TestIdePromptError +# --------------------------------------------------------------------------- + + +class TestIdePromptError: + """Tests for ide_prompt_error task.""" + + def _call(self, failed_task_id, callback_kwargs=None): + from ide_callback.tasks 
import ide_prompt_error + + return ide_prompt_error(failed_task_id, callback_kwargs) + + @patch(_PATCH_EMIT_WS) + @patch(_PATCH_GET_CLIENT) + def test_error_with_result(self, mock_get_client, mock_emit_ws, mock_api, base_prompt_kwargs): + mock_get_client.return_value = mock_api + + mock_async_result = MagicMock() + mock_async_result.result = RuntimeError("LLM timeout") + + with patch(_PATCH_ASYNC_RESULT, return_value=mock_async_result): + self._call("failed-task-99", base_prompt_kwargs) + + mock_emit_ws.assert_called_once() + ws_data = mock_emit_ws.call_args[1]["data"] + assert ws_data["status"] == "failed" + assert "LLM timeout" in ws_data["error"] + assert ws_data["prompt_ids"] == ["p1", "p2"] + assert ws_data["document_id"] == "doc-1" + + @patch(_PATCH_EMIT_WS) + @patch(_PATCH_GET_CLIENT) + def test_error_without_result(self, mock_get_client, mock_emit_ws, mock_api, base_prompt_kwargs): + mock_get_client.return_value = mock_api + + mock_async_result = MagicMock() + mock_async_result.result = None + + with patch(_PATCH_ASYNC_RESULT, return_value=mock_async_result): + self._call("failed-task-100", base_prompt_kwargs) + + ws_data = mock_emit_ws.call_args[1]["data"] + assert ws_data["error"] == "Prompt execution failed" + + @patch(_PATCH_EMIT_WS) + @patch(_PATCH_GET_CLIENT) + def test_minimal_callback_kwargs(self, mock_get_client, mock_emit_ws, mock_api): + """Works with minimal/empty callback_kwargs.""" + mock_get_client.return_value = mock_api + + mock_async_result = MagicMock() + mock_async_result.result = None + + with patch(_PATCH_ASYNC_RESULT, return_value=mock_async_result): + self._call("failed-task-101", {}) + + mock_emit_ws.assert_called_once() + ws_data = mock_emit_ws.call_args[1]["data"] + assert ws_data["operation"] == "fetch_response" + assert ws_data["prompt_ids"] == [] + assert ws_data["document_id"] == "" + + @patch(_PATCH_EMIT_WS) + @patch(_PATCH_GET_CLIENT) + def test_exception_does_not_crash(self, mock_get_client, mock_emit_ws, mock_api, 
base_prompt_kwargs): + """Exception in callback body is caught and logged, not re-raised.""" + mock_get_client.return_value = mock_api + mock_emit_ws.side_effect = RuntimeError("ws broken") + + mock_async_result = MagicMock() + mock_async_result.result = None + + with patch(_PATCH_ASYNC_RESULT, return_value=mock_async_result): + # Should not raise + self._call("failed-task-102", base_prompt_kwargs) + + +# --------------------------------------------------------------------------- +# TestEmitWebSocket (integration of _emit_websocket helper) +# --------------------------------------------------------------------------- + + +class TestEmitWebSocket: + """Test the _emit_websocket helper sends correct payload shape.""" + + @patch(_PATCH_GET_CLIENT) + def test_websocket_payload_not_double_wrapped(self, mock_get_client, mock_api): + """Verify Fix 1: data is NOT double-wrapped in {"data": {"data": ...}}.""" + mock_get_client.return_value = mock_api + + from ide_callback.tasks import _emit_websocket + + test_data = {"task_id": "t1", "status": "completed"} + _emit_websocket(mock_api, room="room-1", event="test_event", data=test_data) + + mock_api.post.assert_called_once() + payload = mock_api.post.call_args[1]["data"] + assert payload == { + "room": "room-1", + "event": "test_event", + "data": {"task_id": "t1", "status": "completed"}, + } + # The payload["data"] should be the raw data, NOT {"data": data} + assert "data" not in payload["data"] or payload["data"] != {"data": test_data} + + @patch(_PATCH_GET_CLIENT) + def test_websocket_post_failure_does_not_raise(self, mock_get_client, mock_api): + """_emit_websocket catches exceptions and logs.""" + mock_get_client.return_value = mock_api + mock_api.post.side_effect = RuntimeError("connection refused") + + from ide_callback.tasks import _emit_websocket + + # Should not raise + _emit_websocket(mock_api, room="room-1", event="test_event", data={}) + + +# 
--------------------------------------------------------------------------- +# TestJsonSafe +# --------------------------------------------------------------------------- + + +class TestJsonSafe: + """Test _json_safe serialization of non-standard types.""" + + def test_uuid_serialized(self): + import uuid + + from ide_callback.tasks import _json_safe + + val = {"id": uuid.UUID("12345678-1234-5678-1234-567812345678")} + result = _json_safe(val) + assert result["id"] == "12345678-1234-5678-1234-567812345678" + assert isinstance(result["id"], str) + + def test_datetime_serialized(self): + from datetime import datetime + + from ide_callback.tasks import _json_safe + + val = {"ts": datetime(2024, 1, 15, 12, 0, 0)} + result = _json_safe(val) + assert isinstance(result["ts"], str) + assert "2024-01-15" in result["ts"] + + def test_nested_types(self): + import uuid + from datetime import date + + from ide_callback.tasks import _json_safe + + val = { + "items": [ + {"id": uuid.uuid4(), "date": date(2024, 6, 1)}, + ] + } + result = _json_safe(val) + assert isinstance(result["items"][0]["id"], str) + assert isinstance(result["items"][0]["date"], str) diff --git a/workers/tests/test_legacy_executor_extract.py b/workers/tests/test_legacy_executor_extract.py new file mode 100644 index 0000000000..0711d2255a --- /dev/null +++ b/workers/tests/test_legacy_executor_extract.py @@ -0,0 +1,594 @@ +"""Phase 2B — LegacyExecutor._handle_extract tests. + +Verifies: +1. Happy path: extraction returns success with extracted_text +2. With highlight (LLMWhisperer): enable_highlight passed through +3. Without highlight (non-Whisperer): enable_highlight NOT passed +4. AdapterError → failure result +5. Missing required params → failure result +6. Metadata update for tool source: ToolUtils.dump_json called +7. IDE source skips metadata writing +8. FileUtils routing: correct storage type for ide vs tool +9. Orchestrator integration: extract returns success (mocked) +10. 
Celery eager-mode: full task chain returns extraction result +11. LegacyExecutorError caught by execute() → failure result +""" + +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from executor.executors.constants import ( + FileStorageKeys, + IndexingConstants as IKeys, +) +from executor.executors.exceptions import LegacyExecutorError +from unstract.sdk1.adapters.x2text.constants import X2TextConstants +from unstract.sdk1.adapters.x2text.dto import ( + TextExtractionMetadata, + TextExtractionResult, +) +from unstract.sdk1.execution.context import ExecutionContext +from unstract.sdk1.execution.orchestrator import ExecutionOrchestrator +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + + +@pytest.fixture(autouse=True) +def _clean_registry(): + """Ensure a clean executor registry for every test.""" + ExecutorRegistry.clear() + yield + ExecutorRegistry.clear() + + +def _register_legacy(): + from executor.executors.legacy_executor import LegacyExecutor # noqa: F401 + + ExecutorRegistry.register(LegacyExecutor) + + +def _make_context(**overrides): + defaults = { + "executor_name": "legacy", + "operation": "extract", + "run_id": "run-2b-001", + "execution_source": "tool", + "organization_id": "org-test", + "request_id": "req-2b-001", + "executor_params": { + "x2text_instance_id": "x2t-001", + "file_path": "/data/test.pdf", + "platform_api_key": "sk-test-key", + }, + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + +def _mock_process_response(extracted_text="hello world", whisper_hash="hash-123"): + """Build a mock TextExtractionResult.""" + metadata = TextExtractionMetadata(whisper_hash=whisper_hash) + return TextExtractionResult( + extracted_text=extracted_text, + extraction_metadata=metadata, + ) + + +# --- 1. 
# Happy path ---


class TestHappyPath:
    """Extraction through the legacy executor when the adapter returns text."""

    @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance")
    @patch("executor.executors.legacy_executor.X2Text")
    def test_extract_returns_success(self, mock_x2text_cls, mock_get_fs):
        """The executor reports success and exposes the adapter's text."""
        _register_legacy()
        legacy = ExecutorRegistry.get("legacy")

        adapter = MagicMock()
        adapter.x2text_instance = MagicMock()  # not a Whisperer
        adapter.process.return_value = _mock_process_response("hello")
        mock_get_fs.return_value = MagicMock()
        mock_x2text_cls.return_value = adapter

        outcome = legacy.execute(_make_context())

        assert outcome.success is True
        assert outcome.data[IKeys.EXTRACTED_TEXT] == "hello"

    @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance")
    @patch("executor.executors.legacy_executor.X2Text")
    def test_extract_passes_correct_params_to_x2text(
        self, mock_x2text_cls, mock_get_fs
    ):
        """The adapter instance id reaches the X2Text constructor."""
        _register_legacy()
        legacy = ExecutorRegistry.get("legacy")

        adapter = MagicMock()
        adapter.x2text_instance = MagicMock()
        adapter.process.return_value = _mock_process_response()
        mock_get_fs.return_value = MagicMock()
        mock_x2text_cls.return_value = adapter

        params = {
            "x2text_instance_id": "x2t-002",
            "file_path": "/data/doc.pdf",
            "platform_api_key": "sk-key",
            "usage_kwargs": {"org": "test-org"},
        }
        legacy.execute(_make_context(executor_params=params))

        mock_x2text_cls.assert_called_once()
        ctor_call = mock_x2text_cls.call_args
        # Accept the id either as a keyword or as the second positional arg.
        given_by_keyword = ctor_call.kwargs.get("adapter_instance_id") == "x2t-002"
        given_by_position = (
            len(ctor_call.args) > 1 and ctor_call.args[1] == "x2t-002"
        )
        assert given_by_keyword or given_by_position


# --- 2.
# With highlight (LLMWhisperer) ---


class TestWithHighlight:
    """enable_highlight is forwarded when the adapter is an LLMWhisperer."""

    @patch("executor.executors.legacy_executor.ToolUtils.dump_json")
    @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance")
    @patch("executor.executors.legacy_executor.X2Text")
    def test_highlight_with_whisperer_v2(
        self, mock_x2text_cls, mock_get_fs, mock_dump
    ):
        """An LLMWhispererV2-spec'd adapter gets enable_highlight=True."""
        from unstract.sdk1.adapters.x2text.llm_whisperer_v2.src import LLMWhispererV2

        _register_legacy()
        legacy = ExecutorRegistry.get("legacy")

        adapter = MagicMock()
        # spec= makes isinstance(..., LLMWhispererV2) true for the mock.
        adapter.x2text_instance = MagicMock(spec=LLMWhispererV2)
        adapter.process.return_value = _mock_process_response()
        mock_get_fs.return_value = MagicMock()
        mock_x2text_cls.return_value = adapter

        params = {
            "x2text_instance_id": "x2t-whisperer",
            "file_path": "/data/test.pdf",
            "platform_api_key": "sk-key",
            "enable_highlight": True,
            "execution_data_dir": "/data/run",
            "tool_execution_metadata": {},
        }
        outcome = legacy.execute(_make_context(executor_params=params))

        assert outcome.success is True
        # Verify enable_highlight was passed to process()
        adapter.process.assert_called_once()
        process_kwargs = adapter.process.call_args.kwargs
        assert process_kwargs.get("enable_highlight") is True

    @patch("executor.executors.legacy_executor.ToolUtils.dump_json")
    @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance")
    @patch("executor.executors.legacy_executor.X2Text")
    def test_highlight_with_whisperer_v1(
        self, mock_x2text_cls, mock_get_fs, mock_dump
    ):
        """An LLMWhisperer (v1)-spec'd adapter gets enable_highlight=True."""
        from unstract.sdk1.adapters.x2text.llm_whisperer.src import LLMWhisperer

        _register_legacy()
        legacy = ExecutorRegistry.get("legacy")

        adapter = MagicMock()
        adapter.x2text_instance = MagicMock(spec=LLMWhisperer)
        adapter.process.return_value = _mock_process_response()
        mock_get_fs.return_value = MagicMock()
        mock_x2text_cls.return_value = adapter

        params = {
            "x2text_instance_id": "x2t-whisperer-v1",
            "file_path": "/data/test.pdf",
            "platform_api_key": "sk-key",
            "enable_highlight": True,
            "execution_data_dir": "/data/run",
            "tool_execution_metadata": {},
        }
        outcome = legacy.execute(_make_context(executor_params=params))

        assert outcome.success is True
        process_kwargs = adapter.process.call_args.kwargs
        assert process_kwargs.get("enable_highlight") is True


# --- 3. Without highlight (non-Whisperer) ---


class TestWithoutHighlight:
    """enable_highlight is not forwarded when unsupported or disabled."""

    @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance")
    @patch("executor.executors.legacy_executor.X2Text")
    def test_no_highlight_for_non_whisperer(self, mock_x2text_cls, mock_get_fs):
        """A generic adapter never receives the enable_highlight kwarg."""
        _register_legacy()
        legacy = ExecutorRegistry.get("legacy")

        adapter = MagicMock()
        # Generic adapter — not LLMWhisperer
        adapter.x2text_instance = MagicMock()
        adapter.process.return_value = _mock_process_response()
        mock_get_fs.return_value = MagicMock()
        mock_x2text_cls.return_value = adapter

        params = {
            "x2text_instance_id": "x2t-generic",
            "file_path": "/data/test.pdf",
            "platform_api_key": "sk-key",
            "enable_highlight": True,  # requested but adapter doesn't support it
        }
        outcome = legacy.execute(_make_context(executor_params=params))

        assert outcome.success is True
        # enable_highlight should NOT be in process() call
        process_kwargs = adapter.process.call_args.kwargs
        assert "enable_highlight" not in process_kwargs

    @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance")
    @patch("executor.executors.legacy_executor.X2Text")
    def test_highlight_false_skips_whisperer_branch(
        self, mock_x2text_cls, mock_get_fs
    ):
        """With enable_highlight=False even a Whisperer adapter gets no kwarg."""
        from unstract.sdk1.adapters.x2text.llm_whisperer_v2.src import LLMWhispererV2

        _register_legacy()
        legacy = ExecutorRegistry.get("legacy")

        adapter = MagicMock()
        adapter.x2text_instance = MagicMock(spec=LLMWhispererV2)
        adapter.process.return_value = _mock_process_response()
        mock_get_fs.return_value = MagicMock()
        mock_x2text_cls.return_value = adapter

        params = {
            "x2text_instance_id": "x2t-whisperer",
            "file_path": "/data/test.pdf",
            "platform_api_key": "sk-key",
            "enable_highlight": False,  # highlight disabled
        }
        outcome = legacy.execute(_make_context(executor_params=params))

        assert outcome.success is True
        process_kwargs = adapter.process.call_args.kwargs
        assert "enable_highlight" not in process_kwargs


# --- 4. AdapterError → failure result ---


class TestAdapterError:
    """An AdapterError raised by the adapter becomes a failure result."""

    @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance")
    @patch("executor.executors.legacy_executor.X2Text")
    def test_adapter_error_returns_failure(self, mock_x2text_cls, mock_get_fs):
        """The failure message carries the adapter name and the error text."""
        from unstract.sdk1.adapters.exceptions import AdapterError

        _register_legacy()
        legacy = ExecutorRegistry.get("legacy")

        adapter_impl = MagicMock()
        adapter_impl.get_name.return_value = "TestExtractor"
        adapter = MagicMock()
        adapter.x2text_instance = adapter_impl
        adapter.process.side_effect = AdapterError("connection timeout")
        mock_x2text_cls.return_value = adapter
        mock_get_fs.return_value = MagicMock()

        outcome = legacy.execute(_make_context())

        assert outcome.success is False
        assert "TestExtractor" in outcome.error
        assert "connection timeout" in outcome.error


# --- 5.
# Missing required params ---


class TestMissingParams:
    """Required executor params are validated before X2Text is constructed."""

    @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance")
    @patch("executor.executors.legacy_executor.X2Text")
    def test_missing_x2text_instance_id(self, mock_x2text_cls, mock_get_fs):
        """Omitting x2text_instance_id fails and names the missing key."""
        _register_legacy()
        legacy = ExecutorRegistry.get("legacy")

        params = {
            "file_path": "/data/test.pdf",
            "platform_api_key": "sk-key",
        }
        outcome = legacy.execute(_make_context(executor_params=params))

        assert outcome.success is False
        assert "x2text_instance_id" in outcome.error
        mock_x2text_cls.assert_not_called()

    @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance")
    @patch("executor.executors.legacy_executor.X2Text")
    def test_missing_file_path(self, mock_x2text_cls, mock_get_fs):
        """Omitting file_path fails and names the missing key."""
        _register_legacy()
        legacy = ExecutorRegistry.get("legacy")

        params = {
            "x2text_instance_id": "x2t-001",
            "platform_api_key": "sk-key",
        }
        outcome = legacy.execute(_make_context(executor_params=params))

        assert outcome.success is False
        assert "file_path" in outcome.error
        mock_x2text_cls.assert_not_called()

    @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance")
    @patch("executor.executors.legacy_executor.X2Text")
    def test_missing_both_params(self, mock_x2text_cls, mock_get_fs):
        """With both keys absent the error mentions each of them."""
        _register_legacy()
        legacy = ExecutorRegistry.get("legacy")

        only_api_key = {"platform_api_key": "sk-key"}
        outcome = legacy.execute(_make_context(executor_params=only_api_key))

        assert outcome.success is False
        for missing_key in ("x2text_instance_id", "file_path"):
            assert missing_key in outcome.error


# --- 6.
# Metadata update for tool source ---


class TestMetadataToolSource:
    """For execution_source="tool" the whisper hash is persisted as metadata."""

    @patch("executor.executors.legacy_executor.ToolUtils.dump_json")
    @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance")
    @patch("executor.executors.legacy_executor.X2Text")
    def test_tool_source_writes_metadata(
        self, mock_x2text_cls, mock_get_fs, mock_dump
    ):
        """dump_json gets the metadata path, hash payload and fs instance."""
        from unstract.sdk1.adapters.x2text.llm_whisperer_v2.src import LLMWhispererV2

        _register_legacy()
        legacy = ExecutorRegistry.get("legacy")

        storage = MagicMock()
        mock_get_fs.return_value = storage

        adapter = MagicMock()
        adapter.x2text_instance = MagicMock(spec=LLMWhispererV2)
        adapter.process.return_value = _mock_process_response(
            whisper_hash="whash-456"
        )
        mock_x2text_cls.return_value = adapter

        # Kept as a named dict so the in-place update can be checked below.
        tool_meta = {}
        params = {
            "x2text_instance_id": "x2t-whisperer",
            "file_path": "/data/test.pdf",
            "platform_api_key": "sk-key",
            "enable_highlight": True,
            "execution_data_dir": "/run/data",
            "tool_execution_metadata": tool_meta,
        }
        context = _make_context(execution_source="tool", executor_params=params)
        outcome = legacy.execute(context)

        assert outcome.success is True
        # ToolUtils.dump_json should have been called
        mock_dump.assert_called_once()
        dumped = mock_dump.call_args.kwargs
        expected_path = str(Path("/run/data") / IKeys.METADATA_FILE)
        expected_payload = {X2TextConstants.WHISPER_HASH: "whash-456"}
        assert dumped["file_to_dump"] == expected_path
        assert dumped["json_to_dump"] == expected_payload
        assert dumped["fs"] is storage
        # tool_exec_metadata should be updated in-place
        assert tool_meta[X2TextConstants.WHISPER_HASH] == "whash-456"


# --- 7.
# IDE source skips metadata ---


class TestMetadataIDESource:
    """For execution_source="ide" no metadata file is written."""

    @patch("executor.executors.legacy_executor.ToolUtils.dump_json")
    @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance")
    @patch("executor.executors.legacy_executor.X2Text")
    def test_ide_source_skips_metadata(
        self, mock_x2text_cls, mock_get_fs, mock_dump
    ):
        """Extraction succeeds and ToolUtils.dump_json is never invoked."""
        from unstract.sdk1.adapters.x2text.llm_whisperer_v2.src import LLMWhispererV2

        _register_legacy()
        legacy = ExecutorRegistry.get("legacy")

        adapter = MagicMock()
        adapter.x2text_instance = MagicMock(spec=LLMWhispererV2)
        adapter.process.return_value = _mock_process_response()
        mock_get_fs.return_value = MagicMock()
        mock_x2text_cls.return_value = adapter

        params = {
            "x2text_instance_id": "x2t-whisperer",
            "file_path": "/data/test.pdf",
            "platform_api_key": "sk-key",
            "enable_highlight": True,
        }
        context = _make_context(execution_source="ide", executor_params=params)
        outcome = legacy.execute(context)

        assert outcome.success is True
        mock_dump.assert_not_called()


# --- 8.
# FileUtils routing ---


class TestFileUtilsRouting:
    """FileUtils.get_fs_instance picks storage from the execution source."""

    @patch("executor.executors.file_utils.EnvHelper.get_storage")
    def test_ide_returns_permanent_storage(self, mock_get_storage):
        """Source "ide" requests permanent remote storage."""
        from executor.executors.file_utils import FileUtils
        from unstract.sdk1.file_storage.constants import StorageType

        mock_get_storage.return_value = MagicMock()
        expected_kwargs = {
            "storage_type": StorageType.PERMANENT,
            "env_name": FileStorageKeys.PERMANENT_REMOTE_STORAGE,
        }

        FileUtils.get_fs_instance("ide")

        mock_get_storage.assert_called_once_with(**expected_kwargs)

    @patch("executor.executors.file_utils.EnvHelper.get_storage")
    def test_tool_returns_temporary_storage(self, mock_get_storage):
        """Source "tool" requests shared temporary remote storage."""
        from executor.executors.file_utils import FileUtils
        from unstract.sdk1.file_storage.constants import StorageType

        mock_get_storage.return_value = MagicMock()
        expected_kwargs = {
            "storage_type": StorageType.SHARED_TEMPORARY,
            "env_name": FileStorageKeys.TEMPORARY_REMOTE_STORAGE,
        }

        FileUtils.get_fs_instance("tool")

        mock_get_storage.assert_called_once_with(**expected_kwargs)

    def test_invalid_source_raises_value_error(self):
        """An unrecognised source raises ValueError with a matching message."""
        from executor.executors.file_utils import FileUtils

        expectation = pytest.raises(ValueError, match="Invalid execution source")
        with expectation:
            FileUtils.get_fs_instance("unknown")


# --- 9. Orchestrator integration ---


class TestOrchestratorIntegration:
    """Extraction dispatched through ExecutionOrchestrator."""

    @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance")
    @patch("executor.executors.legacy_executor.X2Text")
    def test_orchestrator_extract_returns_success(
        self, mock_x2text_cls, mock_get_fs
    ):
        """The orchestrator returns the successful extraction result."""
        _register_legacy()
        orchestrator = ExecutionOrchestrator()

        adapter = MagicMock()
        adapter.x2text_instance = MagicMock()
        adapter.process.return_value = _mock_process_response("extracted!")
        mock_get_fs.return_value = MagicMock()
        mock_x2text_cls.return_value = adapter

        outcome = orchestrator.execute(_make_context())

        assert outcome.success is True
        assert outcome.data[IKeys.EXTRACTED_TEXT] == "extracted!"


# --- 10.
Celery eager-mode --- + + +@pytest.fixture +def eager_app(): + """Configure the real executor Celery app for eager-mode testing.""" + from executor.worker import app + + original = { + "task_always_eager": app.conf.task_always_eager, + "task_eager_propagates": app.conf.task_eager_propagates, + "result_backend": app.conf.result_backend, + } + app.conf.update( + task_always_eager=True, + task_eager_propagates=False, + result_backend="cache+memory://", + ) + yield app + app.conf.update(original) + + +class TestCeleryEager: + @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance") + @patch("executor.executors.legacy_executor.X2Text") + def test_eager_extract_returns_success( + self, mock_x2text_cls, mock_get_fs, eager_app + ): + _register_legacy() + + mock_x2text = MagicMock() + mock_x2text.process.return_value = _mock_process_response("celery text") + mock_x2text.x2text_instance = MagicMock() + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + ctx = _make_context() + task = eager_app.tasks["execute_extraction"] + result_dict = task.apply(args=[ctx.to_dict()]).get() + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + assert result.data[IKeys.EXTRACTED_TEXT] == "celery text" + + +# --- 11. 
LegacyExecutorError caught by execute() --- + + +class TestExecuteErrorCatching: + @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance") + @patch("executor.executors.legacy_executor.X2Text") + def test_extraction_error_caught_by_execute( + self, mock_x2text_cls, mock_get_fs + ): + """ExtractionError (a LegacyExecutorError) is caught in execute() + and mapped to ExecutionResult.failure().""" + from unstract.sdk1.adapters.exceptions import AdapterError + + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + mock_x2text = MagicMock() + mock_x2text.x2text_instance = MagicMock() + mock_x2text.x2text_instance.get_name.return_value = "BadExtractor" + mock_x2text.process.side_effect = AdapterError("timeout") + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + ctx = _make_context() + result = executor.execute(ctx) + + # Should be a clean failure, NOT an unhandled exception + assert result.success is False + assert "BadExtractor" in result.error + assert "timeout" in result.error + + def test_legacy_executor_error_subclass_caught(self): + """Any LegacyExecutorError subclass raised by a handler is caught.""" + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + # Monkey-patch _handle_extract to raise a LegacyExecutorError + def _raise_err(ctx): + raise LegacyExecutorError(message="custom error", code=422) + + executor._handle_extract = _raise_err + + ctx = _make_context() + result = executor.execute(ctx) + + assert result.success is False + assert result.error == "custom error" diff --git a/workers/tests/test_legacy_executor_index.py b/workers/tests/test_legacy_executor_index.py new file mode 100644 index 0000000000..d87d5b5b97 --- /dev/null +++ b/workers/tests/test_legacy_executor_index.py @@ -0,0 +1,453 @@ +"""Phase 2C — LegacyExecutor._handle_index tests. + +Verifies: +1. Happy path: indexing returns success with doc_id +2. Chunk size 0: skips indexing, still returns doc_id +3. 
Missing required params → failure result +4. Reindex flag: passes reindex through to Index +5. VectorDB.close() always called (even on error) +6. Indexing error → LegacyExecutorError → failure result +7. Orchestrator integration: index returns success (mocked) +8. Celery eager-mode: full task chain returns indexing result +9. Index class: generate_index_key called with correct DTOs +10. EmbeddingCompat and VectorDB created with correct params + +Heavy SDK1 dependencies (llama_index, qdrant) are lazily imported +via ``LegacyExecutor._get_indexing_deps()``. We mock that method +to avoid protobuf conflicts in the test environment. +""" + +from unittest.mock import MagicMock, patch + +import pytest + +from executor.executors.constants import IndexingConstants as IKeys +from unstract.sdk1.execution.context import ExecutionContext +from unstract.sdk1.execution.orchestrator import ExecutionOrchestrator +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + + +@pytest.fixture(autouse=True) +def _clean_registry(): + ExecutorRegistry.clear() + yield + ExecutorRegistry.clear() + + +def _register_legacy(): + from executor.executors.legacy_executor import LegacyExecutor # noqa: F401 + + ExecutorRegistry.register(LegacyExecutor) + + +def _make_index_context(**overrides): + defaults = { + "executor_name": "legacy", + "operation": "index", + "run_id": "run-2c-001", + "execution_source": "tool", + "organization_id": "org-test", + "request_id": "req-2c-001", + "executor_params": { + "embedding_instance_id": "emb-001", + "vector_db_instance_id": "vdb-001", + "x2text_instance_id": "x2t-001", + "file_path": "/data/test.pdf", + "file_hash": "abc123", + "extracted_text": "Hello world", + "platform_api_key": "sk-test", + "chunk_size": 512, + "chunk_overlap": 128, + }, + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + +_PATCH_FS = "executor.executors.legacy_executor.FileUtils.get_fs_instance" 
+_PATCH_DEPS = ( + "executor.executors.legacy_executor.LegacyExecutor._get_indexing_deps" +) + + +@pytest.fixture +def mock_indexing_deps(): + """Mock the heavy indexing dependencies via _get_indexing_deps().""" + mock_index_cls = MagicMock() + mock_emb_cls = MagicMock() + mock_vdb_cls = MagicMock() + + with patch(_PATCH_DEPS, return_value=(mock_index_cls, mock_emb_cls, mock_vdb_cls)): + yield mock_index_cls, mock_emb_cls, mock_vdb_cls + + +def _setup_mock_index(mock_index_cls, doc_id="doc-hash-123"): + """Configure a mock Index instance.""" + mock_index = MagicMock() + mock_index.generate_index_key.return_value = doc_id + mock_index.is_document_indexed.return_value = False + mock_index.perform_indexing.return_value = doc_id + mock_index_cls.return_value = mock_index + return mock_index + + +# --- 1. Happy path --- + + +class TestHappyPath: + @patch(_PATCH_FS) + def test_index_returns_success_with_doc_id( + self, mock_get_fs, mock_indexing_deps + ): + mock_index_cls, mock_emb_cls, mock_vdb_cls = mock_indexing_deps + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + _setup_mock_index(mock_index_cls, "doc-hash-123") + mock_emb_cls.return_value = MagicMock() + mock_vdb = MagicMock() + mock_vdb_cls.return_value = mock_vdb + mock_get_fs.return_value = MagicMock() + + ctx = _make_index_context() + result = executor.execute(ctx) + + assert result.success is True + assert result.data[IKeys.DOC_ID] == "doc-hash-123" + mock_vdb.close.assert_called_once() + + +# --- 2. 
Chunk size 0: skips indexing --- + + +class TestChunkSizeZero: + @patch( + "unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-zero-chunk", + ) + @patch(_PATCH_FS) + def test_chunk_size_zero_skips_indexing(self, mock_get_fs, mock_gen_key): + _register_legacy() + executor = ExecutorRegistry.get("legacy") + mock_get_fs.return_value = MagicMock() + + ctx = _make_index_context( + executor_params={ + "embedding_instance_id": "emb-001", + "vector_db_instance_id": "vdb-001", + "x2text_instance_id": "x2t-001", + "file_path": "/data/test.pdf", + "file_hash": "abc123", + "extracted_text": "text", + "platform_api_key": "sk-test", + "chunk_size": 0, + "chunk_overlap": 0, + } + ) + result = executor.execute(ctx) + + assert result.success is True + assert result.data[IKeys.DOC_ID] == "doc-zero-chunk" + mock_gen_key.assert_called_once() + + +# --- 3. Missing required params --- + + +class TestMissingParams: + def test_missing_embedding_instance_id(self): + _register_legacy() + executor = ExecutorRegistry.get("legacy") + ctx = _make_index_context( + executor_params={ + "vector_db_instance_id": "vdb-001", + "x2text_instance_id": "x2t-001", + "file_path": "/data/test.pdf", + "platform_api_key": "sk-test", + } + ) + result = executor.execute(ctx) + assert result.success is False + assert "embedding_instance_id" in result.error + + def test_missing_multiple_params(self): + _register_legacy() + executor = ExecutorRegistry.get("legacy") + ctx = _make_index_context( + executor_params={"platform_api_key": "sk-test"} + ) + result = executor.execute(ctx) + assert result.success is False + assert "embedding_instance_id" in result.error + assert "vector_db_instance_id" in result.error + assert "x2text_instance_id" in result.error + assert "file_path" in result.error + + +# --- 4. 
Reindex flag --- + + +class TestReindex: + @patch(_PATCH_FS) + def test_reindex_passed_through(self, mock_get_fs, mock_indexing_deps): + mock_index_cls, mock_emb_cls, mock_vdb_cls = mock_indexing_deps + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + _setup_mock_index(mock_index_cls, "doc-reindex") + mock_index_cls.return_value.is_document_indexed.return_value = True + mock_emb_cls.return_value = MagicMock() + mock_vdb_cls.return_value = MagicMock() + mock_get_fs.return_value = MagicMock() + + ctx = _make_index_context( + executor_params={ + "embedding_instance_id": "emb-001", + "vector_db_instance_id": "vdb-001", + "x2text_instance_id": "x2t-001", + "file_path": "/data/test.pdf", + "file_hash": "abc123", + "extracted_text": "text", + "platform_api_key": "sk-test", + "chunk_size": 512, + "chunk_overlap": 128, + "reindex": True, + } + ) + result = executor.execute(ctx) + + assert result.success is True + init_call = mock_index_cls.call_args + assert init_call.kwargs["processing_options"].reindex is True + + +# --- 5. 
VectorDB.close() always called --- + + +class TestVectorDBClose: + @patch(_PATCH_FS) + def test_vectordb_closed_on_success(self, mock_get_fs, mock_indexing_deps): + mock_index_cls, mock_emb_cls, mock_vdb_cls = mock_indexing_deps + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + _setup_mock_index(mock_index_cls) + mock_emb_cls.return_value = MagicMock() + mock_vdb = MagicMock() + mock_vdb_cls.return_value = mock_vdb + mock_get_fs.return_value = MagicMock() + + ctx = _make_index_context() + executor.execute(ctx) + mock_vdb.close.assert_called_once() + + @patch(_PATCH_FS) + def test_vectordb_closed_on_error(self, mock_get_fs, mock_indexing_deps): + mock_index_cls, mock_emb_cls, mock_vdb_cls = mock_indexing_deps + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + mock_index = _setup_mock_index(mock_index_cls) + mock_index.is_document_indexed.side_effect = RuntimeError("boom") + mock_emb_cls.return_value = MagicMock() + mock_vdb = MagicMock() + mock_vdb_cls.return_value = mock_vdb + mock_get_fs.return_value = MagicMock() + + ctx = _make_index_context() + result = executor.execute(ctx) + + assert result.success is False + mock_vdb.close.assert_called_once() + + +# --- 6. 
Indexing error → failure result --- + + +class TestIndexingError: + @patch(_PATCH_FS) + def test_indexing_error_returns_failure( + self, mock_get_fs, mock_indexing_deps + ): + mock_index_cls, mock_emb_cls, mock_vdb_cls = mock_indexing_deps + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + mock_index = _setup_mock_index(mock_index_cls, "doc-err") + mock_index.perform_indexing.side_effect = RuntimeError( + "vector DB unavailable" + ) + mock_emb_cls.return_value = MagicMock() + mock_vdb_cls.return_value = MagicMock() + mock_get_fs.return_value = MagicMock() + + ctx = _make_index_context() + result = executor.execute(ctx) + + assert result.success is False + assert "indexing" in result.error.lower() + assert "vector DB unavailable" in result.error + + +# --- 7. Orchestrator integration --- + + +class TestOrchestratorIntegration: + @patch(_PATCH_FS) + def test_orchestrator_index_returns_success( + self, mock_get_fs, mock_indexing_deps + ): + mock_index_cls, mock_emb_cls, mock_vdb_cls = mock_indexing_deps + _register_legacy() + orchestrator = ExecutionOrchestrator() + + _setup_mock_index(mock_index_cls, "doc-orch") + mock_emb_cls.return_value = MagicMock() + mock_vdb_cls.return_value = MagicMock() + mock_get_fs.return_value = MagicMock() + + ctx = _make_index_context() + result = orchestrator.execute(ctx) + + assert result.success is True + assert result.data[IKeys.DOC_ID] == "doc-orch" + + +# --- 8. 
Celery eager-mode --- + + +@pytest.fixture +def eager_app(): + from executor.worker import app + + original = { + "task_always_eager": app.conf.task_always_eager, + "task_eager_propagates": app.conf.task_eager_propagates, + "result_backend": app.conf.result_backend, + } + app.conf.update( + task_always_eager=True, + task_eager_propagates=False, + result_backend="cache+memory://", + ) + yield app + app.conf.update(original) + + +class TestCeleryEager: + @patch(_PATCH_FS) + def test_eager_index_returns_success( + self, mock_get_fs, mock_indexing_deps, eager_app + ): + mock_index_cls, mock_emb_cls, mock_vdb_cls = mock_indexing_deps + _register_legacy() + + _setup_mock_index(mock_index_cls, "doc-celery") + mock_emb_cls.return_value = MagicMock() + mock_vdb_cls.return_value = MagicMock() + mock_get_fs.return_value = MagicMock() + + ctx = _make_index_context() + task = eager_app.tasks["execute_extraction"] + result_dict = task.apply(args=[ctx.to_dict()]).get() + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + assert result.data[IKeys.DOC_ID] == "doc-celery" + + +# --- 9. 
Index class receives correct DTOs --- + + +class TestIndexDTOs: + @patch(_PATCH_FS) + def test_index_created_with_correct_dtos( + self, mock_get_fs, mock_indexing_deps + ): + mock_index_cls, mock_emb_cls, mock_vdb_cls = mock_indexing_deps + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + _setup_mock_index(mock_index_cls, "doc-dto") + mock_emb_cls.return_value = MagicMock() + mock_vdb_cls.return_value = MagicMock() + mock_get_fs.return_value = MagicMock() + + ctx = _make_index_context( + executor_params={ + "embedding_instance_id": "emb-dto", + "vector_db_instance_id": "vdb-dto", + "x2text_instance_id": "x2t-dto", + "file_path": "/data/doc.pdf", + "file_hash": "hash-dto", + "extracted_text": "text", + "platform_api_key": "sk-test", + "chunk_size": 256, + "chunk_overlap": 64, + "tool_id": "tool-dto", + "tags": ["tag1"], + } + ) + executor.execute(ctx) + + init_kwargs = mock_index_cls.call_args.kwargs + ids = init_kwargs["instance_identifiers"] + assert ids.embedding_instance_id == "emb-dto" + assert ids.vector_db_instance_id == "vdb-dto" + assert ids.x2text_instance_id == "x2t-dto" + assert ids.tool_id == "tool-dto" + assert ids.tags == ["tag1"] + + chunking = init_kwargs["chunking_config"] + assert chunking.chunk_size == 256 + assert chunking.chunk_overlap == 64 + + gen_call = mock_index_cls.return_value.generate_index_key.call_args + fi = gen_call.kwargs["file_info"] + assert fi.file_path == "/data/doc.pdf" + assert fi.file_hash == "hash-dto" + + +# --- 10. 
EmbeddingCompat and VectorDB created with correct params --- + + +class TestAdapterCreation: + @patch(_PATCH_FS) + def test_embedding_and_vectordb_params( + self, mock_get_fs, mock_indexing_deps + ): + mock_index_cls, mock_emb_cls, mock_vdb_cls = mock_indexing_deps + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + _setup_mock_index(mock_index_cls, "doc-adapt") + mock_emb = MagicMock() + mock_emb_cls.return_value = mock_emb + mock_vdb = MagicMock() + mock_vdb_cls.return_value = mock_vdb + mock_get_fs.return_value = MagicMock() + + ctx = _make_index_context( + executor_params={ + "embedding_instance_id": "emb-check", + "vector_db_instance_id": "vdb-check", + "x2text_instance_id": "x2t-001", + "file_path": "/data/test.pdf", + "file_hash": "abc", + "extracted_text": "text", + "platform_api_key": "sk-test", + "chunk_size": 512, + "chunk_overlap": 128, + "usage_kwargs": {"org": "test-org"}, + } + ) + executor.execute(ctx) + + emb_call = mock_emb_cls.call_args + assert emb_call.kwargs["adapter_instance_id"] == "emb-check" + assert emb_call.kwargs["kwargs"] == {"org": "test-org"} + + vdb_call = mock_vdb_cls.call_args + assert vdb_call.kwargs["adapter_instance_id"] == "vdb-check" + assert vdb_call.kwargs["embedding"] is mock_emb diff --git a/workers/tests/test_legacy_executor_scaffold.py b/workers/tests/test_legacy_executor_scaffold.py new file mode 100644 index 0000000000..48789c218d --- /dev/null +++ b/workers/tests/test_legacy_executor_scaffold.py @@ -0,0 +1,306 @@ +"""Phase 2A — LegacyExecutor scaffold tests. + +Verifies: +1. Registration in ExecutorRegistry +2. Name property +3. Unsupported operation handling +4. Each operation raises NotImplementedError +5. Orchestrator wraps NotImplementedError as failure +6. Celery eager-mode chain +7. Dispatch table coverage (every Operation has a handler) +8. Constants importable +9. DTOs importable +10. 
Exceptions standalone (no Flask dependency) +""" + +import pytest + +from unstract.sdk1.execution.context import ExecutionContext, Operation +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + + +@pytest.fixture(autouse=True) +def _clean_registry(): + """Ensure a clean executor registry for every test.""" + ExecutorRegistry.clear() + yield + ExecutorRegistry.clear() + + +def _register_legacy(): + """Import executor.executors to trigger LegacyExecutor registration.""" + from executor.executors.legacy_executor import LegacyExecutor # noqa: F401 + + ExecutorRegistry.register(LegacyExecutor) + + +def _make_context(**overrides): + defaults = { + "executor_name": "legacy", + "operation": "extract", + "run_id": "run-2a-001", + "execution_source": "tool", + "organization_id": "org-test", + "request_id": "req-2a-001", + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + +# --- 1. Registration --- + + +class TestRegistration: + def test_legacy_in_registry(self): + _register_legacy() + assert "legacy" in ExecutorRegistry.list_executors() + + +# --- 2. Name --- + + +class TestName: + def test_name_is_legacy(self): + _register_legacy() + executor = ExecutorRegistry.get("legacy") + assert executor.name == "legacy" + + +# --- 3. Unsupported operation --- + + +class TestUnsupportedOperation: + def test_unsupported_operation_returns_failure(self): + _register_legacy() + executor = ExecutorRegistry.get("legacy") + ctx = _make_context(operation="totally_unknown_op") + result = executor.execute(ctx) + + assert result.success is False + assert "does not support operation" in result.error + assert "totally_unknown_op" in result.error + + +# --- 4. All operations are implemented (no stubs remain) --- +# TestHandlerStubs and TestOrchestratorWrapping removed: +# All operations (extract, index, answer_prompt, single_pass_extraction, +# summarize) are now fully implemented. 
Agentic operations moved to +# AgenticPromptStudioExecutor (cloud plugin). + + +# --- 6. Celery eager-mode chain --- + + +@pytest.fixture +def eager_app(): + """Configure the real executor Celery app for eager-mode testing.""" + from executor.worker import app + + original = { + "task_always_eager": app.conf.task_always_eager, + "task_eager_propagates": app.conf.task_eager_propagates, + "result_backend": app.conf.result_backend, + } + + app.conf.update( + task_always_eager=True, + task_eager_propagates=False, + result_backend="cache+memory://", + ) + + yield app + + app.conf.update(original) + + +class TestCeleryEagerChain: + def test_eager_unsupported_op_returns_failure(self, eager_app): + """execute_extraction with an unsupported operation returns failure.""" + _register_legacy() + + ctx = _make_context(operation="totally_unknown_op") + task = eager_app.tasks["execute_extraction"] + result_dict = task.apply(args=[ctx.to_dict()]).get() + result = ExecutionResult.from_dict(result_dict) + + assert result.success is False + assert "does not support operation" in result.error + + +# --- 7. Dispatch table coverage --- + + +class TestDispatchTableCoverage: + def test_every_operation_has_handler(self): + """Every Operation handled by LegacyExecutor is in _OPERATION_MAP. + + Operations handled by cloud executors (discovered via entry points) + are excluded — they have their own executor classes. 
+ """ + from executor.executors.legacy_executor import LegacyExecutor + + # Operations handled by cloud executors, not LegacyExecutor + cloud_executor_operations = { + "table_extract", # TableExtractorExecutor + "smart_table_extract", # SmartTableExtractorExecutor + "sps_answer_prompt", # SimplePromptStudioExecutor + "sps_index", # SimplePromptStudioExecutor + "agentic_extract", # AgenticPromptStudioExecutor + "agentic_summarize", # AgenticPromptStudioExecutor + "agentic_uniformize", # AgenticPromptStudioExecutor + "agentic_finalize", # AgenticPromptStudioExecutor + "agentic_generate_prompt", # AgenticPromptStudioExecutor + "agentic_generate_prompt_pipeline", # AgenticPromptStudioExecutor + "agentic_compare", # AgenticPromptStudioExecutor + "agentic_tune_field", # AgenticPromptStudioExecutor + } + + for op in Operation: + if op.value in cloud_executor_operations: + continue + assert op.value in LegacyExecutor._OPERATION_MAP, ( + f"Operation {op.value} missing from _OPERATION_MAP" + ) + + +# --- 8. Constants importable --- + + +class TestConstants: + def test_prompt_service_constants(self): + from executor.executors.constants import PromptServiceConstants + + assert hasattr(PromptServiceConstants, "TOOL_ID") + assert PromptServiceConstants.TOOL_ID == "tool_id" + + def test_retrieval_strategy(self): + from executor.executors.constants import RetrievalStrategy + + assert RetrievalStrategy.SIMPLE.value == "simple" + assert RetrievalStrategy.SUBQUESTION.value == "subquestion" + + def test_run_level(self): + from executor.executors.constants import RunLevel + + assert RunLevel.RUN.value == "RUN" + assert RunLevel.EVAL.value == "EVAL" + + +# --- 9. 
DTOs importable --- + + +class TestDTOs: + def test_chunking_config(self): + from executor.executors.dto import ChunkingConfig + + cfg = ChunkingConfig(chunk_size=512, chunk_overlap=64) + assert cfg.chunk_size == 512 + + def test_chunking_config_zero_raises(self): + from executor.executors.dto import ChunkingConfig + + with pytest.raises(ValueError, match="zero chunks"): + ChunkingConfig(chunk_size=0, chunk_overlap=0) + + def test_file_info(self, tmp_path): + from executor.executors.dto import FileInfo + + test_path = str(tmp_path / "test.pdf") + fi = FileInfo(file_path=test_path, file_hash="abc123") + assert fi.file_path == test_path + + def test_instance_identifiers(self): + from executor.executors.dto import InstanceIdentifiers + + ids = InstanceIdentifiers( + embedding_instance_id="emb-1", + vector_db_instance_id="vdb-1", + x2text_instance_id="x2t-1", + llm_instance_id="llm-1", + tool_id="tool-1", + ) + assert ids.tool_id == "tool-1" + + def test_processing_options(self): + from executor.executors.dto import ProcessingOptions + + opts = ProcessingOptions(reindex=True) + assert opts.reindex is True + assert opts.enable_highlight is False + + +# --- 10. 
Exceptions standalone --- + + +class TestExceptions: + def test_legacy_executor_error_has_code_and_message(self): + from executor.executors.exceptions import LegacyExecutorError + + err = LegacyExecutorError(message="test error", code=418) + assert err.message == "test error" + assert err.code == 418 + assert str(err) == "test error" + + def test_extraction_error_has_code_and_message(self): + from executor.executors.exceptions import ExtractionError + + err = ExtractionError(message="extraction failed", code=500) + assert err.message == "extraction failed" + assert err.code == 500 + + def test_no_flask_import(self): + """Verify exceptions module does NOT import Flask.""" + import importlib + import sys + + # Ensure fresh import + mod_name = "executor.executors.exceptions" + if mod_name in sys.modules: + importlib.reload(sys.modules[mod_name]) + else: + importlib.import_module(mod_name) + + # Check that no flask modules were pulled in + flask_modules = [m for m in sys.modules if m.startswith("flask")] + assert flask_modules == [], ( + f"Flask modules imported: {flask_modules}" + ) + + def test_custom_data_error_signature(self): + from executor.executors.exceptions import CustomDataError + + err = CustomDataError( + variable="invoice_num", reason="not found", is_ide=True + ) + assert "invoice_num" in err.message + assert "not found" in err.message + assert "Prompt Studio" in err.message + + def test_custom_data_error_tool_mode(self): + from executor.executors.exceptions import CustomDataError + + err = CustomDataError( + variable="order_id", reason="missing", is_ide=False + ) + assert "API request" in err.message + + def test_missing_field_error(self): + from executor.executors.exceptions import MissingFieldError + + err = MissingFieldError(missing_fields=["tool_id", "file_path"]) + assert "tool_id" in err.message + assert "file_path" in err.message + + def test_bad_request_defaults(self): + from executor.executors.exceptions import BadRequest + + err = BadRequest() 
+ assert err.code == 400 + assert "Bad Request" in err.message + + def test_rate_limit_error_defaults(self): + from executor.executors.exceptions import RateLimitError + + err = RateLimitError() + assert err.code == 429 diff --git a/workers/tests/test_phase1_log_streaming.py b/workers/tests/test_phase1_log_streaming.py new file mode 100644 index 0000000000..9c063e19de --- /dev/null +++ b/workers/tests/test_phase1_log_streaming.py @@ -0,0 +1,489 @@ +"""Phase 1 — Executor log streaming to frontend via Socket.IO. + +Tests cover: +- ExecutionContext round-trips log_events_id through to_dict/from_dict +- LogPublisher.log_progress() returns type: "PROGRESS" (not "LOG") +- LogPublisher.log_prompt() still returns type: "LOG" (unchanged) +- ExecutorToolShim with log_events_id: stream_log() publishes progress +- ExecutorToolShim without log_events_id: no publishing, no exceptions +- ExecutorToolShim with failing LogPublisher: no exception raised +- execute_extraction builds component dict when log_events_id present +- execute_extraction skips component dict when log_events_id absent +""" + +from unittest.mock import MagicMock, patch + + +from unstract.sdk1.constants import LogLevel +from unstract.sdk1.execution.context import ExecutionContext + + +# --------------------------------------------------------------------------- +# 1A — ExecutionContext.log_events_id round-trip +# --------------------------------------------------------------------------- + + +class TestExecutionContextLogEventsId: + """Verify log_events_id serialization in ExecutionContext.""" + + def test_log_events_id_default_is_none(self): + ctx = ExecutionContext( + executor_name="legacy", + operation="extract", + run_id="r1", + execution_source="ide", + ) + assert ctx.log_events_id is None + + def test_log_events_id_round_trips(self): + ctx = ExecutionContext( + executor_name="legacy", + operation="extract", + run_id="r1", + execution_source="ide", + log_events_id="session-abc", + ) + d = ctx.to_dict() + 
assert d["log_events_id"] == "session-abc" + + restored = ExecutionContext.from_dict(d) + assert restored.log_events_id == "session-abc" + + def test_log_events_id_none_round_trips(self): + ctx = ExecutionContext( + executor_name="legacy", + operation="extract", + run_id="r1", + execution_source="ide", + ) + d = ctx.to_dict() + assert d["log_events_id"] is None + + restored = ExecutionContext.from_dict(d) + assert restored.log_events_id is None + + def test_backward_compat_missing_key(self): + """from_dict with old payload lacking log_events_id.""" + old_payload = { + "executor_name": "legacy", + "operation": "extract", + "run_id": "r1", + "execution_source": "ide", + } + ctx = ExecutionContext.from_dict(old_payload) + assert ctx.log_events_id is None + + +# --------------------------------------------------------------------------- +# 1B-i — LogPublisher.log_progress() vs log_prompt() +# --------------------------------------------------------------------------- + + +class TestLogPublisherLogProgress: + """Verify log_progress returns type PROGRESS, log_prompt returns LOG.""" + + def test_log_progress_type(self): + from unstract.core.pubsub_helper import LogPublisher + + result = LogPublisher.log_progress( + component={"tool_id": "t1"}, + level="INFO", + state="TOOL_RUN", + message="Extracting text...", + ) + assert result["type"] == "PROGRESS" + assert result["service"] == "prompt" + assert result["message"] == "Extracting text..." 
+ assert result["component"] == {"tool_id": "t1"} + assert "timestamp" in result + + def test_log_prompt_type_unchanged(self): + from unstract.core.pubsub_helper import LogPublisher + + result = LogPublisher.log_prompt( + component={"tool_id": "t1"}, + level="INFO", + state="RUNNING", + message="test", + ) + assert result["type"] == "LOG" + assert result["service"] == "prompt" + + def test_log_progress_has_all_fields(self): + from unstract.core.pubsub_helper import LogPublisher + + result = LogPublisher.log_progress( + component={"tool_id": "t1", "prompt_key": "pk"}, + level="ERROR", + state="FAILED", + message="boom", + ) + assert result["level"] == "ERROR" + assert result["state"] == "FAILED" + assert result["component"]["prompt_key"] == "pk" + + +# --------------------------------------------------------------------------- +# 1B-ii — ExecutorToolShim progress publishing +# --------------------------------------------------------------------------- + + +class TestExecutorToolShimProgress: + """Verify ExecutorToolShim publishes progress via LogPublisher.""" + + @patch("executor.executor_tool_shim.LogPublisher") + def test_stream_log_publishes_when_log_events_id_set(self, mock_lp): + from executor.executor_tool_shim import ExecutorToolShim + + component = {"tool_id": "t1", "run_id": "r1"} + shim = ExecutorToolShim( + platform_api_key="sk-test", + log_events_id="session-xyz", + component=component, + ) + shim.stream_log("Extracting...", level=LogLevel.INFO) + + mock_lp.log_progress.assert_called_once_with( + component=component, + level="INFO", + state="TOOL_RUN", + message="Extracting...", + ) + mock_lp.publish.assert_called_once_with( + channel_id="session-xyz", + payload=mock_lp.log_progress.return_value, + ) + + @patch("executor.executor_tool_shim.LogPublisher") + def test_stream_log_no_publish_without_log_events_id(self, mock_lp): + from executor.executor_tool_shim import ExecutorToolShim + + shim = ExecutorToolShim(platform_api_key="sk-test") + 
shim.stream_log("Hello", level=LogLevel.INFO) + + mock_lp.log_progress.assert_not_called() + mock_lp.publish.assert_not_called() + + @patch("executor.executor_tool_shim.LogPublisher") + def test_stream_log_empty_log_events_id_no_publish(self, mock_lp): + from executor.executor_tool_shim import ExecutorToolShim + + shim = ExecutorToolShim( + platform_api_key="sk-test", log_events_id="" + ) + shim.stream_log("Hello", level=LogLevel.INFO) + + mock_lp.log_progress.assert_not_called() + + @patch("executor.executor_tool_shim.LogPublisher") + def test_stream_log_swallows_publish_error(self, mock_lp): + from executor.executor_tool_shim import ExecutorToolShim + + mock_lp.publish.side_effect = ConnectionError("AMQP down") + shim = ExecutorToolShim( + platform_api_key="sk-test", + log_events_id="session-xyz", + component={"tool_id": "t1"}, + ) + # Should NOT raise + shim.stream_log("test", level=LogLevel.INFO) + + @patch("executor.executor_tool_shim.LogPublisher") + def test_level_mapping(self, mock_lp): + from executor.executor_tool_shim import ExecutorToolShim + + shim = ExecutorToolShim( + platform_api_key="sk-test", + log_events_id="s1", + component={}, + ) + + # DEBUG is below the shim's log_level (INFO) so it should NOT + # be published to the frontend. + shim.stream_log("msg", level=LogLevel.DEBUG) + assert not mock_lp.log_progress.called, ( + "DEBUG should be filtered out (below INFO threshold)" + ) + + # INFO and above should be published with the correct mapped level. 
+ published_cases = [ + (LogLevel.INFO, "INFO"), + (LogLevel.WARN, "WARN"), + (LogLevel.ERROR, "ERROR"), + (LogLevel.FATAL, "ERROR"), + ] + for sdk_level, expected_wf_level in published_cases: + mock_lp.reset_mock() + shim.stream_log("msg", level=sdk_level) + call_kwargs = mock_lp.log_progress.call_args + assert call_kwargs.kwargs["level"] == expected_wf_level, ( + f"SDK {sdk_level} should map to {expected_wf_level}" + ) + + @patch("executor.executor_tool_shim.LogPublisher") + def test_custom_stage_passed_through(self, mock_lp): + from executor.executor_tool_shim import ExecutorToolShim + + shim = ExecutorToolShim( + platform_api_key="sk-test", + log_events_id="s1", + component={}, + ) + shim.stream_log("msg", level=LogLevel.INFO, stage="INDEXING") + call_kwargs = mock_lp.log_progress.call_args + assert call_kwargs.kwargs["state"] == "INDEXING" + + +# --------------------------------------------------------------------------- +# 1C — Component dict building in execute_extraction +# --------------------------------------------------------------------------- + + +class TestExecuteExtractionComponentDict: + """Verify component dict is built from executor_params.""" + + @patch("executor.tasks.ExecutionOrchestrator") + def test_component_dict_built_when_log_events_id_present( + self, mock_orch_cls + ): + mock_orch = MagicMock() + mock_orch.execute.return_value = MagicMock( + success=True, to_dict=lambda: {"success": True} + ) + mock_orch_cls.return_value = mock_orch + + from executor.tasks import execute_extraction + + payload = { + "executor_name": "legacy", + "operation": "extract", + "run_id": "r1", + "execution_source": "ide", + "log_events_id": "session-abc", + "executor_params": { + "tool_id": "tool-123", + "file_name": "invoice.pdf", + }, + } + execute_extraction(payload) + + # Verify the context passed to orchestrator has _log_component + ctx = mock_orch.execute.call_args[0][0] + assert ctx._log_component == { + "tool_id": "tool-123", + "run_id": "r1", + 
"doc_name": "invoice.pdf", + "operation": "extract", + } + + @patch("executor.tasks.ExecutionOrchestrator") + def test_component_dict_empty_when_no_log_events_id( + self, mock_orch_cls + ): + mock_orch = MagicMock() + mock_orch.execute.return_value = MagicMock( + success=True, to_dict=lambda: {"success": True} + ) + mock_orch_cls.return_value = mock_orch + + from executor.tasks import execute_extraction + + payload = { + "executor_name": "legacy", + "operation": "extract", + "run_id": "r1", + "execution_source": "ide", + "executor_params": {}, + } + execute_extraction(payload) + + ctx = mock_orch.execute.call_args[0][0] + assert ctx._log_component == {} + + +# --------------------------------------------------------------------------- +# 1D — LegacyExecutor passes log info to shim +# --------------------------------------------------------------------------- + + +class TestLegacyExecutorLogPassthrough: + """Verify LegacyExecutor passes log_events_id and component to shim.""" + + @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance") + @patch("executor.executors.legacy_executor.X2Text") + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_extract_passes_log_info_to_shim( + self, mock_shim_cls, mock_x2text, mock_fs, tmp_path + ): + from executor.executors.legacy_executor import LegacyExecutor + from unstract.sdk1.execution.registry import ExecutorRegistry + + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry._registry["legacy"] = LegacyExecutor + + mock_shim = MagicMock() + mock_shim_cls.return_value = mock_shim + mock_x2t = MagicMock() + mock_x2t.process.return_value = MagicMock( + extracted_text="hello" + ) + mock_x2text.return_value = mock_x2t + + ctx = ExecutionContext( + executor_name="legacy", + operation="extract", + run_id="r1", + execution_source="ide", + log_events_id="session-abc", + executor_params={ + "x2text_instance_id": "x2t-1", + "file_path": str(tmp_path / "test.pdf"), + 
"platform_api_key": "sk-test", + }, + ) + ctx._log_component = {"tool_id": "t1", "run_id": "r1", "doc_name": "test.pdf"} + + executor = LegacyExecutor() + result = executor.execute(ctx) + + assert result.success + mock_shim_cls.assert_called_once_with( + platform_api_key="sk-test", + log_events_id="session-abc", + component={"tool_id": "t1", "run_id": "r1", "doc_name": "test.pdf"}, + ) + + @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance") + @patch("executor.executors.legacy_executor.X2Text") + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_extract_no_log_info_when_absent( + self, mock_shim_cls, mock_x2text, mock_fs, tmp_path + ): + from executor.executors.legacy_executor import LegacyExecutor + from unstract.sdk1.execution.registry import ExecutorRegistry + + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry._registry["legacy"] = LegacyExecutor + + mock_shim = MagicMock() + mock_shim_cls.return_value = mock_shim + mock_x2t = MagicMock() + mock_x2t.process.return_value = MagicMock( + extracted_text="hello" + ) + mock_x2text.return_value = mock_x2t + + ctx = ExecutionContext( + executor_name="legacy", + operation="extract", + run_id="r1", + execution_source="tool", + executor_params={ + "x2text_instance_id": "x2t-1", + "file_path": str(tmp_path / "test.pdf"), + "platform_api_key": "sk-test", + }, + ) + + executor = LegacyExecutor() + result = executor.execute(ctx) + + assert result.success + mock_shim_cls.assert_called_once_with( + platform_api_key="sk-test", + log_events_id="", + component={}, + ) + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_answer_prompt_enriches_component_with_prompt_key( + self, mock_shim_cls, mock_prompt_deps + ): + """Verify per-prompt shim includes prompt_key in component.""" + from executor.executors.legacy_executor import LegacyExecutor + from 
unstract.sdk1.execution.registry import ExecutorRegistry + + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry._registry["legacy"] = LegacyExecutor + + mock_shim = MagicMock() + mock_shim_cls.return_value = mock_shim + + # Mock prompt deps + mock_answer_prompt_service = MagicMock() + mock_answer_prompt_service.extract_variable.return_value = "prompt text" + mock_retrieval_service = MagicMock() + mock_variable_replacement_service = MagicMock() + mock_variable_replacement_service.is_variables_present.return_value = ( + False + ) + mock_index = MagicMock() + mock_llm = MagicMock() + mock_embedding_compat = MagicMock() + mock_vector_db = MagicMock() + + mock_prompt_deps.return_value = ( + mock_answer_prompt_service, + mock_retrieval_service, + mock_variable_replacement_service, + mock_index, + mock_llm, + mock_embedding_compat, + mock_vector_db, + ) + + ctx = ExecutionContext( + executor_name="legacy", + operation="answer_prompt", + run_id="r1", + execution_source="ide", + log_events_id="session-abc", + executor_params={ + "tool_id": "t1", + "outputs": [ + { + "name": "invoice_number", + "prompt": "What is the invoice number?", + "chunk-size": 0, + "type": "text", + "retrieval-strategy": "simple", + "vector-db": "vdb1", + "embedding": "emb1", + "x2text_adapter": "x2t1", + "chunk-overlap": 0, + "llm": "llm1", + }, + ], + "tool_settings": {}, + "PLATFORM_SERVICE_API_KEY": "sk-test", + }, + ) + ctx._log_component = { + "tool_id": "t1", + "run_id": "r1", + "doc_name": "test.pdf", + } + + # Mock IndexingUtils + with patch( + "unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1", + ): + executor = LegacyExecutor() + # The handler will try retrieval which we need to mock + mock_retrieval_service.retrieve_complete_context.return_value = [ + "context" + ] + mock_answer_prompt_service.construct_and_run_prompt.return_value = ( + "INV-001" + ) + + executor.execute(ctx) + + # Check that shim was created with prompt_key in 
component + shim_call = mock_shim_cls.call_args + assert shim_call.kwargs["component"]["prompt_key"] == "invoice_number" + assert shim_call.kwargs["log_events_id"] == "session-abc" diff --git a/workers/tests/test_phase2f.py b/workers/tests/test_phase2f.py new file mode 100644 index 0000000000..a5913367c1 --- /dev/null +++ b/workers/tests/test_phase2f.py @@ -0,0 +1,330 @@ +"""Phase 2F — single_pass_extraction, summarize, agentic operations tests. + +Verifies: +1. single_pass_extraction delegates to answer_prompt +2. summarize constructs prompt and calls LLM +3. summarize missing params return failure +4. summarize prompt includes prompt_keys +5. agentic operations rejected by LegacyExecutor (cloud executor handles them) +""" + +from unittest.mock import MagicMock, patch + +import pytest + +from unstract.sdk1.execution.context import ExecutionContext +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + + +@pytest.fixture(autouse=True) +def _clean_registry(): + """Ensure a clean executor registry for every test.""" + ExecutorRegistry.clear() + yield + ExecutorRegistry.clear() + + +def _register_legacy(): + from executor.executors.legacy_executor import LegacyExecutor # noqa: F401 + + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry.register(LegacyExecutor) + + +def _make_context(**overrides): + defaults = { + "executor_name": "legacy", + "operation": "summarize", + "run_id": "run-2f-001", + "execution_source": "tool", + "organization_id": "org-test", + "request_id": "req-2f-001", + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + +# --------------------------------------------------------------------------- +# 1. 
single_pass_extraction delegates to answer_prompt +# --------------------------------------------------------------------------- + + +class TestSinglePassExtraction: + def test_delegates_to_answer_prompt(self): + """single_pass_extraction calls _handle_answer_prompt internally.""" + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + # Mock _handle_answer_prompt so we can verify delegation + expected_result = ExecutionResult( + success=True, + data={"output": {"field1": "value1"}, "metadata": {}, "metrics": {}}, + ) + executor._handle_answer_prompt = MagicMock(return_value=expected_result) + + ctx = _make_context(operation="single_pass_extraction") + result = executor.execute(ctx) + + assert result.success is True + assert result.data["output"]["field1"] == "value1" + executor._handle_answer_prompt.assert_called_once_with(ctx) + + def test_delegates_failure_too(self): + """Failures from answer_prompt propagate through single_pass.""" + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + fail_result = ExecutionResult.failure(error="some error") + executor._handle_answer_prompt = MagicMock(return_value=fail_result) + + ctx = _make_context(operation="single_pass_extraction") + result = executor.execute(ctx) + + assert result.success is False + assert "some error" in result.error + + +# --------------------------------------------------------------------------- +# 2. 
summarize +# --------------------------------------------------------------------------- + + +def _make_summarize_params(**overrides): + """Build executor_params for summarize operation.""" + defaults = { + "llm_adapter_instance_id": "llm-001", + "summarize_prompt": "Summarize the following document.", + "context": "This is a long document with lots of content.", + "prompt_keys": ["invoice_number", "total_amount"], + "PLATFORM_SERVICE_API_KEY": "test-key", + } + defaults.update(overrides) + return defaults + + +class TestSummarize: + @patch("executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps") + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_summarize_success(self, mock_shim_cls, mock_get_deps): + """Successful summarize returns data with summary text.""" + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + # Set up mock LLM + mock_llm_cls = MagicMock() + mock_llm = MagicMock() + mock_llm_cls.return_value = mock_llm + + mock_get_deps.return_value = ( + MagicMock(), # AnswerPromptService + MagicMock(), # RetrievalService + MagicMock(), # VariableReplacementService + MagicMock(), # Index + mock_llm_cls, # LLM + MagicMock(), # EmbeddingCompat + MagicMock(), # VectorDB + ) + + # Mock AnswerPromptService.run_completion + with patch( + "executor.executors.answer_prompt.AnswerPromptService.run_completion", + return_value="This is a summary of the document.", + ): + ctx = _make_context( + operation="summarize", + executor_params=_make_summarize_params(), + ) + result = executor.execute(ctx) + + assert result.success is True + assert result.data["data"] == "This is a summary of the document." 
+ + @patch("executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps") + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_summarize_prompt_includes_keys(self, mock_shim_cls, mock_get_deps): + """The summarize prompt includes prompt_keys.""" + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + mock_llm_cls = MagicMock() + mock_llm = MagicMock() + mock_llm_cls.return_value = mock_llm + + mock_get_deps.return_value = ( + MagicMock(), MagicMock(), MagicMock(), MagicMock(), + mock_llm_cls, MagicMock(), MagicMock(), + ) + + captured_prompt = {} + + def capture_run_completion(llm, prompt, **kwargs): + captured_prompt["value"] = prompt + return "summary" + + with patch( + "executor.executors.answer_prompt.AnswerPromptService.run_completion", + side_effect=capture_run_completion, + ): + ctx = _make_context( + operation="summarize", + executor_params=_make_summarize_params( + prompt_keys=["name", "address"], + ), + ) + executor.execute(ctx) + + assert "name" in captured_prompt["value"] + assert "address" in captured_prompt["value"] + + @patch("executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps") + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_summarize_no_prompt_keys(self, mock_shim_cls, mock_get_deps): + """Summarize works without prompt_keys.""" + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + mock_llm_cls = MagicMock() + mock_llm_cls.return_value = MagicMock() + + mock_get_deps.return_value = ( + MagicMock(), MagicMock(), MagicMock(), MagicMock(), + mock_llm_cls, MagicMock(), MagicMock(), + ) + + with patch( + "executor.executors.answer_prompt.AnswerPromptService.run_completion", + return_value="summary without keys", + ): + params = _make_summarize_params() + del params["prompt_keys"] + ctx = _make_context( + operation="summarize", + executor_params=params, + ) + result = executor.execute(ctx) + + assert result.success is True + assert result.data["data"] == 
"summary without keys" + + def test_summarize_missing_llm_adapter(self): + """Missing llm_adapter_instance_id returns failure.""" + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + params = _make_summarize_params(llm_adapter_instance_id="") + ctx = _make_context( + operation="summarize", + executor_params=params, + ) + result = executor.execute(ctx) + + assert result.success is False + assert "llm_adapter_instance_id" in result.error + + def test_summarize_missing_context(self): + """Missing context returns failure.""" + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + params = _make_summarize_params(context="") + ctx = _make_context( + operation="summarize", + executor_params=params, + ) + result = executor.execute(ctx) + + assert result.success is False + assert "context" in result.error + + @patch("executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps") + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_summarize_llm_error(self, mock_shim_cls, mock_get_deps): + """LLM errors are wrapped in ExecutionResult.failure.""" + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + mock_llm_cls = MagicMock() + mock_llm_cls.return_value = MagicMock() + + mock_get_deps.return_value = ( + MagicMock(), MagicMock(), MagicMock(), MagicMock(), + mock_llm_cls, MagicMock(), MagicMock(), + ) + + with patch( + "executor.executors.answer_prompt.AnswerPromptService.run_completion", + side_effect=Exception("LLM unavailable"), + ): + ctx = _make_context( + operation="summarize", + executor_params=_make_summarize_params(), + ) + result = executor.execute(ctx) + + assert result.success is False + assert "summarization" in result.error.lower() or "LLM" in result.error + + @patch("executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps") + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_summarize_creates_llm_with_correct_adapter( + self, mock_shim_cls, mock_get_deps + ): 
+ """LLM is instantiated with the provided adapter instance ID.""" + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + mock_llm_cls = MagicMock() + mock_llm = MagicMock() + mock_llm_cls.return_value = mock_llm + + mock_get_deps.return_value = ( + MagicMock(), MagicMock(), MagicMock(), MagicMock(), + mock_llm_cls, MagicMock(), MagicMock(), + ) + + with patch( + "executor.executors.answer_prompt.AnswerPromptService.run_completion", + return_value="summary", + ): + ctx = _make_context( + operation="summarize", + executor_params=_make_summarize_params( + llm_adapter_instance_id="custom-llm-42", + ), + ) + executor.execute(ctx) + + mock_llm_cls.assert_called_once() + call_kwargs = mock_llm_cls.call_args + assert call_kwargs.kwargs["adapter_instance_id"] == "custom-llm-42" + + +# --------------------------------------------------------------------------- +# 3. agentic operations — handled by AgenticPromptStudioExecutor (cloud) +# --------------------------------------------------------------------------- + + +class TestAgenticExtraction: + def test_legacy_rejects_agentic_operations(self): + """LegacyExecutor does not handle agentic operations (cloud executor).""" + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + ctx = _make_context(operation="agentic_extract") + result = executor.execute(ctx) + + assert result.success is False + assert "does not support" in result.error + + def test_orchestrator_wraps_unsupported_agentic(self): + """ExecutionOrchestrator returns failure for agentic ops on legacy.""" + from unstract.sdk1.execution.orchestrator import ExecutionOrchestrator + + _register_legacy() + orchestrator = ExecutionOrchestrator() + ctx = _make_context(operation="agentic_extract") + result = orchestrator.execute(ctx) + + assert result.success is False + assert "does not support" in result.error diff --git a/workers/tests/test_phase2h.py b/workers/tests/test_phase2h.py new file mode 100644 index 0000000000..cca39a3710 --- /dev/null 
+++ b/workers/tests/test_phase2h.py @@ -0,0 +1,482 @@ +"""Phase 2H: Tests for variable replacement and postprocessor modules. + +Covers VariableReplacementHelper, VariableReplacementService, and +the webhook postprocessor — all pure Python with no llama_index deps. +""" + +import json +from unittest.mock import MagicMock, patch + +import pytest +import requests as real_requests +from executor.executors.constants import VariableType +from executor.executors.exceptions import CustomDataError, LegacyExecutorError +from executor.executors.postprocessor import ( + _validate_structured_output, + postprocess_data, +) +from executor.executors.variable_replacement import ( + VariableReplacementHelper, + VariableReplacementService, +) + +# ============================================================================ +# 1. VariableReplacementHelper (15 tests) +# ============================================================================ + + +class TestVariableReplacementHelper: + """Tests for the low-level replacement helper.""" + + # --- extract_variables_from_prompt --- + + def test_extract_variables_single(self): + result = VariableReplacementHelper.extract_variables_from_prompt("{{name}}") + assert result == ["name"] + + def test_extract_variables_multiple(self): + result = VariableReplacementHelper.extract_variables_from_prompt( + "{{a}} and {{b}}" + ) + assert result == ["a", "b"] + + def test_extract_variables_none(self): + result = VariableReplacementHelper.extract_variables_from_prompt("no vars here") + assert result == [] + + # --- identify_variable_type --- + + def test_identify_static_type(self): + assert ( + VariableReplacementHelper.identify_variable_type("name") + == VariableType.STATIC + ) + + def test_identify_dynamic_type(self): + assert ( + VariableReplacementHelper.identify_variable_type( + "https://example.com/api[field1]" + ) + == VariableType.DYNAMIC + ) + + def test_identify_custom_data_type(self): + assert ( + 
VariableReplacementHelper.identify_variable_type("custom_data.company") + == VariableType.CUSTOM_DATA + ) + + # --- handle_json_and_str_types --- + + def test_handle_json_dict(self): + result = VariableReplacementHelper.handle_json_and_str_types({"k": "v"}) + assert result == '{"k": "v"}' + + def test_handle_json_list(self): + result = VariableReplacementHelper.handle_json_and_str_types([1, 2]) + assert result == "[1, 2]" + + # --- replace_generic_string_value --- + + def test_replace_generic_string_non_str(self): + """Non-string values get JSON-formatted before replacement.""" + result = VariableReplacementHelper.replace_generic_string_value( + prompt="value: {{x}}", variable="{{x}}", value={"nested": True} + ) + assert result == 'value: {"nested": true}' + + # --- check_static_variable_run_status --- + + def test_check_static_missing_key(self): + result = VariableReplacementHelper.check_static_variable_run_status( + structure_output={}, variable="missing" + ) + assert result is None + + # --- replace_static_variable --- + + def test_replace_static_missing_returns_prompt(self): + """Missing key in structured_output leaves prompt unchanged.""" + prompt = "Total is {{revenue}}" + result = VariableReplacementHelper.replace_static_variable( + prompt=prompt, structured_output={}, variable="revenue" + ) + assert result == prompt + + # --- replace_custom_data_variable --- + + def test_custom_data_nested_path(self): + """custom_data.nested.key navigates nested dict.""" + result = VariableReplacementHelper.replace_custom_data_variable( + prompt="val: {{custom_data.nested.key}}", + variable="custom_data.nested.key", + custom_data={"nested": {"key": "deep_value"}}, + ) + assert result == "val: deep_value" + + def test_custom_data_empty_dict_raises(self): + """Empty custom_data={} raises CustomDataError.""" + with pytest.raises(CustomDataError, match="Custom data is not configured"): + VariableReplacementHelper.replace_custom_data_variable( + prompt="{{custom_data.company}}", 
+ variable="custom_data.company", + custom_data={}, + ) + + # --- fetch_dynamic_variable_value / replace_dynamic_variable --- + + @patch("executor.executors.variable_replacement.pyrequests.post") + def test_dynamic_variable_success(self, mock_post): + """Mock HTTP POST, verify URL extraction and replacement.""" + mock_resp = MagicMock() + mock_resp.headers = {"content-type": "application/json"} + mock_resp.json.return_value = {"result": "ok"} + mock_resp.raise_for_status = MagicMock() + mock_post.return_value = mock_resp + + variable = "https://example.com/api[field1]" + result = VariableReplacementHelper.replace_dynamic_variable( + prompt="data: {{" + variable + "}}", + variable=variable, + structured_output={"field1": "input_data"}, + ) + mock_post.assert_called_once() + assert '{"result": "ok"}' in result + + @patch("executor.executors.variable_replacement.pyrequests.post") + def test_dynamic_variable_http_error(self, mock_post): + """HTTP error raises LegacyExecutorError.""" + mock_post.side_effect = real_requests.exceptions.ConnectionError("refused") + + with pytest.raises(LegacyExecutorError, match="failed"): + VariableReplacementHelper.fetch_dynamic_variable_value( + url="https://example.com/api", data="payload" + ) + + +# ============================================================================ +# 2. 
VariableReplacementService (8 tests) +# ============================================================================ + + +class TestVariableReplacementService: + """Tests for the high-level orchestration service.""" + + def test_replace_with_variable_map(self): + """Uses variable_map key from prompt dict when present.""" + prompt = { + "prompt": "Hello {{name}}", + "variable_map": {"name": "World"}, + } + result = VariableReplacementService.replace_variables_in_prompt( + prompt=prompt, + structured_output={"name": "Fallback"}, + prompt_name="test", + ) + assert result == "Hello World" + + def test_replace_fallback_structured_output(self): + """Falls back to structured_output when no variable_map.""" + prompt = {"prompt": "Hello {{name}}"} + result = VariableReplacementService.replace_variables_in_prompt( + prompt=prompt, + structured_output={"name": "Fallback"}, + prompt_name="test", + ) + assert result == "Hello Fallback" + + def test_mixed_variable_types(self): + """Prompt with static + custom_data variables replaces both.""" + prompt = { + "prompt": "{{name}} works at {{custom_data.company}}", + "variable_map": {"name": "Alice"}, + } + result = VariableReplacementService.replace_variables_in_prompt( + prompt=prompt, + structured_output={}, + prompt_name="test", + custom_data={"company": "Acme"}, + ) + assert result == "Alice works at Acme" + + def test_no_variables_noop(self): + """Prompt without {{}} returns unchanged.""" + prompt = {"prompt": "No variables here"} + result = VariableReplacementService.replace_variables_in_prompt( + prompt=prompt, + structured_output={}, + prompt_name="test", + ) + assert result == "No variables here" + + def test_replace_with_custom_data(self): + """custom_data dict gets passed through to helper.""" + prompt = { + "prompt": "Company: {{custom_data.name}}", + "variable_map": {}, + } + result = VariableReplacementService.replace_variables_in_prompt( + prompt=prompt, + structured_output={}, + prompt_name="test", + 
custom_data={"name": "TestCorp"}, + ) + assert result == "Company: TestCorp" + + def test_is_ide_flag_propagated(self): + """is_ide=False propagates — error message says 'API request'.""" + prompt = { + "prompt": "{{custom_data.missing}}", + "variable_map": {}, + } + with pytest.raises(CustomDataError, match="API request"): + VariableReplacementService.replace_variables_in_prompt( + prompt=prompt, + structured_output={}, + prompt_name="test", + custom_data={}, + is_ide=False, + ) + + def test_multiple_same_variable(self): + """{{x}} and {{x}} — both occurrences replaced.""" + prompt = { + "prompt": "{{x}} and {{x}}", + "variable_map": {"x": "val"}, + } + result = VariableReplacementService.replace_variables_in_prompt( + prompt=prompt, + structured_output={}, + prompt_name="test", + ) + assert result == "val and val" + + def test_json_value_replacement(self): + """Dict value gets JSON-serialized before replacement.""" + prompt = { + "prompt": "data: {{info}}", + "variable_map": {"info": {"key": "value"}}, + } + result = VariableReplacementService.replace_variables_in_prompt( + prompt=prompt, + structured_output={}, + prompt_name="test", + ) + assert result == 'data: {"key": "value"}' + + +# ============================================================================ +# 3. 
Postprocessor (15 tests) +# ============================================================================ + + +class TestPostprocessor: + """Tests for the webhook postprocessor.""" + + PARSED = {"field": "original"} + HIGHLIGHT = [{"page": 1, "spans": []}] + + # --- disabled / no-op paths --- + + def test_disabled_returns_original(self): + result = postprocess_data( + parsed_data=self.PARSED, + webhook_enabled=False, + highlight_data=self.HIGHLIGHT, + ) + assert result == (self.PARSED, self.HIGHLIGHT) + + def test_no_url_returns_original(self): + result = postprocess_data( + parsed_data=self.PARSED, + webhook_enabled=True, + webhook_url=None, + highlight_data=self.HIGHLIGHT, + ) + assert result == (self.PARSED, self.HIGHLIGHT) + + # --- successful webhook --- + + @patch("executor.executors.postprocessor.requests.post") + def test_success_returns_updated(self, mock_post): + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.json.return_value = {"structured_output": {"field": "updated"}} + mock_post.return_value = mock_resp + + result = postprocess_data( + parsed_data=self.PARSED, + webhook_enabled=True, + webhook_url="https://hook.example.com", + highlight_data=self.HIGHLIGHT, + ) + assert result[0] == {"field": "updated"} + + @patch("executor.executors.postprocessor.requests.post") + def test_success_preserves_highlight_data(self, mock_post): + """Response without highlight_data preserves original.""" + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.json.return_value = {"structured_output": {"f": "v"}} + mock_post.return_value = mock_resp + + _, highlight = postprocess_data( + parsed_data=self.PARSED, + webhook_enabled=True, + webhook_url="https://hook.example.com", + highlight_data=self.HIGHLIGHT, + ) + assert highlight == self.HIGHLIGHT + + @patch("executor.executors.postprocessor.requests.post") + def test_success_updates_highlight_data(self, mock_post): + """Response with valid list highlight_data uses updated.""" + 
new_highlight = [{"page": 2}] + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.json.return_value = { + "structured_output": {"f": "v"}, + "highlight_data": new_highlight, + } + mock_post.return_value = mock_resp + + _, highlight = postprocess_data( + parsed_data=self.PARSED, + webhook_enabled=True, + webhook_url="https://hook.example.com", + highlight_data=self.HIGHLIGHT, + ) + assert highlight == new_highlight + + @patch("executor.executors.postprocessor.requests.post") + def test_invalid_highlight_data_ignored(self, mock_post): + """Response with non-list highlight_data keeps original.""" + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.json.return_value = { + "structured_output": {"f": "v"}, + "highlight_data": "not-a-list", + } + mock_post.return_value = mock_resp + + _, highlight = postprocess_data( + parsed_data=self.PARSED, + webhook_enabled=True, + webhook_url="https://hook.example.com", + highlight_data=self.HIGHLIGHT, + ) + assert highlight == self.HIGHLIGHT + + # --- response validation failures --- + + @patch("executor.executors.postprocessor.requests.post") + def test_missing_structured_output_key(self, mock_post): + """Response without structured_output returns original.""" + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.json.return_value = {"other_key": "value"} + mock_post.return_value = mock_resp + + result = postprocess_data( + parsed_data=self.PARSED, + webhook_enabled=True, + webhook_url="https://hook.example.com", + highlight_data=self.HIGHLIGHT, + ) + assert result == (self.PARSED, self.HIGHLIGHT) + + @patch("executor.executors.postprocessor.requests.post") + def test_invalid_structured_output_type(self, mock_post): + """Response with string structured_output returns original.""" + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.json.return_value = {"structured_output": "just-a-string"} + mock_post.return_value = mock_resp + + result = postprocess_data( + 
parsed_data=self.PARSED, + webhook_enabled=True, + webhook_url="https://hook.example.com", + highlight_data=self.HIGHLIGHT, + ) + assert result == (self.PARSED, self.HIGHLIGHT) + + # --- HTTP error paths --- + + @patch("executor.executors.postprocessor.requests.post") + def test_http_error_returns_original(self, mock_post): + mock_resp = MagicMock() + mock_resp.status_code = 500 + mock_post.return_value = mock_resp + + result = postprocess_data( + parsed_data=self.PARSED, + webhook_enabled=True, + webhook_url="https://hook.example.com", + highlight_data=self.HIGHLIGHT, + ) + assert result == (self.PARSED, self.HIGHLIGHT) + + @patch("executor.executors.postprocessor.requests.post") + def test_timeout_returns_original(self, mock_post): + mock_post.side_effect = real_requests.exceptions.Timeout("timed out") + + result = postprocess_data( + parsed_data=self.PARSED, + webhook_enabled=True, + webhook_url="https://hook.example.com", + highlight_data=self.HIGHLIGHT, + ) + assert result == (self.PARSED, self.HIGHLIGHT) + + @patch("executor.executors.postprocessor.requests.post") + def test_connection_error_returns_original(self, mock_post): + mock_post.side_effect = real_requests.exceptions.ConnectionError("refused") + + result = postprocess_data( + parsed_data=self.PARSED, + webhook_enabled=True, + webhook_url="https://hook.example.com", + highlight_data=self.HIGHLIGHT, + ) + assert result == (self.PARSED, self.HIGHLIGHT) + + @patch("executor.executors.postprocessor.requests.post") + def test_json_decode_error_returns_original(self, mock_post): + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.json.side_effect = json.JSONDecodeError("err", "doc", 0) + mock_post.return_value = mock_resp + + result = postprocess_data( + parsed_data=self.PARSED, + webhook_enabled=True, + webhook_url="https://hook.example.com", + highlight_data=self.HIGHLIGHT, + ) + assert result == (self.PARSED, self.HIGHLIGHT) + + @patch("executor.executors.postprocessor.requests.post") + 
def test_custom_timeout_passed(self, mock_post): + """timeout=5.0 is passed to requests.post().""" + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.json.return_value = {"structured_output": {"f": "v"}} + mock_post.return_value = mock_resp + + postprocess_data( + parsed_data=self.PARSED, + webhook_enabled=True, + webhook_url="https://hook.example.com", + timeout=5.0, + ) + _, kwargs = mock_post.call_args + assert kwargs["timeout"] == pytest.approx(5.0) + + # --- _validate_structured_output --- + + def test_validate_structured_output_dict(self): + assert _validate_structured_output({"k": "v"}) is True + + def test_validate_structured_output_list(self): + assert _validate_structured_output([1, 2]) is True diff --git a/workers/tests/test_phase5d.py b/workers/tests/test_phase5d.py new file mode 100644 index 0000000000..0a0489b5dc --- /dev/null +++ b/workers/tests/test_phase5d.py @@ -0,0 +1,899 @@ +"""Phase 5D — Tests for structure_pipeline compound operation. + +Tests _handle_structure_pipeline in LegacyExecutor which runs the full +extract → summarize → index → answer_prompt pipeline in a single +executor invocation. 
+""" + +from unittest.mock import MagicMock, patch + +import pytest + +from unstract.sdk1.execution.context import ExecutionContext, Operation +from unstract.sdk1.execution.result import ExecutionResult + +# --------------------------------------------------------------------------- +# Patch targets — all at source in executor.executors.legacy_executor +# --------------------------------------------------------------------------- + +_PATCH_FILE_UTILS = "executor.executors.file_utils.FileUtils.get_fs_instance" +_PATCH_INDEXING_DEPS = ( + "executor.executors.legacy_executor.LegacyExecutor._get_indexing_deps" +) +_PATCH_PROMPT_DEPS = ( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" +) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def executor(): + """Create a LegacyExecutor instance.""" + from executor.executors.legacy_executor import LegacyExecutor + + return LegacyExecutor() + + +@pytest.fixture +def mock_fs(): + """Mock filesystem.""" + fs = MagicMock(name="file_storage") + fs.exists.return_value = False + fs.read.return_value = "" + fs.write.return_value = None + fs.get_hash_from_file.return_value = "hash123" + return fs + + +def _make_pipeline_context( + executor_params: dict, + run_id: str = "run-1", + organization_id: str = "org-1", +) -> ExecutionContext: + """Build a structure_pipeline ExecutionContext.""" + return ExecutionContext( + executor_name="legacy", + operation=Operation.STRUCTURE_PIPELINE.value, + run_id=run_id, + execution_source="tool", + organization_id=organization_id, + request_id="req-1", + executor_params=executor_params, + ) + + +def _base_extract_params() -> dict: + """Extract params template.""" + return { + "x2text_instance_id": "x2t-1", + "file_path": "/data/test.pdf", + "enable_highlight": False, + "output_file_path": "/data/exec/EXTRACT", + "platform_api_key": 
"sk-test", + "usage_kwargs": {"run_id": "run-1", "file_name": "test.pdf"}, + } + + +def _base_index_template() -> dict: + """Index template.""" + return { + "tool_id": "tool-1", + "file_hash": "hash-abc", + "is_highlight_enabled": False, + "platform_api_key": "sk-test", + "extracted_file_path": "/data/exec/EXTRACT", + } + + +def _base_answer_params() -> dict: + """Answer params (payload for answer_prompt).""" + return { + "run_id": "run-1", + "tool_settings": { + "vector-db": "vdb-1", + "embedding": "emb-1", + "x2text_adapter": "x2t-1", + "llm": "llm-1", + "challenge_llm": "", + "enable_challenge": False, + "enable_single_pass_extraction": False, + "summarize_as_source": False, + "enable_highlight": False, + }, + "outputs": [ + { + "name": "field_a", + "prompt": "What is the revenue?", + "type": "text", + "active": True, + "chunk-size": 512, + "chunk-overlap": 128, + "llm": "llm-1", + "embedding": "emb-1", + "vector-db": "vdb-1", + "x2text_adapter": "x2t-1", + "retrieval-strategy": "simple", + "similarity-top-k": 5, + }, + ], + "tool_id": "tool-1", + "file_hash": "hash-abc", + "file_name": "test.pdf", + "file_path": "/data/exec/EXTRACT", + "execution_source": "tool", + "custom_data": {}, + "PLATFORM_SERVICE_API_KEY": "sk-test", + } + + +def _base_pipeline_options() -> dict: + """Default pipeline options.""" + return { + "skip_extraction_and_indexing": False, + "is_summarization_enabled": False, + "is_single_pass_enabled": False, + "input_file_path": "/data/test.pdf", + "source_file_name": "test.pdf", + } + + +# --------------------------------------------------------------------------- +# Tests — Operation enum and routing +# --------------------------------------------------------------------------- + + +class TestStructurePipelineEnum: + """Verify enum and operation map registration.""" + + def test_operation_enum_exists(self): + assert Operation.STRUCTURE_PIPELINE.value == "structure_pipeline" + + def test_operation_map_has_structure_pipeline(self, executor): + 
assert "structure_pipeline" in executor._OPERATION_MAP + + +# --------------------------------------------------------------------------- +# Tests — Normal pipeline: extract → index → answer_prompt +# --------------------------------------------------------------------------- + + +class TestNormalPipeline: + """Normal pipeline: extract + index + answer_prompt.""" + + def test_extract_index_answer(self, executor): + """Full pipeline calls extract, index, and answer_prompt.""" + extract_result = ExecutionResult( + success=True, data={"extracted_text": "Revenue is $1M"} + ) + index_result = ExecutionResult( + success=True, data={"doc_id": "doc-1"} + ) + answer_result = ExecutionResult( + success=True, + data={ + "output": {"field_a": "$1M"}, + "metadata": {}, + "metrics": {"field_a": {"llm": {"time_taken(s)": 1.0}}}, + }, + ) + + executor._handle_extract = MagicMock(return_value=extract_result) + executor._handle_index = MagicMock(return_value=index_result) + executor._handle_answer_prompt = MagicMock( + return_value=answer_result + ) + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": _base_answer_params(), + "pipeline_options": _base_pipeline_options(), + }) + + result = executor._handle_structure_pipeline(ctx) + + assert result.success + assert executor._handle_extract.call_count == 1 + assert executor._handle_index.call_count == 1 + assert executor._handle_answer_prompt.call_count == 1 + + def test_result_has_metadata_and_file_name(self, executor): + """Result includes source_file_name in metadata.""" + executor._handle_extract = MagicMock( + return_value=ExecutionResult( + success=True, data={"extracted_text": "text"} + ) + ) + executor._handle_index = MagicMock( + return_value=ExecutionResult( + success=True, data={"doc_id": "d1"} + ) + ) + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, data={"output": {}, "metadata": {}} + ) + ) 
+ + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": _base_answer_params(), + "pipeline_options": _base_pipeline_options(), + }) + result = executor._handle_structure_pipeline(ctx) + + assert result.success + assert result.data["metadata"]["file_name"] == "test.pdf" + + def test_extracted_text_in_metadata(self, executor): + """Extracted text is added to result metadata.""" + executor._handle_extract = MagicMock( + return_value=ExecutionResult( + success=True, data={"extracted_text": "Revenue $1M"} + ) + ) + executor._handle_index = MagicMock( + return_value=ExecutionResult( + success=True, data={"doc_id": "d1"} + ) + ) + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, data={"output": {}} + ) + ) + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": _base_answer_params(), + "pipeline_options": _base_pipeline_options(), + }) + result = executor._handle_structure_pipeline(ctx) + + assert result.data["metadata"]["extracted_text"] == "Revenue $1M" + + def test_index_metrics_merged(self, executor): + """Index metrics are merged into answer metrics.""" + executor._handle_extract = MagicMock( + return_value=ExecutionResult( + success=True, data={"extracted_text": "text"} + ) + ) + executor._handle_index = MagicMock( + return_value=ExecutionResult( + success=True, data={"doc_id": "d1"} + ) + ) + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, + data={ + "output": {}, + "metrics": { + "field_a": {"llm": {"time_taken(s)": 2.0}}, + }, + }, + ) + ) + # Simulate index metrics by patching _run_pipeline_index + executor._run_pipeline_index = MagicMock( + return_value={ + "field_a": {"indexing": {"time_taken(s)": 0.5}}, + } + ) + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + 
"index_template": _base_index_template(), + "answer_params": _base_answer_params(), + "pipeline_options": _base_pipeline_options(), + }) + result = executor._handle_structure_pipeline(ctx) + + assert result.success + metrics = result.data["metrics"] + # Both llm and indexing metrics for field_a should be merged + assert "llm" in metrics["field_a"] + assert "indexing" in metrics["field_a"] + + +# --------------------------------------------------------------------------- +# Tests — Extract failure propagation +# --------------------------------------------------------------------------- + + +class TestExtractFailure: + """Extract failure stops the pipeline.""" + + def test_extract_failure_stops_pipeline(self, executor): + executor._handle_extract = MagicMock( + return_value=ExecutionResult.failure(error="x2text error") + ) + executor._handle_index = MagicMock() + executor._handle_answer_prompt = MagicMock() + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": _base_answer_params(), + "pipeline_options": _base_pipeline_options(), + }) + result = executor._handle_structure_pipeline(ctx) + + assert not result.success + assert "x2text error" in result.error + executor._handle_index.assert_not_called() + executor._handle_answer_prompt.assert_not_called() + + +# --------------------------------------------------------------------------- +# Tests — Skip extraction (smart table) +# --------------------------------------------------------------------------- + + +class TestSkipExtraction: + """Smart table: skip extract+index, use source file.""" + + def test_skip_extraction_uses_input_file(self, executor): + executor._handle_extract = MagicMock() + executor._handle_index = MagicMock() + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, data={"output": {}} + ) + ) + + opts = _base_pipeline_options() + opts["skip_extraction_and_indexing"] = True + 
answer = _base_answer_params() + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": answer, + "pipeline_options": opts, + }) + result = executor._handle_structure_pipeline(ctx) + + assert result.success + executor._handle_extract.assert_not_called() + executor._handle_index.assert_not_called() + # file_path should be set to input_file_path + call_ctx = executor._handle_answer_prompt.call_args[0][0] + assert call_ctx.executor_params["file_path"] == "/data/test.pdf" + + def test_skip_extraction_table_settings_injection(self, executor): + """Table settings get input_file when extraction is skipped.""" + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, data={"output": {}} + ) + ) + + opts = _base_pipeline_options() + opts["skip_extraction_and_indexing"] = True + answer = _base_answer_params() + answer["outputs"][0]["table_settings"] = { + "is_directory_mode": False, + } + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": answer, + "pipeline_options": opts, + }) + result = executor._handle_structure_pipeline(ctx) + + assert result.success + ts = answer["outputs"][0]["table_settings"] + assert ts["input_file"] == "/data/test.pdf" + + +# --------------------------------------------------------------------------- +# Tests — Single pass extraction +# --------------------------------------------------------------------------- + + +class TestSinglePass: + """Single pass: extract + answer_prompt (no indexing).""" + + def test_single_pass_skips_index(self, executor): + executor._handle_extract = MagicMock( + return_value=ExecutionResult( + success=True, data={"extracted_text": "text"} + ) + ) + executor._handle_index = MagicMock() + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, data={"output": {}} + ) + ) + + opts 
= _base_pipeline_options() + opts["is_single_pass_enabled"] = True + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": _base_answer_params(), + "pipeline_options": opts, + }) + result = executor._handle_structure_pipeline(ctx) + + assert result.success + executor._handle_extract.assert_called_once() + executor._handle_index.assert_not_called() + executor._handle_answer_prompt.assert_called_once() + + def test_single_pass_operation_is_single_pass(self, executor): + """The answer_prompt call uses single_pass_extraction operation.""" + executor._handle_extract = MagicMock( + return_value=ExecutionResult( + success=True, data={"extracted_text": "text"} + ) + ) + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, data={"output": {}} + ) + ) + + opts = _base_pipeline_options() + opts["is_single_pass_enabled"] = True + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": _base_answer_params(), + "pipeline_options": opts, + }) + executor._handle_structure_pipeline(ctx) + + call_ctx = executor._handle_answer_prompt.call_args[0][0] + assert call_ctx.operation == "single_pass_extraction" + + +# --------------------------------------------------------------------------- +# Tests — Summarize pipeline +# --------------------------------------------------------------------------- + + +class TestSummarizePipeline: + """Summarize: extract + summarize + answer_prompt (no indexing).""" + + @patch(_PATCH_FILE_UTILS) + def test_summarize_calls_handle_summarize( + self, mock_get_fs, executor, mock_fs + ): + mock_get_fs.return_value = mock_fs + mock_fs.exists.return_value = False + mock_fs.read.return_value = "extracted text for summarize" + + executor._handle_extract = MagicMock( + return_value=ExecutionResult( + success=True, data={"extracted_text": "text"} + ) + ) + 
executor._handle_summarize = MagicMock( + return_value=ExecutionResult( + success=True, data={"data": "summarized text"} + ) + ) + executor._handle_index = MagicMock() + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, data={"output": {}} + ) + ) + + opts = _base_pipeline_options() + opts["is_summarization_enabled"] = True + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": _base_answer_params(), + "pipeline_options": opts, + "summarize_params": { + "llm_adapter_instance_id": "llm-1", + "summarize_prompt": "Summarize this", + "extract_file_path": "/data/exec/EXTRACT", + "summarize_file_path": "/data/exec/SUMMARIZE", + "platform_api_key": "sk-test", + "prompt_keys": ["field_a"], + }, + }) + result = executor._handle_structure_pipeline(ctx) + + assert result.success + executor._handle_summarize.assert_called_once() + executor._handle_index.assert_not_called() + + @patch(_PATCH_FILE_UTILS) + def test_summarize_uses_cache(self, mock_get_fs, executor, mock_fs): + """If cached summary exists, _handle_summarize is NOT called.""" + mock_get_fs.return_value = mock_fs + mock_fs.exists.return_value = True + mock_fs.read.return_value = "cached summary" + + executor._handle_extract = MagicMock( + return_value=ExecutionResult( + success=True, data={"extracted_text": "text"} + ) + ) + executor._handle_summarize = MagicMock() + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, data={"output": {}} + ) + ) + + opts = _base_pipeline_options() + opts["is_summarization_enabled"] = True + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": _base_answer_params(), + "pipeline_options": opts, + "summarize_params": { + "llm_adapter_instance_id": "llm-1", + "summarize_prompt": "Summarize this", + "extract_file_path": 
"/data/exec/EXTRACT", + "summarize_file_path": "/data/exec/SUMMARIZE", + "platform_api_key": "sk-test", + "prompt_keys": ["field_a"], + }, + }) + result = executor._handle_structure_pipeline(ctx) + + assert result.success + executor._handle_summarize.assert_not_called() + + @patch(_PATCH_FILE_UTILS) + def test_summarize_updates_answer_params( + self, mock_get_fs, executor, mock_fs + ): + """After summarize, answer_params file_path and hash are updated.""" + mock_get_fs.return_value = mock_fs + mock_fs.exists.return_value = False + mock_fs.read.return_value = "doc text" + mock_fs.get_hash_from_file.return_value = "sum-hash-456" + + executor._handle_extract = MagicMock( + return_value=ExecutionResult( + success=True, data={"extracted_text": "text"} + ) + ) + executor._handle_summarize = MagicMock( + return_value=ExecutionResult( + success=True, data={"data": "summarized"} + ) + ) + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, data={"output": {}} + ) + ) + + answer = _base_answer_params() + opts = _base_pipeline_options() + opts["is_summarization_enabled"] = True + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": answer, + "pipeline_options": opts, + "summarize_params": { + "llm_adapter_instance_id": "llm-1", + "summarize_prompt": "Summarize", + "extract_file_path": "/data/exec/EXTRACT", + "summarize_file_path": "/data/exec/SUMMARIZE", + "platform_api_key": "sk-test", + "prompt_keys": [], + }, + }) + executor._handle_structure_pipeline(ctx) + + # Check answer_params were updated + assert answer["file_hash"] == "sum-hash-456" + assert answer["file_path"] == "/data/exec/SUMMARIZE" + + @patch(_PATCH_FILE_UTILS) + def test_summarize_sets_chunk_size_zero( + self, mock_get_fs, executor, mock_fs + ): + """Summarize sets chunk-size=0 for all outputs.""" + mock_get_fs.return_value = mock_fs + mock_fs.exists.return_value = True + 
mock_fs.read.return_value = "cached" + + executor._handle_extract = MagicMock( + return_value=ExecutionResult( + success=True, data={"extracted_text": "t"} + ) + ) + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, data={"output": {}} + ) + ) + + answer = _base_answer_params() + opts = _base_pipeline_options() + opts["is_summarization_enabled"] = True + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": answer, + "pipeline_options": opts, + "summarize_params": { + "llm_adapter_instance_id": "llm-1", + "summarize_prompt": "Summarize", + "extract_file_path": "/data/exec/EXTRACT", + "summarize_file_path": "/data/exec/SUMMARIZE", + "platform_api_key": "sk-test", + "prompt_keys": [], + }, + }) + executor._handle_structure_pipeline(ctx) + + # Outputs should have chunk-size=0 + for output in answer["outputs"]: + assert output["chunk-size"] == 0 + assert output["chunk-overlap"] == 0 + + +# --------------------------------------------------------------------------- +# Tests — Index dedup +# --------------------------------------------------------------------------- + + +class TestIndexDedup: + """Index step deduplication.""" + + def test_index_dedup_skips_duplicate_params(self, executor): + """Duplicate param combos are only indexed once.""" + executor._handle_extract = MagicMock( + return_value=ExecutionResult( + success=True, data={"extracted_text": "text"} + ) + ) + index_call_count = 0 + + def counting_index(ctx): + nonlocal index_call_count + index_call_count += 1 + return ExecutionResult(success=True, data={"doc_id": "d1"}) + + executor._handle_index = counting_index + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, data={"output": {}} + ) + ) + + answer = _base_answer_params() + # Add a second output with same adapter params + answer["outputs"].append({ + "name": "field_b", + "prompt": "What is 
the profit?", + "type": "text", + "active": True, + "chunk-size": 512, + "chunk-overlap": 128, + "llm": "llm-1", + "embedding": "emb-1", + "vector-db": "vdb-1", + "x2text_adapter": "x2t-1", + }) + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": answer, + "pipeline_options": _base_pipeline_options(), + }) + result = executor._handle_structure_pipeline(ctx) + + assert result.success + # Only one index call despite two outputs (same params) + assert index_call_count == 1 + + def test_index_different_params_indexes_both(self, executor): + """Different param combos are indexed separately.""" + executor._handle_extract = MagicMock( + return_value=ExecutionResult( + success=True, data={"extracted_text": "text"} + ) + ) + index_call_count = 0 + + def counting_index(ctx): + nonlocal index_call_count + index_call_count += 1 + return ExecutionResult(success=True, data={"doc_id": "d1"}) + + executor._handle_index = counting_index + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, data={"output": {}} + ) + ) + + answer = _base_answer_params() + answer["outputs"].append({ + "name": "field_b", + "prompt": "What is the profit?", + "type": "text", + "active": True, + "chunk-size": 256, # Different chunk size + "chunk-overlap": 64, + "llm": "llm-1", + "embedding": "emb-1", + "vector-db": "vdb-1", + "x2text_adapter": "x2t-1", + }) + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": answer, + "pipeline_options": _base_pipeline_options(), + }) + result = executor._handle_structure_pipeline(ctx) + + assert result.success + assert index_call_count == 2 + + def test_chunk_size_zero_skips_index(self, executor): + """chunk-size=0 outputs skip indexing entirely.""" + executor._handle_extract = MagicMock( + return_value=ExecutionResult( + success=True, 
data={"extracted_text": "text"} + ) + ) + executor._handle_index = MagicMock() + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, data={"output": {}} + ) + ) + + answer = _base_answer_params() + answer["outputs"][0]["chunk-size"] = 0 + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": answer, + "pipeline_options": _base_pipeline_options(), + }) + result = executor._handle_structure_pipeline(ctx) + + assert result.success + executor._handle_index.assert_not_called() + + +# --------------------------------------------------------------------------- +# Tests — Answer prompt failure +# --------------------------------------------------------------------------- + + +class TestAnswerPromptFailure: + """Answer prompt failure propagates correctly.""" + + def test_answer_failure_propagates(self, executor): + executor._handle_extract = MagicMock( + return_value=ExecutionResult( + success=True, data={"extracted_text": "text"} + ) + ) + executor._handle_index = MagicMock( + return_value=ExecutionResult( + success=True, data={"doc_id": "d1"} + ) + ) + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult.failure(error="LLM timeout") + ) + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": _base_answer_params(), + "pipeline_options": _base_pipeline_options(), + }) + result = executor._handle_structure_pipeline(ctx) + + assert not result.success + assert "LLM timeout" in result.error + + +# --------------------------------------------------------------------------- +# Tests — Merge metrics utility +# --------------------------------------------------------------------------- + + +class TestMergeMetrics: + """Test _merge_pipeline_metrics.""" + + def test_merge_disjoint(self, executor): + m = executor._merge_pipeline_metrics( + {"a": {"x": 
1}}, {"b": {"y": 2}} + ) + assert m == {"a": {"x": 1}, "b": {"y": 2}} + + def test_merge_overlapping(self, executor): + m = executor._merge_pipeline_metrics( + {"a": {"x": 1}}, {"a": {"y": 2}} + ) + assert m == {"a": {"x": 1, "y": 2}} + + def test_merge_non_dict_values(self, executor): + m = executor._merge_pipeline_metrics( + {"a": 1}, {"b": 2} + ) + assert m == {"a": 1, "b": 2} + + +# --------------------------------------------------------------------------- +# Tests — Sub-context creation +# --------------------------------------------------------------------------- + + +class TestSubContextCreation: + """Verify sub-contexts inherit parent context fields.""" + + def test_extract_context_inherits_fields(self, executor): + """Extract sub-context gets run_id, org_id, etc. from parent.""" + executor._handle_extract = MagicMock( + return_value=ExecutionResult( + success=True, data={"extracted_text": "text"} + ) + ) + executor._handle_index = MagicMock( + return_value=ExecutionResult( + success=True, data={"doc_id": "d1"} + ) + ) + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, data={"output": {}} + ) + ) + + ctx = _make_pipeline_context( + { + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": _base_answer_params(), + "pipeline_options": _base_pipeline_options(), + }, + run_id="custom-run", + organization_id="custom-org", + ) + executor._handle_structure_pipeline(ctx) + + extract_ctx = executor._handle_extract.call_args[0][0] + assert extract_ctx.run_id == "custom-run" + assert extract_ctx.organization_id == "custom-org" + assert extract_ctx.operation == "extract" + + index_ctx = executor._handle_index.call_args[0][0] + assert index_ctx.run_id == "custom-run" + assert index_ctx.operation == "index" + + answer_ctx = executor._handle_answer_prompt.call_args[0][0] + assert answer_ctx.run_id == "custom-run" + assert answer_ctx.operation == "answer_prompt" diff --git 
a/workers/tests/test_retrieval.py b/workers/tests/test_retrieval.py new file mode 100644 index 0000000000..defe746a3e --- /dev/null +++ b/workers/tests/test_retrieval.py @@ -0,0 +1,305 @@ +"""Tests for the RetrievalService factory and complete-context path. + +Retriever internals are NOT tested here — they're llama_index wrappers +that will be validated in Phase 2-SANITY integration tests. +""" + +from unittest.mock import MagicMock, patch + +import pytest + +from executor.executors.constants import RetrievalStrategy +from executor.executors.retrieval import RetrievalService + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_output(prompt: str = "What is X?", top_k: int = 5, name: str = "field_a"): + """Build a minimal ``output`` dict matching PromptServiceConstants keys.""" + return { + "promptx": prompt, + "similarity-top-k": top_k, + "name": name, + } + + +def _mock_retriever_class(return_value=None): + """Return a mock class whose instances have a ``.retrieve()`` method.""" + if return_value is None: + return_value = {"chunk1", "chunk2"} + cls = MagicMock() + instance = MagicMock() + instance.retrieve.return_value = return_value + cls.return_value = instance + return cls, instance + + +# --------------------------------------------------------------------------- +# Factory — run_retrieval +# --------------------------------------------------------------------------- + +class TestRunRetrieval: + """Tests for RetrievalService.run_retrieval().""" + + @pytest.mark.parametrize("strategy", list(RetrievalStrategy)) + @patch("executor.executors.retrieval.RetrievalService._get_retriever_map") + def test_correct_class_selected_for_each_strategy(self, mock_map, strategy): + """Factory returns the correct retriever class for each strategy.""" + cls, _inst = _mock_retriever_class() + mock_map.return_value = {strategy.value: cls} + + result = 
RetrievalService.run_retrieval( + output=_make_output(), + doc_id="doc-1", + llm=MagicMock(), + vector_db=MagicMock(), + retrieval_type=strategy.value, + ) + cls.assert_called_once() + assert isinstance(result, list) + + @patch("executor.executors.retrieval.RetrievalService._get_retriever_map") + def test_unknown_strategy_raises_value_error(self, mock_map): + """Passing an invalid strategy string raises ValueError.""" + mock_map.return_value = {} + + with pytest.raises(ValueError, match="Unknown retrieval type"): + RetrievalService.run_retrieval( + output=_make_output(), + doc_id="doc-1", + llm=MagicMock(), + vector_db=MagicMock(), + retrieval_type="nonexistent", + ) + + @patch("executor.executors.retrieval.RetrievalService._get_retriever_map") + def test_retriever_instantiated_with_correct_params(self, mock_map): + """Verify vector_db, doc_id, prompt, top_k, llm passed through.""" + cls, _inst = _mock_retriever_class() + mock_map.return_value = {RetrievalStrategy.SIMPLE.value: cls} + + llm = MagicMock(name="llm") + vdb = MagicMock(name="vdb") + output = _make_output(prompt="Find revenue", top_k=10, name="revenue") + + RetrievalService.run_retrieval( + output=output, + doc_id="doc-42", + llm=llm, + vector_db=vdb, + retrieval_type=RetrievalStrategy.SIMPLE.value, + ) + + cls.assert_called_once_with( + vector_db=vdb, + doc_id="doc-42", + prompt="Find revenue", + top_k=10, + llm=llm, + ) + + @patch("executor.executors.retrieval.RetrievalService._get_retriever_map") + def test_retrieve_result_converted_to_list(self, mock_map): + """Mock retriever returns a set; run_retrieval returns a list.""" + cls, _inst = _mock_retriever_class(return_value={"a", "b", "c"}) + mock_map.return_value = {RetrievalStrategy.FUSION.value: cls} + + result = RetrievalService.run_retrieval( + output=_make_output(), + doc_id="doc-1", + llm=MagicMock(), + vector_db=MagicMock(), + retrieval_type=RetrievalStrategy.FUSION.value, + ) + assert isinstance(result, list) + assert set(result) == {"a", 
"b", "c"} + + @patch("executor.executors.retrieval.RetrievalService._get_retriever_map") + def test_metrics_recorded(self, mock_map): + """Verify context_retrieval_metrics dict populated with timing.""" + cls, _inst = _mock_retriever_class() + mock_map.return_value = {RetrievalStrategy.SIMPLE.value: cls} + + metrics: dict = {} + RetrievalService.run_retrieval( + output=_make_output(name="my_field"), + doc_id="doc-1", + llm=MagicMock(), + vector_db=MagicMock(), + retrieval_type=RetrievalStrategy.SIMPLE.value, + context_retrieval_metrics=metrics, + ) + + assert "my_field" in metrics + assert "time_taken(s)" in metrics["my_field"] + assert isinstance(metrics["my_field"]["time_taken(s)"], float) + + @patch("executor.executors.retrieval.RetrievalService._get_retriever_map") + def test_metrics_optional_none_does_not_crash(self, mock_map): + """context_retrieval_metrics=None doesn't crash.""" + cls, _inst = _mock_retriever_class() + mock_map.return_value = {RetrievalStrategy.SIMPLE.value: cls} + + # Should not raise + RetrievalService.run_retrieval( + output=_make_output(), + doc_id="doc-1", + llm=MagicMock(), + vector_db=MagicMock(), + retrieval_type=RetrievalStrategy.SIMPLE.value, + context_retrieval_metrics=None, + ) + + +# --------------------------------------------------------------------------- +# Complete context — retrieve_complete_context +# --------------------------------------------------------------------------- + +class TestRetrieveCompleteContext: + """Tests for RetrievalService.retrieve_complete_context().""" + + @patch("executor.executors.file_utils.FileUtils.get_fs_instance") + def test_reads_file_with_correct_path(self, mock_get_fs): + """Mock FileUtils.get_fs_instance, verify fs.read() called correctly.""" + mock_fs = MagicMock() + mock_fs.read.return_value = "full document text" + mock_get_fs.return_value = mock_fs + + RetrievalService.retrieve_complete_context( + execution_source="ide", + file_path="/data/doc.txt", + ) + + 
mock_get_fs.assert_called_once_with(execution_source="ide") + mock_fs.read.assert_called_once_with(path="/data/doc.txt", mode="r") + + @patch("executor.executors.file_utils.FileUtils.get_fs_instance") + def test_returns_list_with_single_item(self, mock_get_fs): + """Verify [content] shape.""" + mock_fs = MagicMock() + mock_fs.read.return_value = "hello world" + mock_get_fs.return_value = mock_fs + + result = RetrievalService.retrieve_complete_context( + execution_source="tool", + file_path="/data/doc.txt", + ) + + assert result == ["hello world"] + assert len(result) == 1 + + @patch("executor.executors.file_utils.FileUtils.get_fs_instance") + def test_complete_context_records_metrics(self, mock_get_fs): + """Timing dict populated.""" + mock_fs = MagicMock() + mock_fs.read.return_value = "content" + mock_get_fs.return_value = mock_fs + + metrics: dict = {} + RetrievalService.retrieve_complete_context( + execution_source="ide", + file_path="/data/doc.txt", + context_retrieval_metrics=metrics, + prompt_key="total_revenue", + ) + + assert "total_revenue" in metrics + assert "time_taken(s)" in metrics["total_revenue"] + assert isinstance(metrics["total_revenue"]["time_taken(s)"], float) + + @patch("executor.executors.file_utils.FileUtils.get_fs_instance") + def test_complete_context_metrics_none_does_not_crash(self, mock_get_fs): + """context_retrieval_metrics=None doesn't crash.""" + mock_fs = MagicMock() + mock_fs.read.return_value = "content" + mock_get_fs.return_value = mock_fs + + # Should not raise + RetrievalService.retrieve_complete_context( + execution_source="ide", + file_path="/data/doc.txt", + context_retrieval_metrics=None, + ) + + +# --------------------------------------------------------------------------- +# BaseRetriever interface +# --------------------------------------------------------------------------- + +class TestBaseRetriever: + """Tests for BaseRetriever base class.""" + + def test_default_retrieve_returns_empty_set(self): + """Default 
retrieve() returns empty set.""" + from executor.executors.retrievers.base_retriever import BaseRetriever + + r = BaseRetriever( + vector_db=MagicMock(), + prompt="test", + doc_id="doc-1", + top_k=5, + ) + assert r.retrieve() == set() + + def test_constructor_stores_all_params(self): + """Constructor stores vector_db, prompt, doc_id, top_k, llm.""" + from executor.executors.retrievers.base_retriever import BaseRetriever + + vdb = MagicMock(name="vdb") + llm = MagicMock(name="llm") + r = BaseRetriever( + vector_db=vdb, + prompt="my prompt", + doc_id="doc-99", + top_k=3, + llm=llm, + ) + assert r.vector_db is vdb + assert r.prompt == "my prompt" + assert r.doc_id == "doc-99" + assert r.top_k == 3 + assert r._llm is llm + + def test_constructor_llm_defaults_to_none(self): + """When llm not provided, it defaults to None.""" + from executor.executors.retrievers.base_retriever import BaseRetriever + + r = BaseRetriever( + vector_db=MagicMock(), + prompt="test", + doc_id="doc-1", + top_k=5, + ) + assert r.llm is None + + def test_llm_property_returns_retriever_llm(self): + """When llm is set, the property returns a RetrieverLLM wrapper.""" + from executor.executors.retrievers.base_retriever import BaseRetriever + from executor.executors.retrievers.retriever_llm import RetrieverLLM + + llm = MagicMock(name="llm") + with patch.object(RetrieverLLM, "__init__", return_value=None): + r = BaseRetriever( + vector_db=MagicMock(), + prompt="test", + doc_id="doc-1", + top_k=5, + llm=llm, + ) + result = r.llm + assert isinstance(result, RetrieverLLM) + + def test_require_llm_raises_without_llm(self): + """require_llm() raises ValueError when no LLM is configured.""" + from executor.executors.retrievers.base_retriever import BaseRetriever + + r = BaseRetriever( + vector_db=MagicMock(), + prompt="test", + doc_id="doc-1", + top_k=5, + ) + with pytest.raises(ValueError, match="requires an LLM"): + r.require_llm() diff --git a/workers/tests/test_sanity_phase2.py 
b/workers/tests/test_sanity_phase2.py new file mode 100644 index 0000000000..18a87e51d3 --- /dev/null +++ b/workers/tests/test_sanity_phase2.py @@ -0,0 +1,792 @@ +"""Phase 2-SANITY — Full-chain integration tests for LegacyExecutor. + +All Phase 2 code and unit tests are complete (2A–2H, 194 workers tests). +This file bridges unit tests and real integration by testing the full +Celery chain: + + task.apply() → execute_extraction task → ExecutionOrchestrator + → ExecutorRegistry.get("legacy") → LegacyExecutor.execute() + → _handle_X() → ExecutionResult + +All in Celery eager mode (no broker needed). External adapters +(X2Text, LLM, VectorDB) are mocked. +""" + +import json +from unittest.mock import MagicMock, patch + +import pytest + +from executor.executors.constants import ( + IndexingConstants as IKeys, + PromptServiceConstants as PSKeys, +) +from unstract.sdk1.execution.context import ExecutionContext, Operation +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + +# --------------------------------------------------------------------------- +# Patch targets +# --------------------------------------------------------------------------- + +_PATCH_X2TEXT = "executor.executors.legacy_executor.X2Text" +_PATCH_FS = "executor.executors.legacy_executor.FileUtils.get_fs_instance" +_PATCH_INDEX_DEPS = ( + "executor.executors.legacy_executor.LegacyExecutor._get_indexing_deps" +) +_PATCH_PROMPT_DEPS = ( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" +) +_PATCH_SHIM = "executor.executors.legacy_executor.ExecutorToolShim" +_PATCH_RUN_COMPLETION = ( + "executor.executors.answer_prompt.AnswerPromptService.run_completion" +) +_PATCH_INDEX_UTILS = ( + "unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key" +) + +# --------------------------------------------------------------------------- +# Fixtures +# 
--------------------------------------------------------------------------- + + +@pytest.fixture(autouse=True) +def _ensure_legacy_registered(): + """Ensure LegacyExecutor is registered without clearing other state. + + Unlike unit tests that clear() + re-register, sanity tests need + LegacyExecutor always present. We add it idempotently. + """ + from executor.executors.legacy_executor import LegacyExecutor + + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry._registry["legacy"] = LegacyExecutor + yield + + +@pytest.fixture +def eager_app(): + """Configure the real executor Celery app for eager-mode testing.""" + from executor.worker import app + + original = { + "task_always_eager": app.conf.task_always_eager, + "task_eager_propagates": app.conf.task_eager_propagates, + "result_backend": app.conf.result_backend, + } + app.conf.update( + task_always_eager=True, + task_eager_propagates=False, + result_backend="cache+memory://", + ) + yield app + app.conf.update(original) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _run_task(eager_app, ctx_dict): + """Run execute_extraction task via task.apply() (eager-safe).""" + task = eager_app.tasks["execute_extraction"] + result = task.apply(args=[ctx_dict]) + return result.get() + + +def _mock_llm(answer="sanity answer"): + """Create a mock LLM matching the test_answer_prompt.py pattern.""" + llm = MagicMock(name="llm") + response = MagicMock() + response.text = answer + llm.complete.return_value = { + PSKeys.RESPONSE: response, + PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, + PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], + PSKeys.WHISPER_HASH: "", + } + llm.get_usage_reason.return_value = "extraction" + llm.get_metrics.return_value = {"tokens": 100} + return llm + + +def _mock_prompt_deps(llm=None): + """Return a 7-tuple matching 
_get_prompt_deps() return shape. + + Uses the real AnswerPromptService + mocked adapters. + """ + if llm is None: + llm = _mock_llm() + + from executor.executors.answer_prompt import AnswerPromptService as answer_prompt_svc_cls + + retrieval_svc = MagicMock(name="RetrievalService") + retrieval_svc.run_retrieval.return_value = ["chunk1", "chunk2"] + retrieval_svc.retrieve_complete_context.return_value = ["full content"] + + variable_replacement_svc = MagicMock(name="VariableReplacementService") + variable_replacement_svc.is_variables_present.return_value = False + + index_cls = MagicMock(name="Index") + index_instance = MagicMock() + index_instance.generate_index_key.return_value = "doc-id-sanity" + index_cls.return_value = index_instance + + llm_cls = MagicMock(name="LLM") + llm_cls.return_value = llm + + embedding_compat_cls = MagicMock(name="EmbeddingCompat") + vector_db_cls = MagicMock(name="VectorDB") + + return ( + answer_prompt_svc_cls, + retrieval_svc, + variable_replacement_svc, + index_cls, + llm_cls, + embedding_compat_cls, + vector_db_cls, + ) + + +def _mock_process_response(text="sanity extracted text"): + """Build a mock TextExtractionResult.""" + from unstract.sdk1.adapters.x2text.dto import ( + TextExtractionMetadata, + TextExtractionResult, + ) + + metadata = TextExtractionMetadata(whisper_hash="sanity-hash") + return TextExtractionResult( + extracted_text=text, + extraction_metadata=metadata, + ) + + +def _make_prompt(name="field_a", prompt="What is the revenue?", + output_type="text", **overrides): + """Build a minimal prompt definition dict.""" + d = { + PSKeys.NAME: name, + PSKeys.PROMPT: prompt, + PSKeys.TYPE: output_type, + PSKeys.CHUNK_SIZE: 512, + PSKeys.CHUNK_OVERLAP: 128, + PSKeys.RETRIEVAL_STRATEGY: "simple", + PSKeys.LLM: "llm-1", + PSKeys.EMBEDDING: "emb-1", + PSKeys.VECTOR_DB: "vdb-1", + PSKeys.X2TEXT_ADAPTER: "x2t-1", + PSKeys.SIMILARITY_TOP_K: 5, + } + d.update(overrides) + return d + + +# --- Context factories per operation --- + + 
+def _extract_ctx(**overrides): + defaults = { + "executor_name": "legacy", + "operation": "extract", + "run_id": "run-sanity-ext", + "execution_source": "tool", + "organization_id": "org-test", + "executor_params": { + "x2text_instance_id": "x2t-sanity", + "file_path": "/data/sanity.pdf", + "platform_api_key": "sk-sanity", + }, + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + +def _index_ctx(**overrides): + defaults = { + "executor_name": "legacy", + "operation": "index", + "run_id": "run-sanity-idx", + "execution_source": "tool", + "organization_id": "org-test", + "executor_params": { + "embedding_instance_id": "emb-sanity", + "vector_db_instance_id": "vdb-sanity", + "x2text_instance_id": "x2t-sanity", + "file_path": "/data/sanity.pdf", + "file_hash": "sanity-hash", + "extracted_text": "Sanity test document text", + "platform_api_key": "sk-sanity", + "chunk_size": 512, + "chunk_overlap": 128, + }, + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + +def _answer_prompt_ctx(prompts=None, **overrides): + if prompts is None: + prompts = [_make_prompt()] + defaults = { + "executor_name": "legacy", + "operation": Operation.ANSWER_PROMPT.value, + "run_id": "run-sanity-ap", + "execution_source": "ide", + "executor_params": { + PSKeys.OUTPUTS: prompts, + PSKeys.TOOL_SETTINGS: {}, + PSKeys.TOOL_ID: "tool-sanity", + PSKeys.EXECUTION_ID: "exec-sanity", + PSKeys.FILE_HASH: "hash-sanity", + PSKeys.FILE_PATH: "/data/sanity.txt", + PSKeys.FILE_NAME: "sanity.txt", + PSKeys.LOG_EVENTS_ID: "", + PSKeys.CUSTOM_DATA: {}, + PSKeys.EXECUTION_SOURCE: "ide", + PSKeys.PLATFORM_SERVICE_API_KEY: "pk-sanity", + }, + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + +def _summarize_ctx(**overrides): + defaults = { + "executor_name": "legacy", + "operation": "summarize", + "run_id": "run-sanity-sum", + "execution_source": "tool", + "executor_params": { + "llm_adapter_instance_id": "llm-sanity", + "summarize_prompt": 
"Summarize the document.", + "context": "Long document content here.", + "prompt_keys": ["invoice_number", "total"], + "PLATFORM_SERVICE_API_KEY": "pk-sanity", + }, + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + +# =========================================================================== +# Test classes +# =========================================================================== + + +class TestSanityExtract: + """Full-chain extract tests through Celery eager mode.""" + + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + def test_extract_full_chain(self, mock_x2text_cls, mock_get_fs, eager_app): + """Mocked X2Text + FileUtils → result.data has extracted_text.""" + mock_x2text = MagicMock() + mock_x2text.process.return_value = _mock_process_response( + "sanity extracted" + ) + mock_x2text.x2text_instance = MagicMock() + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + ctx = _extract_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + assert result.data[IKeys.EXTRACTED_TEXT] == "sanity extracted" + + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + def test_extract_missing_params_full_chain( + self, mock_x2text_cls, mock_get_fs, eager_app + ): + """Empty params → failure with missing fields message.""" + ctx = _extract_ctx(executor_params={"platform_api_key": "sk-test"}) + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is False + assert "x2text_instance_id" in result.error + assert "file_path" in result.error + + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + def test_extract_adapter_error_full_chain( + self, mock_x2text_cls, mock_get_fs, eager_app + ): + """X2Text raises AdapterError → failure result, no unhandled exception.""" + from unstract.sdk1.adapters.exceptions import AdapterError + + mock_x2text = MagicMock() + 
mock_x2text.x2text_instance = MagicMock() + mock_x2text.x2text_instance.get_name.return_value = "SanityExtractor" + mock_x2text.process.side_effect = AdapterError("sanity adapter err") + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + ctx = _extract_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is False + assert "SanityExtractor" in result.error + assert "sanity adapter err" in result.error + + +class TestSanityIndex: + """Full-chain index tests through Celery eager mode.""" + + @patch(_PATCH_FS) + @patch(_PATCH_INDEX_DEPS) + def test_index_full_chain(self, mock_deps, mock_get_fs, eager_app): + """Mocked _get_indexing_deps → result.data has doc_id.""" + mock_index_cls = MagicMock() + mock_index = MagicMock() + mock_index.generate_index_key.return_value = "doc-sanity-idx" + mock_index.is_document_indexed.return_value = False + mock_index.perform_indexing.return_value = "doc-sanity-idx" + mock_index_cls.return_value = mock_index + + mock_emb_cls = MagicMock() + mock_emb_cls.return_value = MagicMock() + mock_vdb_cls = MagicMock() + mock_vdb_cls.return_value = MagicMock() + + mock_deps.return_value = (mock_index_cls, mock_emb_cls, mock_vdb_cls) + mock_get_fs.return_value = MagicMock() + + ctx = _index_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + assert result.data[IKeys.DOC_ID] == "doc-sanity-idx" + + @patch(_PATCH_INDEX_UTILS, return_value="doc-zero-chunk-sanity") + @patch(_PATCH_FS) + def test_index_chunk_size_zero_full_chain( + self, mock_get_fs, mock_gen_key, eager_app + ): + """chunk_size=0 skips heavy deps → returns doc_id via IndexingUtils.""" + mock_get_fs.return_value = MagicMock() + + params = { + "embedding_instance_id": "emb-sanity", + "vector_db_instance_id": "vdb-sanity", + "x2text_instance_id": "x2t-sanity", + "file_path": 
"/data/sanity.pdf", + "file_hash": "sanity-hash", + "extracted_text": "text", + "platform_api_key": "sk-sanity", + "chunk_size": 0, + "chunk_overlap": 0, + } + ctx = _index_ctx(executor_params=params) + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + assert result.data[IKeys.DOC_ID] == "doc-zero-chunk-sanity" + + @patch(_PATCH_FS) + @patch(_PATCH_INDEX_DEPS) + def test_index_error_full_chain(self, mock_deps, mock_get_fs, eager_app): + """perform_indexing raises → failure result.""" + mock_index_cls = MagicMock() + mock_index = MagicMock() + mock_index.generate_index_key.return_value = "doc-err" + mock_index.is_document_indexed.return_value = False + mock_index.perform_indexing.side_effect = RuntimeError("VDB down") + mock_index_cls.return_value = mock_index + + mock_emb_cls = MagicMock() + mock_emb_cls.return_value = MagicMock() + mock_vdb_cls = MagicMock() + mock_vdb_cls.return_value = MagicMock() + + mock_deps.return_value = (mock_index_cls, mock_emb_cls, mock_vdb_cls) + mock_get_fs.return_value = MagicMock() + + ctx = _index_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is False + assert "indexing" in result.error.lower() + + +class TestSanityAnswerPrompt: + """Full-chain answer_prompt tests through Celery eager mode.""" + + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-sanity") + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_answer_prompt_text_full_chain( + self, mock_shim_cls, mock_deps, _mock_idx, eager_app + ): + """TEXT prompt → result.data has output, metadata, metrics.""" + llm = _mock_llm("sanity answer") + mock_deps.return_value = _mock_prompt_deps(llm) + mock_shim_cls.return_value = MagicMock() + + ctx = _answer_prompt_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + 
assert PSKeys.OUTPUT in result.data + assert PSKeys.METADATA in result.data + assert PSKeys.METRICS in result.data + assert result.data[PSKeys.OUTPUT]["field_a"] == "sanity answer" + + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-sanity") + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_answer_prompt_multi_prompt_full_chain( + self, mock_shim_cls, mock_deps, _mock_idx, eager_app + ): + """Two prompts → both field names in output and metrics.""" + llm = _mock_llm("multi answer") + mock_deps.return_value = _mock_prompt_deps(llm) + mock_shim_cls.return_value = MagicMock() + + prompts = [ + _make_prompt(name="revenue"), + _make_prompt(name="date_signed"), + ] + ctx = _answer_prompt_ctx(prompts=prompts) + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + assert "revenue" in result.data[PSKeys.OUTPUT] + assert "date_signed" in result.data[PSKeys.OUTPUT] + assert "revenue" in result.data[PSKeys.METRICS] + assert "date_signed" in result.data[PSKeys.METRICS] + + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-sanity") + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_answer_prompt_table_fails_full_chain( + self, mock_shim_cls, mock_deps, _mock_idx, eager_app + ): + """TABLE type → failure mentioning TABLE.""" + llm = _mock_llm() + mock_deps.return_value = _mock_prompt_deps(llm) + mock_shim_cls.return_value = MagicMock() + + ctx = _answer_prompt_ctx( + prompts=[_make_prompt(output_type="table")] + ) + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is False + assert "TABLE" in result.error + + +class TestSanitySinglePass: + """Full-chain single_pass_extraction test.""" + + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-sanity") + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_single_pass_delegates_full_chain( + self, mock_shim_cls, mock_deps, _mock_idx, eager_app + ): + 
"""Same mocks as answer_prompt → same response shape.""" + llm = _mock_llm("single pass answer") + mock_deps.return_value = _mock_prompt_deps(llm) + mock_shim_cls.return_value = MagicMock() + + ctx = _answer_prompt_ctx( + operation=Operation.SINGLE_PASS_EXTRACTION.value, + ) + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + assert PSKeys.OUTPUT in result.data + assert result.data[PSKeys.OUTPUT]["field_a"] == "single pass answer" + + +class TestSanitySummarize: + """Full-chain summarize tests through Celery eager mode.""" + + @patch(_PATCH_RUN_COMPLETION, return_value="Sanity summary text.") + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_summarize_full_chain( + self, mock_shim_cls, mock_get_deps, mock_run, eager_app + ): + """Mocked _get_prompt_deps + run_completion → result.data has summary.""" + mock_llm_cls = MagicMock() + mock_llm_cls.return_value = MagicMock() + mock_get_deps.return_value = ( + MagicMock(), MagicMock(), MagicMock(), MagicMock(), + mock_llm_cls, MagicMock(), MagicMock(), + ) + + ctx = _summarize_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + assert result.data["data"] == "Sanity summary text." 
+ + def test_summarize_missing_llm_full_chain(self, eager_app): + """Missing llm_adapter_instance_id → failure.""" + ctx = _summarize_ctx( + executor_params={ + "llm_adapter_instance_id": "", + "summarize_prompt": "Summarize.", + "context": "Document text.", + "PLATFORM_SERVICE_API_KEY": "pk-test", + } + ) + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is False + assert "llm_adapter_instance_id" in result.error + + @patch(_PATCH_RUN_COMPLETION, side_effect=Exception("LLM down")) + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_summarize_error_full_chain( + self, mock_shim_cls, mock_get_deps, mock_run, eager_app + ): + """run_completion raises → failure mentioning summarization.""" + mock_llm_cls = MagicMock() + mock_llm_cls.return_value = MagicMock() + mock_get_deps.return_value = ( + MagicMock(), MagicMock(), MagicMock(), MagicMock(), + mock_llm_cls, MagicMock(), MagicMock(), + ) + + ctx = _summarize_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is False + assert "summarization" in result.error.lower() or "LLM" in result.error + + +class TestSanityAgenticExtraction: + """Full-chain agentic operations test — rejected by LegacyExecutor.""" + + def test_agentic_extract_rejected_by_legacy(self, eager_app): + """Agentic operations are handled by cloud executor, not legacy.""" + ctx = ExecutionContext( + executor_name="legacy", + operation="agentic_extract", + run_id="run-sanity-agentic", + execution_source="tool", + ) + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is False + assert "does not support" in result.error + + +class TestSanityResponseContracts: + """Verify response dicts survive JSON round-trip with expected keys.""" + + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + def test_extract_contract(self, 
mock_x2text_cls, mock_get_fs, eager_app): + mock_x2text = MagicMock() + mock_x2text.process.return_value = _mock_process_response("contract") + mock_x2text.x2text_instance = MagicMock() + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + ctx = _extract_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + + # JSON round-trip + serialized = json.dumps(result_dict) + deserialized = json.loads(serialized) + result = ExecutionResult.from_dict(deserialized) + + assert result.success is True + assert isinstance(result.data[IKeys.EXTRACTED_TEXT], str) + + @patch(_PATCH_FS) + @patch(_PATCH_INDEX_DEPS) + def test_index_contract(self, mock_deps, mock_get_fs, eager_app): + mock_index_cls = MagicMock() + mock_index = MagicMock() + mock_index.generate_index_key.return_value = "doc-contract" + mock_index.is_document_indexed.return_value = False + mock_index.perform_indexing.return_value = "doc-contract" + mock_index_cls.return_value = mock_index + + mock_emb_cls = MagicMock() + mock_emb_cls.return_value = MagicMock() + mock_vdb_cls = MagicMock() + mock_vdb_cls.return_value = MagicMock() + + mock_deps.return_value = (mock_index_cls, mock_emb_cls, mock_vdb_cls) + mock_get_fs.return_value = MagicMock() + + ctx = _index_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + + serialized = json.dumps(result_dict) + deserialized = json.loads(serialized) + result = ExecutionResult.from_dict(deserialized) + + assert result.success is True + assert isinstance(result.data[IKeys.DOC_ID], str) + + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-sanity") + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_answer_prompt_contract( + self, mock_shim_cls, mock_deps, _mock_idx, eager_app + ): + llm = _mock_llm("contract answer") + mock_deps.return_value = _mock_prompt_deps(llm) + mock_shim_cls.return_value = MagicMock() + + ctx = _answer_prompt_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + + serialized = 
json.dumps(result_dict) + deserialized = json.loads(serialized) + result = ExecutionResult.from_dict(deserialized) + + assert result.success is True + assert isinstance(result.data[PSKeys.OUTPUT], dict) + assert isinstance(result.data[PSKeys.METADATA], dict) + assert isinstance(result.data[PSKeys.METRICS], dict) + + @patch(_PATCH_RUN_COMPLETION, return_value="contract summary") + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_summarize_contract( + self, mock_shim_cls, mock_get_deps, mock_run, eager_app + ): + mock_llm_cls = MagicMock() + mock_llm_cls.return_value = MagicMock() + mock_get_deps.return_value = ( + MagicMock(), MagicMock(), MagicMock(), MagicMock(), + mock_llm_cls, MagicMock(), MagicMock(), + ) + + ctx = _summarize_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + + serialized = json.dumps(result_dict) + deserialized = json.loads(serialized) + result = ExecutionResult.from_dict(deserialized) + + assert result.success is True + assert isinstance(result.data["data"], str) + + +class TestSanityDispatcher: + """Full-chain dispatcher tests with Celery eager mode.""" + + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + def test_dispatcher_dispatch_full_chain( + self, mock_x2text_cls, mock_get_fs, eager_app + ): + """ExecutionDispatcher dispatches through Celery and returns result. + + Celery's ``send_task`` doesn't reliably use eager mode, so we + patch it to route through ``task.apply()`` instead — this still + exercises the full Dispatcher → task → orchestrator chain. 
+ """ + from unstract.sdk1.execution.dispatcher import ExecutionDispatcher + + mock_x2text = MagicMock() + mock_x2text.process.return_value = _mock_process_response("dispatched") + mock_x2text.x2text_instance = MagicMock() + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + task = eager_app.tasks["execute_extraction"] + + def eager_send_task(name, args=None, **kwargs): + return task.apply(args=args) + + with patch.object(eager_app, "send_task", side_effect=eager_send_task): + dispatcher = ExecutionDispatcher(celery_app=eager_app) + ctx = _extract_ctx() + result = dispatcher.dispatch(ctx, timeout=10) + + assert isinstance(result, ExecutionResult) + assert result.success is True + assert result.data[IKeys.EXTRACTED_TEXT] == "dispatched" + + def test_dispatcher_no_app_raises(self): + """ExecutionDispatcher(celery_app=None).dispatch() → ValueError.""" + from unstract.sdk1.execution.dispatcher import ExecutionDispatcher + + dispatcher = ExecutionDispatcher(celery_app=None) + ctx = _extract_ctx() + + with pytest.raises(ValueError, match="No Celery app"): + dispatcher.dispatch(ctx) + + +class TestSanityCrossCutting: + """Cross-cutting concerns: unknown ops, invalid contexts, error round-trip.""" + + def test_unknown_operation_full_chain(self, eager_app): + """operation='nonexistent' → failure mentioning unsupported.""" + ctx = ExecutionContext( + executor_name="legacy", + operation="nonexistent", + run_id="run-sanity-unknown", + execution_source="tool", + ) + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is False + assert "nonexistent" in result.error.lower() + + def test_invalid_context_dict_full_chain(self, eager_app): + """Malformed dict → failure mentioning 'Invalid execution context'.""" + result_dict = _run_task(eager_app, {"bad": "data"}) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is False + assert "Invalid execution 
context" in result.error + + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + def test_failure_result_json_round_trip( + self, mock_x2text_cls, mock_get_fs, eager_app + ): + """Failure result survives JSON serialization with error preserved.""" + from unstract.sdk1.adapters.exceptions import AdapterError + + mock_x2text = MagicMock() + mock_x2text.x2text_instance = MagicMock() + mock_x2text.x2text_instance.get_name.return_value = "FailExtractor" + mock_x2text.process.side_effect = AdapterError("round trip error") + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + ctx = _extract_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + + # Verify raw dict survives JSON round-trip + serialized = json.dumps(result_dict) + deserialized = json.loads(serialized) + result = ExecutionResult.from_dict(deserialized) + + assert result.success is False + assert "round trip error" in result.error + assert "FailExtractor" in result.error diff --git a/workers/tests/test_sanity_phase3.py b/workers/tests/test_sanity_phase3.py new file mode 100644 index 0000000000..eb49e8611f --- /dev/null +++ b/workers/tests/test_sanity_phase3.py @@ -0,0 +1,968 @@ +"""Phase 3-SANITY — Integration tests for the structure tool Celery task. + +Tests the full structure tool pipeline with mocked platform API and +ExecutionDispatcher. After Phase 5E, the structure tool task dispatches a +single ``structure_pipeline`` operation to the executor worker instead of +3 sequential dispatches. These tests verify the correct pipeline params +are assembled and the result is written to filesystem. 
+""" + +from unittest.mock import MagicMock, patch + +import pytest + +from shared.enums.task_enums import TaskName +from unstract.sdk1.execution.result import ExecutionResult + +# --------------------------------------------------------------------------- +# Patch targets +# --------------------------------------------------------------------------- + +_PATCH_DISPATCHER = ( + "file_processing.structure_tool_task.ExecutionDispatcher" +) +_PATCH_PLATFORM_HELPER = ( + "file_processing.structure_tool_task._create_platform_helper" +) +_PATCH_FILE_STORAGE = ( + "file_processing.structure_tool_task._get_file_storage" +) +_PATCH_SHIM = ( + "executor.executor_tool_shim.ExecutorToolShim" +) +_PATCH_SERVICE_IS_STRUCTURE = ( + "shared.workflow.execution.service." + "WorkerWorkflowExecutionService._is_structure_tool_workflow" +) +_PATCH_SERVICE_EXECUTE_STRUCTURE = ( + "shared.workflow.execution.service." + "WorkerWorkflowExecutionService._execute_structure_tool_workflow" +) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def mock_fs(): + """Create a mock file storage.""" + fs = MagicMock(name="file_storage") + fs.exists.return_value = False + fs.read.return_value = "" + fs.json_dump.return_value = None + fs.write.return_value = None + fs.get_hash_from_file.return_value = "abc123hash" + return fs + + +@pytest.fixture +def mock_dispatcher(): + """Create a mock ExecutionDispatcher that returns success results.""" + dispatcher = MagicMock(name="ExecutionDispatcher") + return dispatcher + + +@pytest.fixture +def mock_platform_helper(): + """Create a mock PlatformHelper.""" + helper = MagicMock(name="PlatformHelper") + return helper + + +@pytest.fixture +def tool_metadata_regular(): + """Standard prompt studio tool metadata.""" + return { + "name": "Test Project", + "is_agentic": False, + "tool_id": "tool-123", + "tool_settings": { + 
"vector-db": "vdb-1", + "embedding": "emb-1", + "x2text_adapter": "x2t-1", + "llm": "llm-1", + }, + "outputs": [ + { + "name": "field_a", + "prompt": "What is the revenue?", + "type": "text", + "active": True, + "chunk-size": 512, + "chunk-overlap": 128, + "retrieval-strategy": "simple", + "llm": "llm-1", + "embedding": "emb-1", + "vector-db": "vdb-1", + "x2text_adapter": "x2t-1", + "similarity-top-k": 5, + }, + ], + } + + +@pytest.fixture +def base_params(): + """Base params dict for execute_structure_tool.""" + return { + "organization_id": "org-test", + "workflow_id": "wf-123", + "execution_id": "exec-456", + "file_execution_id": "fexec-789", + "tool_instance_metadata": { + "prompt_registry_id": "preg-001", + }, + "platform_service_api_key": "sk-test-key", + "input_file_path": "/data/test.pdf", + "output_dir_path": "/output", + "source_file_name": "test.pdf", + "execution_data_dir": "/data/exec", + "messaging_channel": "channel-1", + "file_hash": "filehash123", + "exec_metadata": {"tags": ["tag1"]}, + } + + +def _make_pipeline_result( + output: dict | None = None, + metadata: dict | None = None, + metrics: dict | None = None, +) -> ExecutionResult: + """Create a mock structure_pipeline result.""" + return ExecutionResult( + success=True, + data={ + "output": output or {}, + "metadata": metadata or {}, + "metrics": metrics or {}, + }, + ) + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +class TestTaskEnumRegistered: + """3-SANITY: Verify TaskName enum exists.""" + + def test_task_enum_registered(self): + assert hasattr(TaskName, "EXECUTE_STRUCTURE_TOOL") + assert str(TaskName.EXECUTE_STRUCTURE_TOOL) == "execute_structure_tool" + + +class TestStructureToolPipeline: + """Full pipeline dispatched as single structure_pipeline operation.""" + + @patch(_PATCH_SHIM) + @patch(_PATCH_FILE_STORAGE) + @patch(_PATCH_PLATFORM_HELPER) + 
@patch(_PATCH_DISPATCHER) + def test_structure_tool_single_dispatch( + self, + mock_dispatcher_cls, + mock_create_ph, + mock_get_fs, + mock_shim_cls, + base_params, + tool_metadata_regular, + mock_fs, + mock_platform_helper, + ): + """Single structure_pipeline dispatch for extract+index+answer.""" + from file_processing.structure_tool_task import ( + _execute_structure_tool_impl as execute_structure_tool, + ) + + mock_get_fs.return_value = mock_fs + mock_create_ph.return_value = mock_platform_helper + mock_platform_helper.get_prompt_studio_tool.return_value = { + "tool_metadata": tool_metadata_regular, + } + + dispatcher_instance = MagicMock() + mock_dispatcher_cls.return_value = dispatcher_instance + + pipeline_result = _make_pipeline_result( + output={"field_a": "$1M"}, + metadata={"run_id": "fexec-789", "file_name": "test.pdf"}, + metrics={"field_a": {"extraction_llm": {"tokens": 50}}}, + ) + dispatcher_instance.dispatch.return_value = pipeline_result + + result = execute_structure_tool(base_params) + + assert result["success"] is True + assert result["data"]["output"]["field_a"] == "$1M" + assert result["data"]["metadata"]["file_name"] == "test.pdf" + # json_dump called twice: output file + INFILE overwrite + assert mock_fs.json_dump.call_count == 2 + + # Single dispatch with structure_pipeline + assert dispatcher_instance.dispatch.call_count == 1 + ctx = dispatcher_instance.dispatch.call_args[0][0] + assert ctx.operation == "structure_pipeline" + assert ctx.execution_source == "tool" + assert ctx.executor_name == "legacy" + + @patch(_PATCH_SHIM) + @patch(_PATCH_FILE_STORAGE) + @patch(_PATCH_PLATFORM_HELPER) + @patch(_PATCH_DISPATCHER) + def test_pipeline_params_structure( + self, + mock_dispatcher_cls, + mock_create_ph, + mock_get_fs, + mock_shim_cls, + base_params, + tool_metadata_regular, + mock_fs, + mock_platform_helper, + ): + """Verify executor_params contains all pipeline sub-params.""" + from file_processing.structure_tool_task import ( + 
_execute_structure_tool_impl as execute_structure_tool, + ) + + mock_get_fs.return_value = mock_fs + mock_create_ph.return_value = mock_platform_helper + mock_platform_helper.get_prompt_studio_tool.return_value = { + "tool_metadata": tool_metadata_regular, + } + + dispatcher_instance = MagicMock() + mock_dispatcher_cls.return_value = dispatcher_instance + dispatcher_instance.dispatch.return_value = _make_pipeline_result() + + execute_structure_tool(base_params) + + ctx = dispatcher_instance.dispatch.call_args[0][0] + ep = ctx.executor_params + + # All required keys present + assert "extract_params" in ep + assert "index_template" in ep + assert "answer_params" in ep + assert "pipeline_options" in ep + + # Extract params + assert ep["extract_params"]["file_path"] == "/data/test.pdf" + + # Index template + assert ep["index_template"]["tool_id"] == "tool-123" + assert ep["index_template"]["file_hash"] == "filehash123" + + # Answer params + assert ep["answer_params"]["tool_id"] == "tool-123" + assert ep["answer_params"]["run_id"] == "fexec-789" + + # Pipeline options (normal flow) + opts = ep["pipeline_options"] + assert opts["skip_extraction_and_indexing"] is False + assert opts["is_summarization_enabled"] is False + assert opts["is_single_pass_enabled"] is False + assert opts["source_file_name"] == "test.pdf" + + +class TestStructureToolSinglePass: + """Single-pass flag passed to pipeline_options.""" + + @patch(_PATCH_SHIM) + @patch(_PATCH_FILE_STORAGE) + @patch(_PATCH_PLATFORM_HELPER) + @patch(_PATCH_DISPATCHER) + def test_structure_tool_single_pass( + self, + mock_dispatcher_cls, + mock_create_ph, + mock_get_fs, + mock_shim_cls, + base_params, + tool_metadata_regular, + mock_fs, + mock_platform_helper, + ): + from file_processing.structure_tool_task import ( + _execute_structure_tool_impl as execute_structure_tool, + ) + + mock_get_fs.return_value = mock_fs + mock_create_ph.return_value = mock_platform_helper + 
mock_platform_helper.get_prompt_studio_tool.return_value = { + "tool_metadata": tool_metadata_regular, + } + + base_params["tool_instance_metadata"]["single_pass_extraction_mode"] = True + + dispatcher_instance = MagicMock() + mock_dispatcher_cls.return_value = dispatcher_instance + dispatcher_instance.dispatch.return_value = _make_pipeline_result( + output={"field_a": "answer"}, + ) + + result = execute_structure_tool(base_params) + + assert result["success"] is True + # Single dispatch with is_single_pass_enabled flag + assert dispatcher_instance.dispatch.call_count == 1 + ctx = dispatcher_instance.dispatch.call_args[0][0] + assert ctx.operation == "structure_pipeline" + opts = ctx.executor_params["pipeline_options"] + assert opts["is_single_pass_enabled"] is True + + +class TestStructureToolSummarize: + """Summarization params passed to pipeline.""" + + @patch(_PATCH_SHIM) + @patch(_PATCH_FILE_STORAGE) + @patch(_PATCH_PLATFORM_HELPER) + @patch(_PATCH_DISPATCHER) + def test_structure_tool_summarize_flow( + self, + mock_dispatcher_cls, + mock_create_ph, + mock_get_fs, + mock_shim_cls, + base_params, + tool_metadata_regular, + mock_fs, + mock_platform_helper, + ): + from file_processing.structure_tool_task import ( + _execute_structure_tool_impl as execute_structure_tool, + ) + + mock_get_fs.return_value = mock_fs + mock_create_ph.return_value = mock_platform_helper + mock_platform_helper.get_prompt_studio_tool.return_value = { + "tool_metadata": tool_metadata_regular, + } + + tool_metadata_regular["tool_settings"]["summarize_prompt"] = ( + "Summarize this doc" + ) + base_params["tool_instance_metadata"]["summarize_as_source"] = True + + dispatcher_instance = MagicMock() + mock_dispatcher_cls.return_value = dispatcher_instance + dispatcher_instance.dispatch.return_value = _make_pipeline_result( + output={"field_a": "answer"}, + ) + + result = execute_structure_tool(base_params) + + assert result["success"] is True + assert dispatcher_instance.dispatch.call_count == 
1 + ctx = dispatcher_instance.dispatch.call_args[0][0] + assert ctx.operation == "structure_pipeline" + + opts = ctx.executor_params["pipeline_options"] + assert opts["is_summarization_enabled"] is True + + # Summarize params included + sp = ctx.executor_params["summarize_params"] + assert sp is not None + assert sp["summarize_prompt"] == "Summarize this doc" + assert sp["llm_adapter_instance_id"] == "llm-1" + assert "extract_file_path" in sp + assert "summarize_file_path" in sp + + +class TestStructureToolSmartTable: + """Excel with valid JSON schema sets skip_extraction_and_indexing.""" + + @patch(_PATCH_SHIM) + @patch(_PATCH_FILE_STORAGE) + @patch(_PATCH_PLATFORM_HELPER) + @patch(_PATCH_DISPATCHER) + def test_structure_tool_skip_extraction_smart_table( + self, + mock_dispatcher_cls, + mock_create_ph, + mock_get_fs, + mock_shim_cls, + base_params, + tool_metadata_regular, + mock_fs, + mock_platform_helper, + ): + from file_processing.structure_tool_task import ( + _execute_structure_tool_impl as execute_structure_tool, + ) + + mock_get_fs.return_value = mock_fs + mock_create_ph.return_value = mock_platform_helper + + tool_metadata_regular["outputs"][0]["table_settings"] = { + "is_directory_mode": False, + } + tool_metadata_regular["outputs"][0]["prompt"] = '{"key": "value"}' + + mock_platform_helper.get_prompt_studio_tool.return_value = { + "tool_metadata": tool_metadata_regular, + } + + dispatcher_instance = MagicMock() + mock_dispatcher_cls.return_value = dispatcher_instance + dispatcher_instance.dispatch.return_value = _make_pipeline_result( + output={"field_a": "table_answer"}, + ) + + result = execute_structure_tool(base_params) + + assert result["success"] is True + # Single pipeline dispatch with skip flag + assert dispatcher_instance.dispatch.call_count == 1 + ctx = dispatcher_instance.dispatch.call_args[0][0] + assert ctx.operation == "structure_pipeline" + opts = ctx.executor_params["pipeline_options"] + assert opts["skip_extraction_and_indexing"] is 
True + + +class TestStructureToolAgentic: + """Agentic project routes to AgenticPromptStudioExecutor.""" + + @patch("unstract.sdk1.x2txt.X2Text") + @patch(_PATCH_SHIM) + @patch(_PATCH_FILE_STORAGE) + @patch(_PATCH_PLATFORM_HELPER) + @patch(_PATCH_DISPATCHER) + def test_structure_tool_agentic_routing( + self, + mock_dispatcher_cls, + mock_create_ph, + mock_get_fs, + mock_shim_cls, + mock_x2text_cls, + base_params, + mock_fs, + mock_platform_helper, + ): + from file_processing.structure_tool_task import ( + _execute_structure_tool_impl as execute_structure_tool, + ) + + mock_get_fs.return_value = mock_fs + mock_create_ph.return_value = mock_platform_helper + + # Prompt studio lookup fails, agentic succeeds + mock_platform_helper.get_prompt_studio_tool.return_value = None + + agentic_metadata = { + "name": "Agentic Project", + "project_id": "ap-001", + "json_schema": {"field": "string"}, + } + mock_platform_helper.get_agentic_studio_tool.return_value = { + "tool_metadata": agentic_metadata, + } + + dispatcher_instance = MagicMock() + mock_dispatcher_cls.return_value = dispatcher_instance + + # Mock X2Text extraction + mock_x2text_instance = MagicMock() + mock_x2text_instance.process.return_value = MagicMock( + extracted_text="extracted text" + ) + mock_x2text_cls.return_value = mock_x2text_instance + + # Simulate successful agentic extraction + agentic_result = ExecutionResult( + success=True, + data={"output": {"field": "value"}}, + ) + dispatcher_instance.dispatch.return_value = agentic_result + + execute_structure_tool(base_params) + + # Should dispatch to agentic executor with agentic_extract operation + calls = dispatcher_instance.dispatch.call_args_list + assert len(calls) == 1 + assert calls[0][0][0].executor_name == "agentic" + assert calls[0][0][0].operation == "agentic_extract" + + +class TestStructureToolProfileOverrides: + """Profile overrides modify tool_metadata before pipeline dispatch.""" + + @patch(_PATCH_SHIM) + @patch(_PATCH_FILE_STORAGE) + 
@patch(_PATCH_PLATFORM_HELPER) + @patch(_PATCH_DISPATCHER) + def test_structure_tool_profile_overrides( + self, + mock_dispatcher_cls, + mock_create_ph, + mock_get_fs, + mock_shim_cls, + base_params, + tool_metadata_regular, + mock_fs, + mock_platform_helper, + ): + from file_processing.structure_tool_task import ( + _execute_structure_tool_impl as execute_structure_tool, + ) + + mock_get_fs.return_value = mock_fs + mock_create_ph.return_value = mock_platform_helper + mock_platform_helper.get_prompt_studio_tool.return_value = { + "tool_metadata": tool_metadata_regular, + } + + base_params["exec_metadata"]["llm_profile_id"] = "profile-1" + mock_platform_helper.get_llm_profile.return_value = { + "profile_name": "Test Profile", + "llm_id": "llm-override", + } + + dispatcher_instance = MagicMock() + mock_dispatcher_cls.return_value = dispatcher_instance + dispatcher_instance.dispatch.return_value = _make_pipeline_result( + output={"field_a": "answer"}, + ) + + result = execute_structure_tool(base_params) + + assert result["success"] is True + mock_platform_helper.get_llm_profile.assert_called_once_with("profile-1") + assert tool_metadata_regular["tool_settings"]["llm"] == "llm-override" + + +class TestStructureToolPipelineFailure: + """Pipeline failure propagated to caller.""" + + @patch(_PATCH_SHIM) + @patch(_PATCH_FILE_STORAGE) + @patch(_PATCH_PLATFORM_HELPER) + @patch(_PATCH_DISPATCHER) + def test_structure_tool_pipeline_failure( + self, + mock_dispatcher_cls, + mock_create_ph, + mock_get_fs, + mock_shim_cls, + base_params, + tool_metadata_regular, + mock_fs, + mock_platform_helper, + ): + from file_processing.structure_tool_task import ( + _execute_structure_tool_impl as execute_structure_tool, + ) + + mock_get_fs.return_value = mock_fs + mock_create_ph.return_value = mock_platform_helper + mock_platform_helper.get_prompt_studio_tool.return_value = { + "tool_metadata": tool_metadata_regular, + } + + dispatcher_instance = MagicMock() + 
mock_dispatcher_cls.return_value = dispatcher_instance + + pipeline_failure = ExecutionResult.failure( + error="X2Text adapter error: connection refused" + ) + dispatcher_instance.dispatch.return_value = pipeline_failure + + result = execute_structure_tool(base_params) + + assert result["success"] is False + assert "X2Text" in result["error"] + assert dispatcher_instance.dispatch.call_count == 1 + + +class TestStructureToolMultipleOutputs: + """Multiple outputs are passed to executor in answer_params.""" + + @patch(_PATCH_SHIM) + @patch(_PATCH_FILE_STORAGE) + @patch(_PATCH_PLATFORM_HELPER) + @patch(_PATCH_DISPATCHER) + def test_structure_tool_multiple_outputs( + self, + mock_dispatcher_cls, + mock_create_ph, + mock_get_fs, + mock_shim_cls, + base_params, + tool_metadata_regular, + mock_fs, + mock_platform_helper, + ): + from file_processing.structure_tool_task import ( + _execute_structure_tool_impl as execute_structure_tool, + ) + + mock_get_fs.return_value = mock_fs + mock_create_ph.return_value = mock_platform_helper + + # Add a second output with same chunking params + second_output = dict(tool_metadata_regular["outputs"][0]) + second_output["name"] = "field_b" + tool_metadata_regular["outputs"].append(second_output) + + mock_platform_helper.get_prompt_studio_tool.return_value = { + "tool_metadata": tool_metadata_regular, + } + + dispatcher_instance = MagicMock() + mock_dispatcher_cls.return_value = dispatcher_instance + dispatcher_instance.dispatch.return_value = _make_pipeline_result( + output={"field_a": "a", "field_b": "b"}, + ) + + result = execute_structure_tool(base_params) + + assert result["success"] is True + # Single dispatch — index dedup handled inside executor + assert dispatcher_instance.dispatch.call_count == 1 + ctx = dispatcher_instance.dispatch.call_args[0][0] + outputs = ctx.executor_params["answer_params"]["outputs"] + assert len(outputs) == 2 + assert outputs[0]["name"] == "field_a" + assert outputs[1]["name"] == "field_b" + + +class 
TestStructureToolOutputWritten: + """Output JSON written to correct path with correct structure.""" + + @patch(_PATCH_SHIM) + @patch(_PATCH_FILE_STORAGE) + @patch(_PATCH_PLATFORM_HELPER) + @patch(_PATCH_DISPATCHER) + def test_structure_tool_output_written( + self, + mock_dispatcher_cls, + mock_create_ph, + mock_get_fs, + mock_shim_cls, + base_params, + tool_metadata_regular, + mock_fs, + mock_platform_helper, + ): + from file_processing.structure_tool_task import ( + _execute_structure_tool_impl as execute_structure_tool, + ) + + mock_get_fs.return_value = mock_fs + mock_create_ph.return_value = mock_platform_helper + mock_platform_helper.get_prompt_studio_tool.return_value = { + "tool_metadata": tool_metadata_regular, + } + + dispatcher_instance = MagicMock() + mock_dispatcher_cls.return_value = dispatcher_instance + dispatcher_instance.dispatch.return_value = _make_pipeline_result( + output={"field_a": "answer"}, + ) + + result = execute_structure_tool(base_params) + + assert result["success"] is True + + # json_dump called twice: once for output file, once for INFILE overwrite + assert mock_fs.json_dump.call_count == 2 + + # First call: output file (execution_dir/{stem}.json) + first_call = mock_fs.json_dump.call_args_list[0] + first_path = first_call.kwargs.get( + "path", first_call[1].get("path") if len(first_call) > 1 else None + ) + if first_path is None: + first_path = first_call[0][0] if first_call[0] else None + assert str(first_path).endswith("test.json") + + # Second call: INFILE overwrite (so destination connector reads JSON, not PDF) + second_call = mock_fs.json_dump.call_args_list[1] + second_path = second_call.kwargs.get( + "path", second_call[1].get("path") if len(second_call) > 1 else None + ) + if second_path is None: + second_path = second_call[0][0] if second_call[0] else None + assert str(second_path) == base_params["input_file_path"] + + +class TestStructureToolMetadataFileName: + """metadata.file_name in pipeline result preserved.""" + + 
@patch(_PATCH_SHIM) + @patch(_PATCH_FILE_STORAGE) + @patch(_PATCH_PLATFORM_HELPER) + @patch(_PATCH_DISPATCHER) + def test_structure_tool_metadata_file_name( + self, + mock_dispatcher_cls, + mock_create_ph, + mock_get_fs, + mock_shim_cls, + base_params, + tool_metadata_regular, + mock_fs, + mock_platform_helper, + ): + from file_processing.structure_tool_task import ( + _execute_structure_tool_impl as execute_structure_tool, + ) + + mock_get_fs.return_value = mock_fs + mock_create_ph.return_value = mock_platform_helper + mock_platform_helper.get_prompt_studio_tool.return_value = { + "tool_metadata": tool_metadata_regular, + } + + dispatcher_instance = MagicMock() + mock_dispatcher_cls.return_value = dispatcher_instance + dispatcher_instance.dispatch.return_value = _make_pipeline_result( + output={"field_a": "answer"}, + metadata={"run_id": "123", "file_name": "test.pdf"}, + ) + + result = execute_structure_tool(base_params) + + assert result["success"] is True + assert result["data"]["metadata"]["file_name"] == "test.pdf" + + +class TestStructureToolNoSummarize: + """No summarize_params when summarization is not enabled.""" + + @patch(_PATCH_SHIM) + @patch(_PATCH_FILE_STORAGE) + @patch(_PATCH_PLATFORM_HELPER) + @patch(_PATCH_DISPATCHER) + def test_no_summarize_params_when_disabled( + self, + mock_dispatcher_cls, + mock_create_ph, + mock_get_fs, + mock_shim_cls, + base_params, + tool_metadata_regular, + mock_fs, + mock_platform_helper, + ): + from file_processing.structure_tool_task import ( + _execute_structure_tool_impl as execute_structure_tool, + ) + + mock_get_fs.return_value = mock_fs + mock_create_ph.return_value = mock_platform_helper + mock_platform_helper.get_prompt_studio_tool.return_value = { + "tool_metadata": tool_metadata_regular, + } + + dispatcher_instance = MagicMock() + mock_dispatcher_cls.return_value = dispatcher_instance + dispatcher_instance.dispatch.return_value = _make_pipeline_result() + + execute_structure_tool(base_params) + + ctx = 
dispatcher_instance.dispatch.call_args[0][0] + assert ctx.executor_params["summarize_params"] is None + assert ctx.executor_params["pipeline_options"]["is_summarization_enabled"] is False + + +class TestWorkflowServiceDetection: + """Test _is_structure_tool_workflow detection.""" + + def test_is_structure_tool_detection(self): + from shared.workflow.execution.service import ( + WorkerWorkflowExecutionService, + ) + + service = WorkerWorkflowExecutionService() + + # Mock execution_service with a structure tool instance + mock_exec_service = MagicMock() + ti = MagicMock() + ti.image_name = "unstract/tool-structure" + mock_exec_service.tool_instances = [ti] + + result = service._is_structure_tool_workflow(mock_exec_service) + assert result is True + + def test_non_structure_tool_uses_docker(self): + from shared.workflow.execution.service import ( + WorkerWorkflowExecutionService, + ) + + service = WorkerWorkflowExecutionService() + + # Mock execution_service with a non-structure tool + mock_exec_service = MagicMock() + ti = MagicMock() + ti.image_name = "unstract/tool-classifier" + mock_exec_service.tool_instances = [ti] + + result = service._is_structure_tool_workflow(mock_exec_service) + assert result is False + + @patch.dict("os.environ", {"STRUCTURE_TOOL_IMAGE_NAME": "custom/structure"}) + def test_custom_structure_image_name(self): + from shared.workflow.execution.service import ( + WorkerWorkflowExecutionService, + ) + + service = WorkerWorkflowExecutionService() + + mock_exec_service = MagicMock() + ti = MagicMock() + ti.image_name = "custom/structure" + mock_exec_service.tool_instances = [ti] + + result = service._is_structure_tool_workflow(mock_exec_service) + assert result is True + + def test_registry_prefix_match(self): + """Image from backend with registry prefix matches default base name.""" + from shared.workflow.execution.service import ( + WorkerWorkflowExecutionService, + ) + + service = WorkerWorkflowExecutionService() + + # Worker uses default 
"unstract/tool-structure", but backend sends + # image with registry prefix (common in K8s deployments) + mock_exec_service = MagicMock() + ti = MagicMock() + ti.image_name = "gcr.io/my-project/tool-structure" + mock_exec_service.tool_instances = [ti] + + result = service._is_structure_tool_workflow(mock_exec_service) + assert result is True + + def test_registry_prefix_with_tag_match(self): + """Image with registry prefix and tag still matches.""" + from shared.workflow.execution.service import ( + WorkerWorkflowExecutionService, + ) + + service = WorkerWorkflowExecutionService() + + mock_exec_service = MagicMock() + ti = MagicMock() + ti.image_name = "us.gcr.io/prod/tool-structure:v1.2.3" + mock_exec_service.tool_instances = [ti] + + result = service._is_structure_tool_workflow(mock_exec_service) + assert result is True + + @patch.dict("os.environ", {"STRUCTURE_TOOL_IMAGE_NAME": "gcr.io/prod/tool-structure"}) + def test_env_has_registry_prefix_instance_has_different_prefix(self): + """Both env and instance have different registry prefixes, same base.""" + from shared.workflow.execution.service import ( + WorkerWorkflowExecutionService, + ) + + service = WorkerWorkflowExecutionService() + + mock_exec_service = MagicMock() + ti = MagicMock() + ti.image_name = "ecr.aws/other/tool-structure" + mock_exec_service.tool_instances = [ti] + + result = service._is_structure_tool_workflow(mock_exec_service) + assert result is True + + +class TestStructureToolParamsPassthrough: + """Task receives correct params from WorkerWorkflowExecutionService.""" + + @patch( + "shared.workflow.execution.service.WorkerWorkflowExecutionService." + "_execute_structure_tool_workflow" + ) + @patch( + "shared.workflow.execution.service.WorkerWorkflowExecutionService." 
+ "_is_structure_tool_workflow", + return_value=True, + ) + def test_structure_tool_params_passthrough( + self, mock_is_struct, mock_exec_struct + ): + from shared.workflow.execution.service import ( + WorkerWorkflowExecutionService, + ) + + service = WorkerWorkflowExecutionService() + + mock_exec_service = MagicMock() + mock_exec_service.tool_instances = [MagicMock()] + + service._build_and_execute_workflow(mock_exec_service, "test.pdf") + + # Verify _execute_structure_tool_workflow was called + mock_exec_struct.assert_called_once_with( + mock_exec_service, "test.pdf" + ) + + +class TestHelperFunctions: + """Test standalone helper functions.""" + + def test_apply_profile_overrides(self): + from file_processing.structure_tool_task import ( + _apply_profile_overrides, + ) + + tool_metadata = { + "tool_settings": { + "llm": "old-llm", + "embedding": "old-emb", + }, + "outputs": [ + { + "name": "field_a", + "llm": "old-llm", + "embedding": "old-emb", + }, + ], + } + profile_data = { + "llm_id": "new-llm", + "embedding_model_id": "new-emb", + } + + changes = _apply_profile_overrides(tool_metadata, profile_data) + + assert len(changes) == 4 # 2 in tool_settings + 2 in output + assert tool_metadata["tool_settings"]["llm"] == "new-llm" + assert tool_metadata["tool_settings"]["embedding"] == "new-emb" + assert tool_metadata["outputs"][0]["llm"] == "new-llm" + assert tool_metadata["outputs"][0]["embedding"] == "new-emb" + + def test_should_skip_extraction_no_table_settings(self): + from file_processing.structure_tool_task import ( + _should_skip_extraction_for_smart_table, + ) + + outputs = [{"name": "field_a", "prompt": "What?"}] + assert ( + _should_skip_extraction_for_smart_table(outputs) + is False + ) + + def test_should_skip_extraction_with_json_schema(self): + from file_processing.structure_tool_task import ( + _should_skip_extraction_for_smart_table, + ) + + outputs = [ + { + "name": "field_a", + "table_settings": {}, + "prompt": '{"col1": "string", "col2": 
"number"}', + } + ] + assert ( + _should_skip_extraction_for_smart_table(outputs) + is True + ) diff --git a/workers/tests/test_sanity_phase4.py b/workers/tests/test_sanity_phase4.py new file mode 100644 index 0000000000..7e94489f3d --- /dev/null +++ b/workers/tests/test_sanity_phase4.py @@ -0,0 +1,899 @@ +"""Phase 4-SANITY — IDE path integration tests through executor chain. + +Phase 4 replaces PromptTool HTTP calls in PromptStudioHelper with +ExecutionDispatcher → executor worker → LegacyExecutor. + +These tests build the EXACT payloads that prompt_studio_helper.py +now sends via ExecutionDispatcher, push them through the full Celery +eager-mode chain, and verify the results match what the IDE expects. + +This validates the full contract: + prompt_studio_helper builds payload + → ExecutionContext(execution_source="ide", ...) + → Celery task → LegacyExecutor._handle_X() + → ExecutionResult → result.data used by IDE + +All tests use execution_source="ide" to match the real IDE path. +""" + +from unittest.mock import MagicMock, patch + +import pytest + +from executor.executors.constants import ( + PromptServiceConstants as PSKeys, +) +from unstract.sdk1.execution.context import ExecutionContext +from unstract.sdk1.execution.dispatcher import ExecutionDispatcher +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + +# --------------------------------------------------------------------------- +# Patch targets (same as Phase 2 sanity) +# --------------------------------------------------------------------------- + +_PATCH_X2TEXT = "executor.executors.legacy_executor.X2Text" +_PATCH_FS = "executor.executors.legacy_executor.FileUtils.get_fs_instance" +_PATCH_INDEX_DEPS = ( + "executor.executors.legacy_executor.LegacyExecutor._get_indexing_deps" +) +_PATCH_PROMPT_DEPS = ( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" +) +_PATCH_SHIM = 
"executor.executors.legacy_executor.ExecutorToolShim" +_PATCH_RUN_COMPLETION = ( + "executor.executors.answer_prompt.AnswerPromptService.run_completion" +) +_PATCH_INDEX_UTILS = ( + "unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key" +) +_PATCH_PLUGIN_LOADER = ( + "executor.executors.plugins.loader.ExecutorPluginLoader.get" +) + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture(autouse=True) +def _ensure_legacy_registered(): + """Ensure LegacyExecutor is registered.""" + from executor.executors.legacy_executor import LegacyExecutor + + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry._registry["legacy"] = LegacyExecutor + yield + + +@pytest.fixture +def eager_app(): + """Configure executor Celery app for eager-mode testing.""" + from executor.worker import app + + original = { + "task_always_eager": app.conf.task_always_eager, + "task_eager_propagates": app.conf.task_eager_propagates, + "result_backend": app.conf.result_backend, + } + app.conf.update( + task_always_eager=True, + task_eager_propagates=False, + result_backend="cache+memory://", + ) + yield app + app.conf.update(original) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _run_task(eager_app, ctx_dict): + """Run execute_extraction task via task.apply() (eager-safe).""" + task = eager_app.tasks["execute_extraction"] + result = task.apply(args=[ctx_dict]) + return result.get() + + +def _mock_llm(answer="ide answer"): + """Create a mock LLM matching the answer_prompt pattern.""" + llm = MagicMock(name="llm") + response = MagicMock() + response.text = answer + llm.complete.return_value = { + PSKeys.RESPONSE: response, + PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, + 
PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], + PSKeys.WHISPER_HASH: "", + } + llm.get_usage_reason.return_value = "extraction" + llm.get_metrics.return_value = {"tokens": 42} + return llm + + +def _mock_prompt_deps(llm=None): + """Return 7-tuple matching _get_prompt_deps() shape.""" + if llm is None: + llm = _mock_llm() + + from executor.executors.answer_prompt import AnswerPromptService as answer_prompt_svc_cls + + retrieval_svc = MagicMock(name="RetrievalService") + retrieval_svc.run_retrieval.return_value = ["chunk1"] + retrieval_svc.retrieve_complete_context.return_value = ["full doc"] + + variable_replacement_svc = MagicMock(name="VariableReplacementService") + variable_replacement_svc.is_variables_present.return_value = False + + index_cls = MagicMock(name="Index") + index_instance = MagicMock() + index_instance.generate_index_key.return_value = "doc-ide-key" + index_cls.return_value = index_instance + + llm_cls = MagicMock(name="LLM") + llm_cls.return_value = llm + + embedding_compat_cls = MagicMock(name="EmbeddingCompat") + vector_db_cls = MagicMock(name="VectorDB") + + return ( + answer_prompt_svc_cls, + retrieval_svc, + variable_replacement_svc, + index_cls, + llm_cls, + embedding_compat_cls, + vector_db_cls, + ) + + +def _mock_process_response(text="ide extracted text"): + """Build a mock TextExtractionResult.""" + from unstract.sdk1.adapters.x2text.dto import ( + TextExtractionMetadata, + TextExtractionResult, + ) + + metadata = TextExtractionMetadata(whisper_hash="ide-hash") + return TextExtractionResult( + extracted_text=text, + extraction_metadata=metadata, + ) + + +def _make_ide_prompt(name="invoice_number", prompt="What is the invoice number?", + output_type="text", **overrides): + """Build a prompt dict matching what prompt_studio_helper builds. + + Uses the exact key strings from ToolStudioPromptKeys / PSKeys. 
+ """ + d = { + PSKeys.NAME: name, + PSKeys.PROMPT: prompt, + PSKeys.TYPE: output_type, + # These match the hyphenated keys from ToolStudioPromptKeys + "chunk-size": 512, + "chunk-overlap": 64, + "retrieval-strategy": "simple", + "llm": "llm-ide-1", + "embedding": "emb-ide-1", + "vector-db": "vdb-ide-1", + "x2text_adapter": "x2t-ide-1", + "similarity-top-k": 3, + "active": True, + "required": True, + } + d.update(overrides) + return d + + +# --- IDE context factories matching prompt_studio_helper payloads --- + + +def _ide_extract_ctx(**overrides): + """Build ExecutionContext matching dynamic_extractor() dispatch. + + Key mapping: dynamic_extractor uses IKeys constants for payload keys, + and adds "platform_api_key" for the executor. + """ + defaults = { + "executor_name": "legacy", + "operation": "extract", + "run_id": "run-ide-ext", + "execution_source": "ide", + "organization_id": "org-ide-test", + "executor_params": { + "x2text_instance_id": "x2t-ide-1", + "file_path": "/prompt-studio/org/user/tool/doc.pdf", + "enable_highlight": True, + "usage_kwargs": {"run_id": "run-ide-ext", "file_name": "doc.pdf"}, + "run_id": "run-ide-ext", + "log_events_id": "log-ide-1", + "execution_source": "ide", + "output_file_path": "/prompt-studio/org/user/tool/extract/doc.txt", + "platform_api_key": "pk-ide-test", + }, + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + +def _ide_index_ctx(**overrides): + """Build ExecutionContext matching dynamic_indexer() dispatch. + + Key mapping: dynamic_indexer uses IKeys constants and adds + "platform_api_key" for the executor. 
+ """ + defaults = { + "executor_name": "legacy", + "operation": "index", + "run_id": "run-ide-idx", + "execution_source": "ide", + "organization_id": "org-ide-test", + "executor_params": { + "tool_id": "tool-ide-1", + "embedding_instance_id": "emb-ide-1", + "vector_db_instance_id": "vdb-ide-1", + "x2text_instance_id": "x2t-ide-1", + "file_path": "/prompt-studio/org/user/tool/extract/doc.txt", + "file_hash": None, + "chunk_overlap": 64, + "chunk_size": 512, + "reindex": False, + "enable_highlight": True, + "usage_kwargs": {"run_id": "run-ide-idx", "file_name": "doc.pdf"}, + "extracted_text": "IDE extracted document text content", + "run_id": "run-ide-idx", + "log_events_id": "log-ide-1", + "execution_source": "ide", + "platform_api_key": "pk-ide-test", + }, + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + +def _ide_answer_prompt_ctx(prompts=None, **overrides): + """Build ExecutionContext matching _fetch_response() dispatch. + + Key mapping: _fetch_response uses TSPKeys (ToolStudioPromptKeys) + constants and adds PLATFORM_SERVICE_API_KEY + include_metadata. 
+ """ + if prompts is None: + prompts = [_make_ide_prompt()] + defaults = { + "executor_name": "legacy", + "operation": "answer_prompt", + "run_id": "run-ide-ap", + "execution_source": "ide", + "organization_id": "org-ide-test", + "executor_params": { + "tool_settings": { + "enable_challenge": False, + "challenge_llm": "llm-challenge-1", + "single_pass_extraction_mode": False, + "summarize_as_source": False, + "preamble": "Extract accurately.", + "postamble": "No explanation.", + "grammar": [], + "enable_highlight": True, + "enable_word_confidence": False, + "platform_postamble": "", + "word_confidence_postamble": "", + }, + "outputs": prompts, + "tool_id": "tool-ide-1", + "run_id": "run-ide-ap", + "file_name": "invoice.pdf", + "file_hash": "abc123hash", + "file_path": "/prompt-studio/org/user/tool/extract/invoice.txt", + "log_events_id": "log-ide-1", + "execution_source": "ide", + "custom_data": {}, + "PLATFORM_SERVICE_API_KEY": "pk-ide-test", + "include_metadata": True, + }, + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + +def _ide_single_pass_ctx(prompts=None, **overrides): + """Build ExecutionContext matching _fetch_single_pass_response() dispatch.""" + if prompts is None: + prompts = [ + _make_ide_prompt(name="revenue", prompt="What is total revenue?"), + _make_ide_prompt(name="date", prompt="What is the date?"), + ] + defaults = { + "executor_name": "legacy", + "operation": "single_pass_extraction", + "run_id": "run-ide-sp", + "execution_source": "ide", + "organization_id": "org-ide-test", + "executor_params": { + "tool_settings": { + "preamble": "Extract accurately.", + "postamble": "No explanation.", + "grammar": [], + "llm": "llm-ide-1", + "x2text_adapter": "x2t-ide-1", + "vector-db": "vdb-ide-1", + "embedding": "emb-ide-1", + "chunk-size": 0, + "chunk-overlap": 0, + "enable_challenge": False, + "enable_highlight": True, + "enable_word_confidence": False, + "challenge_llm": None, + "platform_postamble": "", + 
"word_confidence_postamble": "", + "summarize_as_source": False, + }, + "outputs": prompts, + "tool_id": "tool-ide-1", + "run_id": "run-ide-sp", + "file_hash": "abc123hash", + "file_name": "invoice.pdf", + "file_path": "/prompt-studio/org/user/tool/extract/invoice.txt", + "log_events_id": "log-ide-1", + "execution_source": "ide", + "custom_data": {}, + "PLATFORM_SERVICE_API_KEY": "pk-ide-test", + "include_metadata": True, + }, + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + +# =========================================================================== +# Test classes +# =========================================================================== + + +class TestIDEExtract: + """IDE extract payload → executor → extracted_text.""" + + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + def test_ide_extract_returns_text(self, mock_x2text_cls, mock_get_fs, eager_app): + """IDE extract payload produces extracted_text in result.data.""" + mock_x2text = MagicMock() + mock_x2text.process.return_value = _mock_process_response( + "Invoice #12345 dated 2024-01-15" + ) + mock_x2text.x2text_instance = MagicMock() + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + ctx = _ide_extract_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + assert "extracted_text" in result.data + assert result.data["extracted_text"] == "Invoice #12345 dated 2024-01-15" + + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + def test_ide_extract_with_output_file_path( + self, mock_x2text_cls, mock_get_fs, eager_app + ): + """IDE extract passes output_file_path to x2text.process().""" + mock_x2text = MagicMock() + mock_x2text.process.return_value = _mock_process_response("text") + mock_x2text.x2text_instance = MagicMock() + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + ctx = _ide_extract_ctx() + _run_task(eager_app, 
ctx.to_dict()) + + # Verify output_file_path was passed through + call_kwargs = mock_x2text.process.call_args + assert call_kwargs is not None + assert "output_file_path" in call_kwargs.kwargs + assert call_kwargs.kwargs["output_file_path"] == ( + "/prompt-studio/org/user/tool/extract/doc.txt" + ) + + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + def test_ide_extract_failure(self, mock_x2text_cls, mock_get_fs, eager_app): + """Adapter failure → ExecutionResult(success=False).""" + from unstract.sdk1.adapters.exceptions import AdapterError + + mock_x2text = MagicMock() + mock_x2text.x2text_instance = MagicMock() + mock_x2text.x2text_instance.get_name.return_value = "LLMWhisperer" + mock_x2text.process.side_effect = AdapterError("extraction failed") + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + ctx = _ide_extract_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is False + assert "extraction failed" in result.error + + +class TestIDEIndex: + """IDE index payload → executor → doc_id.""" + + @patch(_PATCH_FS) + @patch(_PATCH_INDEX_DEPS) + def test_ide_index_returns_doc_id(self, mock_deps, mock_get_fs, eager_app): + """IDE index payload produces doc_id in result.data.""" + mock_index_cls = MagicMock() + mock_index = MagicMock() + mock_index.generate_index_key.return_value = "doc-ide-indexed" + mock_index.is_document_indexed.return_value = False + mock_index.perform_indexing.return_value = "doc-ide-indexed" + mock_index_cls.return_value = mock_index + + mock_emb_cls = MagicMock() + mock_emb_cls.return_value = MagicMock() + mock_vdb_cls = MagicMock() + mock_vdb_cls.return_value = MagicMock() + + mock_deps.return_value = (mock_index_cls, mock_emb_cls, mock_vdb_cls) + mock_get_fs.return_value = MagicMock() + + ctx = _ide_index_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert 
result.success is True + assert result.data["doc_id"] == "doc-ide-indexed" + + @patch(_PATCH_FS) + @patch(_PATCH_INDEX_DEPS) + def test_ide_index_with_null_file_hash(self, mock_deps, mock_get_fs, eager_app): + """IDE indexer sends file_hash=None — executor handles it.""" + mock_index_cls = MagicMock() + mock_index = MagicMock() + mock_index.generate_index_key.return_value = "doc-null-hash" + mock_index.is_document_indexed.return_value = False + mock_index.perform_indexing.return_value = "doc-null-hash" + mock_index_cls.return_value = mock_index + + mock_deps.return_value = (mock_index_cls, MagicMock(), MagicMock()) + mock_get_fs.return_value = MagicMock() + + # file_hash=None is exactly what dynamic_indexer sends + ctx = _ide_index_ctx() + assert ctx.executor_params["file_hash"] is None + + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + assert result.data["doc_id"] == "doc-null-hash" + + @patch(_PATCH_FS) + @patch(_PATCH_INDEX_DEPS) + def test_ide_index_failure(self, mock_deps, mock_get_fs, eager_app): + """Index failure → ExecutionResult(success=False).""" + mock_index_cls = MagicMock() + mock_index = MagicMock() + mock_index.generate_index_key.return_value = "doc-fail" + mock_index.is_document_indexed.return_value = False + mock_index.perform_indexing.side_effect = RuntimeError("VDB timeout") + mock_index_cls.return_value = mock_index + + mock_deps.return_value = (mock_index_cls, MagicMock(), MagicMock()) + mock_get_fs.return_value = MagicMock() + + ctx = _ide_index_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is False + + +class TestIDEAnswerPrompt: + """IDE answer_prompt payload → executor → {output, metadata, metrics}.""" + + @patch(_PATCH_PLUGIN_LOADER, return_value=None) + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def 
test_ide_answer_prompt_text( + self, mock_shim_cls, mock_deps, _mock_idx, _mock_plugin, eager_app + ): + """IDE text prompt → output dict with prompt_key → answer.""" + llm = _mock_llm("INV-2024-001") + mock_deps.return_value = _mock_prompt_deps(llm) + mock_shim_cls.return_value = MagicMock() + + ctx = _ide_answer_prompt_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + # IDE expects result.data to have "output", "metadata", "metrics" + assert "output" in result.data + assert "metadata" in result.data + assert "metrics" in result.data + assert result.data["output"]["invoice_number"] == "INV-2024-001" + + @patch(_PATCH_PLUGIN_LOADER, return_value=None) + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_ide_answer_prompt_metadata_has_run_id( + self, mock_shim_cls, mock_deps, _mock_idx, _mock_plugin, eager_app + ): + """IDE response metadata contains run_id and file_name.""" + llm = _mock_llm("answer") + mock_deps.return_value = _mock_prompt_deps(llm) + mock_shim_cls.return_value = MagicMock() + + ctx = _ide_answer_prompt_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + metadata = result.data["metadata"] + assert metadata["run_id"] == "run-ide-ap" + assert metadata["file_name"] == "invoice.pdf" + + @patch(_PATCH_PLUGIN_LOADER, return_value=None) + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_ide_answer_prompt_with_eval_settings( + self, mock_shim_cls, mock_deps, _mock_idx, _mock_plugin, eager_app + ): + """Prompt with eval_settings passes through to executor cleanly.""" + llm = _mock_llm("answer") + mock_deps.return_value = _mock_prompt_deps(llm) + mock_shim_cls.return_value = MagicMock() + + prompt = _make_ide_prompt( + eval_settings={ + "evaluate": True, + "monitor_llm": 
["llm-monitor-1"], + "exclude_failed": True, + } + ) + ctx = _ide_answer_prompt_ctx(prompts=[prompt]) + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + + @patch(_PATCH_PLUGIN_LOADER, return_value=None) + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_ide_answer_prompt_platform_key_reaches_shim( + self, mock_shim_cls, mock_deps, _mock_idx, _mock_plugin, eager_app + ): + """PLATFORM_SERVICE_API_KEY in payload reaches ExecutorToolShim.""" + llm = _mock_llm("answer") + mock_deps.return_value = _mock_prompt_deps(llm) + mock_shim_cls.return_value = MagicMock() + + ctx = _ide_answer_prompt_ctx() + _run_task(eager_app, ctx.to_dict()) + + # Verify shim was constructed with the platform key + mock_shim_cls.assert_called() + call_kwargs = mock_shim_cls.call_args + assert call_kwargs.kwargs.get("platform_api_key") == "pk-ide-test" + + @patch(_PATCH_PLUGIN_LOADER, return_value=None) + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_ide_answer_prompt_webhook_settings( + self, mock_shim_cls, mock_deps, _mock_idx, _mock_plugin, eager_app + ): + """Prompt with webhook settings passes through cleanly.""" + llm = _mock_llm("answer") + mock_deps.return_value = _mock_prompt_deps(llm) + mock_shim_cls.return_value = MagicMock() + + prompt = _make_ide_prompt( + enable_postprocessing_webhook=True, + postprocessing_webhook_url="https://example.com/hook", + ) + ctx = _ide_answer_prompt_ctx(prompts=[prompt]) + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + + +class TestIDESinglePass: + """IDE single_pass_extraction → executor → same shape as answer_prompt.""" + + @patch(_PATCH_PLUGIN_LOADER, return_value=None) + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") + 
@patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_ide_single_pass_multi_prompt( + self, mock_shim_cls, mock_deps, _mock_idx, _mock_plugin, eager_app + ): + """Single pass with multiple prompts → all fields in output.""" + llm = _mock_llm("single pass value") + mock_deps.return_value = _mock_prompt_deps(llm) + mock_shim_cls.return_value = MagicMock() + + ctx = _ide_single_pass_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + assert "output" in result.data + assert "revenue" in result.data["output"] + assert "date" in result.data["output"] + + @patch(_PATCH_PLUGIN_LOADER, return_value=None) + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_ide_single_pass_has_metadata( + self, mock_shim_cls, mock_deps, _mock_idx, _mock_plugin, eager_app + ): + """Single pass returns metadata with run_id.""" + llm = _mock_llm("value") + mock_deps.return_value = _mock_prompt_deps(llm) + mock_shim_cls.return_value = MagicMock() + + ctx = _ide_single_pass_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + assert "metadata" in result.data + assert result.data["metadata"]["run_id"] == "run-ide-sp" + + +class TestIDEDispatcherIntegration: + """Test ExecutionDispatcher dispatch() with IDE payloads in eager mode. + + Celery's send_task() doesn't work with eager mode for AsyncResult.get(), + so we patch send_task to delegate to task.apply() instead. 
+ """ + + @staticmethod + def _patch_send_task(eager_app): + """Patch send_task on eager_app to use task.apply().""" + original_send_task = eager_app.send_task + + def patched_send_task(name, args=None, kwargs=None, **opts): + task = eager_app.tasks[name] + return task.apply(args=args, kwargs=kwargs) + + eager_app.send_task = patched_send_task + return original_send_task + + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + def test_dispatcher_extract_round_trip( + self, mock_x2text_cls, mock_get_fs, eager_app + ): + """ExecutionDispatcher.dispatch() → extract → ExecutionResult.""" + mock_x2text = MagicMock() + mock_x2text.process.return_value = _mock_process_response( + "dispatcher extracted" + ) + mock_x2text.x2text_instance = MagicMock() + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + original = self._patch_send_task(eager_app) + try: + dispatcher = ExecutionDispatcher(celery_app=eager_app) + ctx = _ide_extract_ctx() + result = dispatcher.dispatch(ctx) + finally: + eager_app.send_task = original + + assert result.success is True + assert result.data["extracted_text"] == "dispatcher extracted" + + @patch(_PATCH_PLUGIN_LOADER, return_value=None) + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_dispatcher_answer_prompt_round_trip( + self, mock_shim_cls, mock_deps, _mock_idx, _mock_plugin, eager_app + ): + """ExecutionDispatcher.dispatch() → answer_prompt → ExecutionResult.""" + llm = _mock_llm("dispatcher answer") + mock_deps.return_value = _mock_prompt_deps(llm) + mock_shim_cls.return_value = MagicMock() + + original = self._patch_send_task(eager_app) + try: + dispatcher = ExecutionDispatcher(celery_app=eager_app) + ctx = _ide_answer_prompt_ctx() + result = dispatcher.dispatch(ctx) + finally: + eager_app.send_task = original + + assert result.success is True + assert result.data["output"]["invoice_number"] == "dispatcher answer" + assert "metadata" in 
result.data + + @patch(_PATCH_PLUGIN_LOADER, return_value=None) + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_dispatcher_single_pass_round_trip( + self, mock_shim_cls, mock_deps, _mock_idx, _mock_plugin, eager_app + ): + """ExecutionDispatcher.dispatch() → single_pass → ExecutionResult.""" + llm = _mock_llm("sp dispatch") + mock_deps.return_value = _mock_prompt_deps(llm) + mock_shim_cls.return_value = MagicMock() + + original = self._patch_send_task(eager_app) + try: + dispatcher = ExecutionDispatcher(celery_app=eager_app) + ctx = _ide_single_pass_ctx() + result = dispatcher.dispatch(ctx) + finally: + eager_app.send_task = original + + assert result.success is True + assert "revenue" in result.data["output"] + + @patch(_PATCH_FS) + @patch(_PATCH_INDEX_DEPS) + def test_dispatcher_index_round_trip( + self, mock_deps, mock_get_fs, eager_app + ): + """ExecutionDispatcher.dispatch() → index → ExecutionResult.""" + mock_index_cls = MagicMock() + mock_index = MagicMock() + mock_index.generate_index_key.return_value = "doc-dispatch-idx" + mock_index.is_document_indexed.return_value = False + mock_index.perform_indexing.return_value = "doc-dispatch-idx" + mock_index_cls.return_value = mock_index + + mock_deps.return_value = (mock_index_cls, MagicMock(), MagicMock()) + mock_get_fs.return_value = MagicMock() + + original = self._patch_send_task(eager_app) + try: + dispatcher = ExecutionDispatcher(celery_app=eager_app) + ctx = _ide_index_ctx() + result = dispatcher.dispatch(ctx) + finally: + eager_app.send_task = original + + assert result.success is True + assert result.data["doc_id"] == "doc-dispatch-idx" + + +class TestIDEExecutionSourceRouting: + """Verify execution_source='ide' propagates correctly.""" + + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + def test_ide_source_reaches_extract_handler( + self, mock_x2text_cls, mock_get_fs, eager_app + ): + """Extract handler receives execution_source='ide' 
from context.""" + mock_x2text = MagicMock() + mock_x2text.process.return_value = _mock_process_response("text") + mock_x2text.x2text_instance = MagicMock() + mock_x2text_cls.return_value = mock_x2text + mock_fs = MagicMock() + mock_get_fs.return_value = mock_fs + + ctx = _ide_extract_ctx() + assert ctx.execution_source == "ide" + + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + assert result.success is True + + # For IDE source, _update_exec_metadata should NOT write + # (it only writes for execution_source="tool") + # This is verified by the fact that no dump_json was called + # on the fs mock. In IDE mode, whisper_hash metadata is skipped. + + @patch(_PATCH_PLUGIN_LOADER, return_value=None) + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_ide_source_in_answer_prompt_enables_variable_replacement( + self, mock_shim_cls, mock_deps, _mock_idx, _mock_plugin, eager_app + ): + """execution_source='ide' in payload sets is_ide=True for variable replacement.""" + llm = _mock_llm("var answer") + deps = _mock_prompt_deps(llm) + # Enable variable checking to verify is_ide routing + var_service = deps[2] # VariableReplacementService + var_service.is_variables_present.return_value = False + mock_deps.return_value = deps + mock_shim_cls.return_value = MagicMock() + + ctx = _ide_answer_prompt_ctx() + # Verify execution_source is in both context and payload + assert ctx.execution_source == "ide" + assert ctx.executor_params["execution_source"] == "ide" + + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + assert result.success is True + + +class TestIDEPayloadKeyCompatibility: + """Verify the exact key names in IDE payloads match executor expectations.""" + + def test_extract_payload_keys_match_executor(self): + """dynamic_extractor payload keys match _handle_extract reads.""" + ctx = _ide_extract_ctx() + 
params = ctx.executor_params + + # These are the keys _handle_extract reads from params + assert "x2text_instance_id" in params + assert "file_path" in params + assert "platform_api_key" in params + assert "output_file_path" in params + assert "enable_highlight" in params + assert "usage_kwargs" in params + + def test_index_payload_keys_match_executor(self): + """dynamic_indexer payload keys match _handle_index reads.""" + ctx = _ide_index_ctx() + params = ctx.executor_params + + # These are the keys _handle_index reads from params + assert "embedding_instance_id" in params + assert "vector_db_instance_id" in params + assert "x2text_instance_id" in params + assert "file_path" in params + assert "extracted_text" in params + assert "platform_api_key" in params + assert "chunk_size" in params + assert "chunk_overlap" in params + + def test_answer_prompt_payload_keys_match_executor(self): + """_fetch_response payload keys match _handle_answer_prompt reads.""" + ctx = _ide_answer_prompt_ctx() + params = ctx.executor_params + + # These are the keys _handle_answer_prompt reads + assert "tool_settings" in params + assert "outputs" in params + assert "tool_id" in params + assert "file_hash" in params + assert "file_path" in params + assert "file_name" in params + assert "PLATFORM_SERVICE_API_KEY" in params + assert "log_events_id" in params + assert "execution_source" in params + assert "custom_data" in params + + def test_answer_prompt_platform_key_is_uppercase(self): + """answer_prompt uses PLATFORM_SERVICE_API_KEY (uppercase, not snake_case).""" + ctx = _ide_answer_prompt_ctx() + # _handle_answer_prompt reads PSKeys.PLATFORM_SERVICE_API_KEY + # which is "PLATFORM_SERVICE_API_KEY" + assert "PLATFORM_SERVICE_API_KEY" in ctx.executor_params + # NOT "platform_api_key" (that's for extract/index) + assert ctx.executor_params["PLATFORM_SERVICE_API_KEY"] == "pk-ide-test" + + def test_extract_platform_key_is_lowercase(self): + """extract/index uses platform_api_key (lowercase 
snake_case).""" + ctx = _ide_extract_ctx() + assert "platform_api_key" in ctx.executor_params + + def test_execution_context_has_ide_source(self): + """All IDE contexts have execution_source='ide'.""" + assert _ide_extract_ctx().execution_source == "ide" + assert _ide_index_ctx().execution_source == "ide" + assert _ide_answer_prompt_ctx().execution_source == "ide" + assert _ide_single_pass_ctx().execution_source == "ide" diff --git a/workers/tests/test_sanity_phase5.py b/workers/tests/test_sanity_phase5.py new file mode 100644 index 0000000000..31675b8f9c --- /dev/null +++ b/workers/tests/test_sanity_phase5.py @@ -0,0 +1,852 @@ +"""Phase 5-SANITY — Integration tests for the multi-hop elimination. + +Phase 5 eliminates idle backend worker slots by: + - Adding ``dispatch_with_callback`` (fire-and-forget with link/link_error) + - Adding compound operations: ``ide_index``, ``structure_pipeline`` + - Rewiring structure_tool_task to single ``structure_pipeline`` dispatch + +These tests push payloads through the full Celery eager-mode chain and +verify the results match what callers (views / structure_tool_task) expect. 
+""" + +from unittest.mock import MagicMock, patch + +import pytest + +from executor.executors.constants import ( + PromptServiceConstants as PSKeys, +) +from unstract.sdk1.execution.context import ExecutionContext, Operation +from unstract.sdk1.execution.dispatcher import ExecutionDispatcher +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + +# --------------------------------------------------------------------------- +# Patch targets +# --------------------------------------------------------------------------- + +_PATCH_X2TEXT = "executor.executors.legacy_executor.X2Text" +_PATCH_FS = "executor.executors.legacy_executor.FileUtils.get_fs_instance" +_PATCH_INDEX_DEPS = ( + "executor.executors.legacy_executor.LegacyExecutor._get_indexing_deps" +) +_PATCH_PROMPT_DEPS = ( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" +) +_PATCH_SHIM = "executor.executors.legacy_executor.ExecutorToolShim" +_PATCH_RUN_COMPLETION = ( + "executor.executors.answer_prompt.AnswerPromptService.run_completion" +) +_PATCH_INDEX_UTILS = ( + "unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key" +) + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture(autouse=True) +def _ensure_legacy_registered(): + """Ensure LegacyExecutor is registered.""" + from executor.executors.legacy_executor import LegacyExecutor + + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry._registry["legacy"] = LegacyExecutor + yield + + +@pytest.fixture +def eager_app(): + """Configure executor Celery app for eager-mode testing.""" + from executor.worker import app + + original = { + "task_always_eager": app.conf.task_always_eager, + "task_eager_propagates": app.conf.task_eager_propagates, + "result_backend": app.conf.result_backend, + } + app.conf.update( + 
task_always_eager=True, + task_eager_propagates=False, + result_backend="cache+memory://", + ) + yield app + app.conf.update(original) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _run_task(eager_app, ctx_dict): + """Run execute_extraction task via task.apply() (eager-safe).""" + task = eager_app.tasks["execute_extraction"] + result = task.apply(args=[ctx_dict]) + return result.get() + + +def _mock_llm(answer="pipeline answer"): + """Create a mock LLM matching the answer_prompt pattern.""" + llm = MagicMock(name="llm") + response = MagicMock() + response.text = answer + llm.complete.return_value = { + PSKeys.RESPONSE: response, + PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, + PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], + PSKeys.WHISPER_HASH: "", + } + llm.get_usage_reason.return_value = "extraction" + llm.get_metrics.return_value = {"tokens": 42} + return llm + + +def _mock_prompt_deps(llm=None): + """Return 7-tuple matching _get_prompt_deps() shape.""" + if llm is None: + llm = _mock_llm() + + from executor.executors.answer_prompt import AnswerPromptService as answer_prompt_svc_cls + + retrieval_service = MagicMock(name="RetrievalService") + retrieval_service.run_retrieval.return_value = ["chunk1"] + retrieval_service.retrieve_complete_context.return_value = ["full doc"] + + variable_replacement_service = MagicMock(name="VariableReplacementService") + variable_replacement_service.is_variables_present.return_value = False + + index_cls = MagicMock(name="Index") + index_instance = MagicMock() + index_instance.generate_index_key.return_value = "doc-key-1" + index_cls.return_value = index_instance + + llm_cls = MagicMock(name="LLM") + llm_cls.return_value = llm + + embedding_compat_cls = MagicMock(name="EmbeddingCompat") + vector_db_cls = MagicMock(name="VectorDB") + + return ( + answer_prompt_svc_cls, 
+ retrieval_service, + variable_replacement_service, + index_cls, + llm_cls, + embedding_compat_cls, + vector_db_cls, + ) + + +def _mock_process_response(text="extracted text"): + """Build a mock TextExtractionResult.""" + from unstract.sdk1.adapters.x2text.dto import ( + TextExtractionMetadata, + TextExtractionResult, + ) + + metadata = TextExtractionMetadata(whisper_hash="test-hash") + return TextExtractionResult( + extracted_text=text, + extraction_metadata=metadata, + ) + + +def _make_output(name="field_a", prompt="What is the revenue?", **overrides): + """Build an output dict for answer_prompt payloads.""" + d = { + PSKeys.NAME: name, + PSKeys.PROMPT: prompt, + PSKeys.TYPE: "text", + "chunk-size": 512, + "chunk-overlap": 64, + "retrieval-strategy": "simple", + "llm": "llm-1", + "embedding": "emb-1", + "vector-db": "vdb-1", + "x2text_adapter": "x2t-1", + "similarity-top-k": 3, + "active": True, + } + d.update(overrides) + return d + + +# --------------------------------------------------------------------------- +# 5A: dispatch_with_callback +# --------------------------------------------------------------------------- + + +class TestDispatchWithCallback: + """Verify dispatch_with_callback passes link/link_error to send_task.""" + + def test_callback_kwargs_passed(self): + mock_app = MagicMock() + mock_app.send_task.return_value = MagicMock(id="task-123") + dispatcher = ExecutionDispatcher(celery_app=mock_app) + + ctx = ExecutionContext( + executor_name="legacy", + operation="answer_prompt", + run_id="run-cb-1", + execution_source="ide", + ) + on_success = MagicMock(name="success_sig") + on_error = MagicMock(name="error_sig") + + result = dispatcher.dispatch_with_callback( + ctx, + on_success=on_success, + on_error=on_error, + task_id="pre-generated-id", + ) + + call_kwargs = mock_app.send_task.call_args + assert call_kwargs.kwargs["link"] is on_success + assert call_kwargs.kwargs["link_error"] is on_error + assert call_kwargs.kwargs["task_id"] == 
"pre-generated-id" + assert result.id == "task-123" + + def test_no_callbacks_omits_link_kwargs(self): + mock_app = MagicMock() + mock_app.send_task.return_value = MagicMock(id="task-456") + dispatcher = ExecutionDispatcher(celery_app=mock_app) + + ctx = ExecutionContext( + executor_name="legacy", + operation="extract", + run_id="run-cb-2", + execution_source="tool", + ) + dispatcher.dispatch_with_callback(ctx) + + call_kwargs = mock_app.send_task.call_args + assert "link" not in call_kwargs.kwargs + assert "link_error" not in call_kwargs.kwargs + + def test_no_app_raises(self): + dispatcher = ExecutionDispatcher(celery_app=None) + ctx = ExecutionContext( + executor_name="legacy", + operation="extract", + run_id="run-cb-3", + execution_source="tool", + ) + with pytest.raises(ValueError, match="No Celery app"): + dispatcher.dispatch_with_callback(ctx) + + +# --------------------------------------------------------------------------- +# 5C: ide_index compound operation through eager chain +# --------------------------------------------------------------------------- + + +class TestIdeIndexEagerChain: + """ide_index: extract + index in a single executor invocation.""" + + @patch(_PATCH_INDEX_DEPS) + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + @patch(_PATCH_SHIM) + def test_ide_index_success( + self, + mock_shim, + mock_x2text, + mock_fs, + mock_index_deps, + eager_app, + ): + """Full ide_index through eager chain returns doc_id.""" + # Mock extract + x2t_instance = MagicMock() + x2t_instance.process.return_value = _mock_process_response( + "IDE extracted text" + ) + mock_x2text.return_value = x2t_instance + + fs = MagicMock() + fs.exists.return_value = False + mock_fs.return_value = fs + + # Mock index + index_inst = MagicMock() + index_inst.index.return_value = "idx-doc-1" + index_inst.generate_index_key.return_value = "idx-key-1" + mock_index_deps.return_value = ( + MagicMock(return_value=index_inst), # Index + MagicMock(), # EmbeddingCompat + MagicMock(), # 
VectorDB + ) + + ctx = ExecutionContext( + executor_name="legacy", + operation="ide_index", + run_id="run-ide-idx", + execution_source="ide", + organization_id="org-test", + executor_params={ + "extract_params": { + "x2text_instance_id": "x2t-1", + "file_path": "/data/doc.pdf", + "enable_highlight": False, + "output_file_path": "/data/extract/doc.txt", + "platform_api_key": "pk-test", + "usage_kwargs": {}, + }, + "index_params": { + "tool_id": "tool-1", + "embedding_instance_id": "emb-1", + "vector_db_instance_id": "vdb-1", + "x2text_instance_id": "x2t-1", + "file_path": "/data/extract/doc.txt", + "file_hash": None, + "chunk_overlap": 64, + "chunk_size": 512, + "reindex": True, + "enable_highlight": False, + "usage_kwargs": {}, + "run_id": "run-ide-idx", + "execution_source": "ide", + "platform_api_key": "pk-test", + }, + }, + ) + + result_dict = _run_task(eager_app, ctx.to_dict()) + + result = ExecutionResult.from_dict(result_dict) + assert result.success + assert "doc_id" in result.data + + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + @patch(_PATCH_SHIM) + def test_ide_index_extract_failure( + self, + mock_shim, + mock_x2text, + mock_fs, + eager_app, + ): + """ide_index returns failure if extract fails.""" + x2t_instance = MagicMock() + x2t_instance.process.side_effect = Exception("X2Text unavailable") + mock_x2text.return_value = x2t_instance + + fs = MagicMock() + fs.exists.return_value = False + mock_fs.return_value = fs + + ctx = ExecutionContext( + executor_name="legacy", + operation="ide_index", + run_id="run-ide-fail", + execution_source="ide", + executor_params={ + "extract_params": { + "x2text_instance_id": "x2t-1", + "file_path": "/data/doc.pdf", + "enable_highlight": False, + "platform_api_key": "pk-test", + "usage_kwargs": {}, + }, + "index_params": { + "tool_id": "tool-1", + "embedding_instance_id": "emb-1", + "vector_db_instance_id": "vdb-1", + "x2text_instance_id": "x2t-1", + "file_path": "/data/extract/doc.txt", + "file_hash": None, + 
"chunk_overlap": 64, + "chunk_size": 512, + "reindex": True, + "enable_highlight": False, + "usage_kwargs": {}, + "run_id": "run-ide-fail", + "execution_source": "ide", + "platform_api_key": "pk-test", + }, + }, + ) + + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + assert not result.success + assert "X2Text" in result.error + + +# --------------------------------------------------------------------------- +# 5D: structure_pipeline compound operation through eager chain +# --------------------------------------------------------------------------- + + +class TestStructurePipelineEagerChain: + """structure_pipeline: full extract→index→answer through eager chain.""" + + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-pipeline") + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_INDEX_DEPS) + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + @patch(_PATCH_SHIM) + def test_structure_pipeline_normal( + self, + mock_shim, + mock_x2text, + mock_fs, + mock_index_deps, + mock_prompt_deps, + _mock_idx_utils, + eager_app, + ): + """Normal pipeline: extract → index → answer_prompt.""" + # Mock extract + x2t_instance = MagicMock() + x2t_instance.process.return_value = _mock_process_response("Revenue is $1M") + mock_x2text.return_value = x2t_instance + + fs = MagicMock() + fs.exists.return_value = False + mock_fs.return_value = fs + + # Mock index + index_inst = MagicMock() + index_inst.index.return_value = "idx-doc-1" + index_inst.generate_index_key.return_value = "idx-key-1" + mock_index_deps.return_value = ( + MagicMock(return_value=index_inst), + MagicMock(), + MagicMock(), + ) + + # Mock prompt deps + mock_prompt_deps.return_value = _mock_prompt_deps() + + ctx = ExecutionContext( + executor_name="legacy", + operation="structure_pipeline", + run_id="run-sp-1", + execution_source="tool", + organization_id="org-test", + executor_params={ + "extract_params": { + "x2text_instance_id": "x2t-1", + "file_path": "/data/test.pdf", + 
"enable_highlight": False, + "output_file_path": "/data/exec/EXTRACT", + "platform_api_key": "pk-test", + "usage_kwargs": {}, + }, + "index_template": { + "tool_id": "tool-1", + "file_hash": "hash123", + "is_highlight_enabled": False, + "platform_api_key": "pk-test", + "extracted_file_path": "/data/exec/EXTRACT", + }, + "answer_params": { + "run_id": "run-sp-1", + "execution_id": "exec-1", + "tool_settings": { + "vector-db": "vdb-1", + "embedding": "emb-1", + "x2text_adapter": "x2t-1", + "llm": "llm-1", + "enable_challenge": False, + "challenge_llm": "", + "enable_single_pass_extraction": False, + "summarize_as_source": False, + "enable_highlight": False, + }, + "outputs": [_make_output()], + "tool_id": "tool-1", + "file_hash": "hash123", + "file_name": "test.pdf", + "file_path": "/data/exec/EXTRACT", + "execution_source": "tool", + "PLATFORM_SERVICE_API_KEY": "pk-test", + }, + "pipeline_options": { + "skip_extraction_and_indexing": False, + "is_summarization_enabled": False, + "is_single_pass_enabled": False, + "input_file_path": "/data/test.pdf", + "source_file_name": "test.pdf", + }, + "summarize_params": None, + }, + ) + + result_dict = _run_task(eager_app, ctx.to_dict()) + + result = ExecutionResult.from_dict(result_dict) + assert result.success + assert "output" in result.data + assert "metadata" in result.data + # source_file_name injected into metadata + assert result.data["metadata"]["file_name"] == "test.pdf" + + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-sp") + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + @patch(_PATCH_SHIM) + def test_structure_pipeline_single_pass( + self, + mock_shim, + mock_x2text, + mock_fs, + mock_prompt_deps, + _mock_idx_utils, + eager_app, + ): + """Single pass: extract → single_pass_extraction (no index).""" + x2t_instance = MagicMock() + x2t_instance.process.return_value = _mock_process_response("Revenue data") + mock_x2text.return_value = x2t_instance + + fs = MagicMock() + 
fs.exists.return_value = False + mock_fs.return_value = fs + + mock_prompt_deps.return_value = _mock_prompt_deps() + + ctx = ExecutionContext( + executor_name="legacy", + operation="structure_pipeline", + run_id="run-sp-sp", + execution_source="tool", + executor_params={ + "extract_params": { + "x2text_instance_id": "x2t-1", + "file_path": "/data/test.pdf", + "enable_highlight": False, + "output_file_path": "/data/exec/EXTRACT", + "platform_api_key": "pk-test", + "usage_kwargs": {}, + }, + "index_template": {}, + "answer_params": { + "run_id": "run-sp-sp", + "tool_settings": { + "vector-db": "vdb-1", + "embedding": "emb-1", + "x2text_adapter": "x2t-1", + "llm": "llm-1", + "enable_challenge": False, + "challenge_llm": "", + "enable_single_pass_extraction": True, + "summarize_as_source": False, + "enable_highlight": False, + }, + "outputs": [_make_output()], + "tool_id": "tool-1", + "file_hash": "hash123", + "file_name": "test.pdf", + "file_path": "/data/exec/EXTRACT", + "execution_source": "tool", + "PLATFORM_SERVICE_API_KEY": "pk-test", + }, + "pipeline_options": { + "skip_extraction_and_indexing": False, + "is_summarization_enabled": False, + "is_single_pass_enabled": True, + "input_file_path": "/data/test.pdf", + "source_file_name": "test.pdf", + }, + "summarize_params": None, + }, + ) + + result_dict = _run_task(eager_app, ctx.to_dict()) + + result = ExecutionResult.from_dict(result_dict) + assert result.success + assert "output" in result.data + + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-skip") + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + @patch(_PATCH_SHIM) + def test_structure_pipeline_skip_extraction( + self, + mock_shim, + mock_x2text, + mock_fs, + mock_prompt_deps, + _mock_idx_utils, + eager_app, + ): + """Smart table: skip extraction, go straight to answer_prompt.""" + fs = MagicMock() + fs.exists.return_value = False + mock_fs.return_value = fs + + mock_prompt_deps.return_value = _mock_prompt_deps() + + ctx = 
ExecutionContext( + executor_name="legacy", + operation="structure_pipeline", + run_id="run-sp-skip", + execution_source="tool", + executor_params={ + "extract_params": {}, + "index_template": {}, + "answer_params": { + "run_id": "run-sp-skip", + "tool_settings": { + "vector-db": "vdb-1", + "embedding": "emb-1", + "x2text_adapter": "x2t-1", + "llm": "llm-1", + "enable_challenge": False, + "challenge_llm": "", + "enable_single_pass_extraction": False, + "summarize_as_source": False, + "enable_highlight": False, + }, + "outputs": [_make_output(prompt='{"key": "value"}')], + "tool_id": "tool-1", + "file_hash": "hash123", + "file_name": "test.xlsx", + "file_path": "/data/test.xlsx", + "execution_source": "tool", + "PLATFORM_SERVICE_API_KEY": "pk-test", + }, + "pipeline_options": { + "skip_extraction_and_indexing": True, + "is_summarization_enabled": False, + "is_single_pass_enabled": False, + "input_file_path": "/data/test.xlsx", + "source_file_name": "test.xlsx", + }, + "summarize_params": None, + }, + ) + + result_dict = _run_task(eager_app, ctx.to_dict()) + + result = ExecutionResult.from_dict(result_dict) + assert result.success + # No extract was called (X2Text not mocked beyond fixture) + mock_x2text.assert_not_called() + + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + @patch(_PATCH_SHIM) + def test_structure_pipeline_extract_failure( + self, + mock_shim, + mock_x2text, + mock_fs, + eager_app, + ): + """Pipeline extract failure propagated as result failure.""" + x2t_instance = MagicMock() + x2t_instance.process.side_effect = Exception("X2Text timeout") + mock_x2text.return_value = x2t_instance + + fs = MagicMock() + fs.exists.return_value = False + mock_fs.return_value = fs + + ctx = ExecutionContext( + executor_name="legacy", + operation="structure_pipeline", + run_id="run-sp-fail", + execution_source="tool", + executor_params={ + "extract_params": { + "x2text_instance_id": "x2t-1", + "file_path": "/data/test.pdf", + "enable_highlight": False, + 
"platform_api_key": "pk-test", + "usage_kwargs": {}, + }, + "index_template": {}, + "answer_params": {}, + "pipeline_options": { + "skip_extraction_and_indexing": False, + "is_summarization_enabled": False, + "is_single_pass_enabled": False, + "input_file_path": "/data/test.pdf", + "source_file_name": "test.pdf", + }, + "summarize_params": None, + }, + ) + + result_dict = _run_task(eager_app, ctx.to_dict()) + + result = ExecutionResult.from_dict(result_dict) + assert not result.success + assert "X2Text" in result.error + + +# --------------------------------------------------------------------------- +# 5E: structure_tool_task single dispatch verification +# --------------------------------------------------------------------------- + + +class TestStructureToolSingleDispatch: + """Verify structure_tool_task dispatches exactly once.""" + + @patch( + "executor.executor_tool_shim.ExecutorToolShim" + ) + @patch( + "file_processing.structure_tool_task._get_file_storage" + ) + @patch( + "file_processing.structure_tool_task._create_platform_helper" + ) + @patch( + "file_processing.structure_tool_task.ExecutionDispatcher" + ) + def test_single_dispatch_normal( + self, + mock_dispatcher_cls, + mock_create_ph, + mock_get_fs, + mock_shim, + ): + """Normal path sends single structure_pipeline dispatch.""" + from file_processing.structure_tool_task import ( + _execute_structure_tool_impl, + ) + + fs = MagicMock() + fs.exists.return_value = False + mock_get_fs.return_value = fs + + ph = MagicMock() + ph.get_prompt_studio_tool.return_value = { + "tool_metadata": { + "name": "Test", + "is_agentic": False, + "tool_id": "t1", + "tool_settings": { + "vector-db": "v1", + "embedding": "e1", + "x2text_adapter": "x1", + "llm": "l1", + }, + "outputs": [ + { + "name": "f1", + "prompt": "What?", + "type": "text", + "active": True, + "chunk-size": 512, + "chunk-overlap": 64, + "llm": "l1", + "embedding": "e1", + "vector-db": "v1", + "x2text_adapter": "x1", + }, + ], + }, + } + 
mock_create_ph.return_value = ph + + dispatcher = MagicMock() + mock_dispatcher_cls.return_value = dispatcher + dispatcher.dispatch.return_value = ExecutionResult( + success=True, + data={"output": {"f1": "ans"}, "metadata": {}, "metrics": {}}, + ) + + params = { + "organization_id": "org-1", + "workflow_id": "wf-1", + "execution_id": "ex-1", + "file_execution_id": "fex-1", + "tool_instance_metadata": {"prompt_registry_id": "pr-1"}, + "platform_service_api_key": "pk-1", + "input_file_path": "/data/test.pdf", + "output_dir_path": "/output", + "source_file_name": "test.pdf", + "execution_data_dir": "/data/exec", + "file_hash": "h1", + "exec_metadata": {}, + } + + result = _execute_structure_tool_impl(params) + + assert result["success"] is True + assert dispatcher.dispatch.call_count == 1 + ctx = dispatcher.dispatch.call_args[0][0] + assert ctx.operation == "structure_pipeline" + assert "extract_params" in ctx.executor_params + assert "index_template" in ctx.executor_params + assert "answer_params" in ctx.executor_params + assert "pipeline_options" in ctx.executor_params + + +# --------------------------------------------------------------------------- +# Operation enum completeness +# --------------------------------------------------------------------------- + + +class TestOperationEnum: + """Verify Phase 5 operations registered in enum.""" + + def test_ide_index_operation(self): + assert hasattr(Operation, "IDE_INDEX") + assert Operation.IDE_INDEX.value == "ide_index" + + def test_structure_pipeline_operation(self): + assert hasattr(Operation, "STRUCTURE_PIPELINE") + assert Operation.STRUCTURE_PIPELINE.value == "structure_pipeline" + + +# --------------------------------------------------------------------------- +# Dispatcher modes +# --------------------------------------------------------------------------- + + +class TestDispatcherModes: + """Verify all three dispatch modes work.""" + + def test_dispatch_sync(self): + """dispatch() calls send_task and 
.get().""" + mock_app = MagicMock() + async_result = MagicMock() + async_result.get.return_value = ExecutionResult( + success=True, data={"test": 1} + ).to_dict() + mock_app.send_task.return_value = async_result + + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = ExecutionContext( + executor_name="legacy", + operation="extract", + run_id="r1", + execution_source="tool", + ) + result = dispatcher.dispatch(ctx, timeout=10) + + assert result.success + mock_app.send_task.assert_called_once() + async_result.get.assert_called_once() + + def test_dispatch_async(self): + """dispatch_async() returns task_id without blocking.""" + mock_app = MagicMock() + mock_app.send_task.return_value = MagicMock(id="async-id") + + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = ExecutionContext( + executor_name="legacy", + operation="extract", + run_id="r2", + execution_source="tool", + ) + task_id = dispatcher.dispatch_async(ctx) + + assert task_id == "async-id" + mock_app.send_task.assert_called_once() diff --git a/workers/tests/test_sanity_phase6a.py b/workers/tests/test_sanity_phase6a.py new file mode 100644 index 0000000000..4c49c7407a --- /dev/null +++ b/workers/tests/test_sanity_phase6a.py @@ -0,0 +1,310 @@ +"""Phase 6A Sanity — Plugin loader infrastructure + queue-per-executor routing. + +Verifies: +1. ExecutorPluginLoader.get() returns None when no plugins installed +2. ExecutorPluginLoader.discover_executors() returns empty when no cloud executors +3. ExecutorPluginLoader.clear() resets cached state +4. ExecutorPluginLoader.get() discovers entry-point-based plugins (mocked) +5. ExecutorPluginLoader.discover_executors() loads cloud executors (mocked) +6. text_processor.add_hex_line_numbers() +7. ExecutionDispatcher._get_queue() naming convention +8. Protocol classes importable and runtime-checkable +9. 
executors/__init__.py triggers discover_executors() +""" + +from unittest.mock import MagicMock, patch + +import pytest +from executor.executors.plugins.loader import ExecutorPluginLoader +from executor.executors.plugins.text_processor import add_hex_line_numbers +from unstract.sdk1.execution.dispatcher import ExecutionDispatcher + + +@pytest.fixture(autouse=True) +def _reset_plugin_loader(): + """Ensure clean plugin loader state for every test.""" + ExecutorPluginLoader.clear() + yield + ExecutorPluginLoader.clear() + + +# ── 1. Plugin loader: no plugins installed ────────────────────────── + + +class TestPluginLoaderNoPlugins: + """When no cloud plugins are installed, loader returns None / empty. + + Mocks entry_points to simulate a clean OSS environment where + no cloud executor plugins are pip-installed. + """ + + @patch( + "importlib.metadata.entry_points", + return_value=[], + ) + def test_get_returns_none_for_unknown_plugin(self, _mock_eps): + result = ExecutorPluginLoader.get("nonexistent-plugin") + assert result is None + + @patch( + "importlib.metadata.entry_points", + return_value=[], + ) + def test_get_returns_none_for_highlight_data(self, _mock_eps): + """highlight-data is a cloud plugin, not installed in OSS.""" + result = ExecutorPluginLoader.get("highlight-data") + assert result is None + + @patch( + "importlib.metadata.entry_points", + return_value=[], + ) + def test_get_returns_none_for_challenge(self, _mock_eps): + result = ExecutorPluginLoader.get("challenge") + assert result is None + + @patch( + "importlib.metadata.entry_points", + return_value=[], + ) + def test_get_returns_none_for_evaluation(self, _mock_eps): + result = ExecutorPluginLoader.get("evaluation") + assert result is None + + @patch( + "importlib.metadata.entry_points", + return_value=[], + ) + def test_discover_executors_returns_empty(self, _mock_eps): + discovered = ExecutorPluginLoader.discover_executors() + assert discovered == [] + + +# ── 2. 
Plugin loader: clear resets cached state ───────────────────── + + +class TestPluginLoaderClear: + @patch("importlib.metadata.entry_points", return_value=[]) + def test_clear_resets_plugins(self, _mock_eps): + # Force discovery (caches empty dict) + ExecutorPluginLoader.get("anything") + assert ExecutorPluginLoader._plugins is not None + + ExecutorPluginLoader.clear() + assert ExecutorPluginLoader._plugins is None + + @patch("importlib.metadata.entry_points", return_value=[]) + def test_get_after_clear_re_discovers(self, _mock_eps): + """After clear(), next get() re-runs discovery.""" + ExecutorPluginLoader.get("x") + assert ExecutorPluginLoader._plugins == {} + + ExecutorPluginLoader.clear() + assert ExecutorPluginLoader._plugins is None + + # Next get() triggers fresh discovery + ExecutorPluginLoader.get("y") + assert ExecutorPluginLoader._plugins is not None + + +# ── 3. Plugin loader with mocked entry points ────────────────────── + + +class TestPluginLoaderWithMockedEntryPoints: + """Simulate cloud plugins being installed by mocking entry_points().""" + + def test_get_discovers_plugin_from_entry_point(self): + """Mocked highlight-data entry point is loaded and cached.""" + + class FakeHighlightData: + pass + + fake_ep = MagicMock() + fake_ep.name = "highlight-data" + fake_ep.load.return_value = FakeHighlightData + + with patch( + "importlib.metadata.entry_points", + return_value=[fake_ep], + ): + result = ExecutorPluginLoader.get("highlight-data") + + assert result is FakeHighlightData + fake_ep.load.assert_called_once() + + def test_get_caches_after_first_call(self): + """Entry points are only queried once; subsequent calls use cache.""" + fake_ep = MagicMock() + fake_ep.name = "challenge" + fake_ep.load.return_value = type("FakeChallenge", (), {}) + + with patch( + "importlib.metadata.entry_points", + return_value=[fake_ep], + ) as mock_eps: + ExecutorPluginLoader.get("challenge") + ExecutorPluginLoader.get("challenge") # second call + + # entry_points() 
called only once (first get triggers discovery) + mock_eps.assert_called_once() + + def test_failed_plugin_load_is_skipped(self): + """If a plugin fails to load, it's skipped without raising.""" + bad_ep = MagicMock() + bad_ep.name = "bad-plugin" + bad_ep.load.side_effect = ImportError("missing dep") + + good_ep = MagicMock() + good_ep.name = "good-plugin" + good_ep.load.return_value = type("Good", (), {}) + + with patch( + "importlib.metadata.entry_points", + return_value=[bad_ep, good_ep], + ): + assert ExecutorPluginLoader.get("good-plugin") is not None + assert ExecutorPluginLoader.get("bad-plugin") is None + + def test_discover_executors_loads_classes(self): + """Mocked cloud executor entry points are imported.""" + + class FakeTableExecutor: + pass + + fake_ep = MagicMock() + fake_ep.name = "table" + fake_ep.load.return_value = FakeTableExecutor + + with patch( + "importlib.metadata.entry_points", + return_value=[fake_ep], + ): + discovered = ExecutorPluginLoader.discover_executors() + + assert discovered == ["table"] + fake_ep.load.assert_called_once() + + def test_discover_executors_skips_failures(self): + """Failed executor loads are skipped, successful ones returned.""" + bad_ep = MagicMock() + bad_ep.name = "broken" + bad_ep.load.side_effect = ImportError("nope") + + good_ep = MagicMock() + good_ep.name = "smart_table" + good_ep.load.return_value = type("FakeSmartTable", (), {}) + + with patch( + "importlib.metadata.entry_points", + return_value=[bad_ep, good_ep], + ): + discovered = ExecutorPluginLoader.discover_executors() + + assert discovered == ["smart_table"] + + +# ── 4. 
text_processor ─────────────────────────────────────────────── + + +class TestTextProcessor: + def test_single_line(self): + result = add_hex_line_numbers("hello") + assert result == "0x0: hello" + + def test_multiple_lines(self): + result = add_hex_line_numbers("a\nb\nc") + assert result == "0x0: a\n0x1: b\n0x2: c" + + def test_empty_string(self): + result = add_hex_line_numbers("") + assert result == "0x0: " + + def test_hex_width_grows(self): + # 17 lines → hex needs 2 digits (0x10 = 16) + text = "\n".join(f"line{i}" for i in range(17)) + result = add_hex_line_numbers(text) + lines = result.split("\n") + assert lines[0].startswith("0x00: ") + assert lines[16].startswith("0x10: ") + + +# ── 5. Queue-per-executor routing ─────────────────────────────────── + + +class TestQueuePerExecutor: + def test_get_queue_legacy(self): + assert ExecutionDispatcher._get_queue("legacy") == "celery_executor_legacy" + + def test_get_queue_table(self): + assert ExecutionDispatcher._get_queue("table") == "celery_executor_table" + + def test_get_queue_smart_table(self): + assert ( + ExecutionDispatcher._get_queue("smart_table") + == "celery_executor_smart_table" + ) + + def test_get_queue_simple_prompt_studio(self): + assert ( + ExecutionDispatcher._get_queue("simple_prompt_studio") + == "celery_executor_simple_prompt_studio" + ) + + def test_get_queue_agentic(self): + assert ExecutionDispatcher._get_queue("agentic") == "celery_executor_agentic" + + def test_get_queue_arbitrary_name(self): + """Any executor_name works — no whitelist.""" + assert ( + ExecutionDispatcher._get_queue("my_custom") + == "celery_executor_my_custom" + ) + + def test_queue_name_enum_matches_dispatcher(self): + """QueueName.EXECUTOR matches what dispatcher generates for 'legacy'.""" + from shared.enums.worker_enums import QueueName + + assert QueueName.EXECUTOR.value == ExecutionDispatcher._get_queue("legacy") + + +# ── 6. 
Protocol classes importable ────────────────────────────────── + + +class TestProtocols: + def test_highlight_data_protocol_importable(self): + from executor.executors.plugins.protocols import HighlightDataProtocol + + assert HighlightDataProtocol is not None + + def test_challenge_protocol_importable(self): + from executor.executors.plugins.protocols import ChallengeProtocol + + assert ChallengeProtocol is not None + + def test_evaluation_protocol_importable(self): + from executor.executors.plugins.protocols import EvaluationProtocol + + assert EvaluationProtocol is not None + + def test_runtime_checkable(self): + """Protocols are @runtime_checkable — isinstance checks work.""" + from executor.executors.plugins.protocols import ChallengeProtocol + + class FakeChallenge: + def run(self): + pass # Minimal stub to satisfy ChallengeProtocol for isinstance check + + assert isinstance(FakeChallenge(), ChallengeProtocol) + + +# ── 7. executors/__init__.py triggers discovery ───────────────────── + + +class TestExecutorsInit: + def test_cloud_executors_list_exists(self): + """executors.__init__ populates _cloud_executors (empty in OSS).""" + import executor.executors as mod + + assert hasattr(mod, "_cloud_executors") + # In pure OSS, no cloud executors are installed + assert isinstance(mod._cloud_executors, list) diff --git a/workers/tests/test_sanity_phase6c.py b/workers/tests/test_sanity_phase6c.py new file mode 100644 index 0000000000..54388f6fee --- /dev/null +++ b/workers/tests/test_sanity_phase6c.py @@ -0,0 +1,559 @@ +"""Phase 6C Sanity — Highlight data as cross-cutting plugin. + +Verifies: +1. run_completion() passes process_text to llm.complete() +2. run_completion() with process_text=None (default) works as before +3. construct_and_run_prompt() passes process_text through to run_completion() +4. _handle_answer_prompt() initializes highlight plugin when enabled + available +5. _handle_answer_prompt() skips highlight when plugin not installed +6. 
_handle_answer_prompt() skips highlight when enable_highlight=False +7. Highlight metadata populated when plugin provides data via process_text +""" + +from unittest.mock import MagicMock, patch + +import pytest +from executor.executors.answer_prompt import AnswerPromptService +from executor.executors.constants import PromptServiceConstants as PSKeys + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture() +def mock_llm(): + """Create a mock LLM that returns a realistic completion dict.""" + llm = MagicMock() + llm.complete.return_value = { + PSKeys.RESPONSE: MagicMock(text="42"), + PSKeys.HIGHLIGHT_DATA: [{"line": 1}], + PSKeys.CONFIDENCE_DATA: {"score": 0.95}, + PSKeys.WORD_CONFIDENCE_DATA: {"words": []}, + PSKeys.LINE_NUMBERS: [1, 2], + PSKeys.WHISPER_HASH: "abc123", + } + return llm + + +@pytest.fixture() +def mock_llm_no_highlight(): + """Create a mock LLM that returns completion without highlight data.""" + llm = MagicMock() + llm.complete.return_value = { + PSKeys.RESPONSE: MagicMock(text="answer"), + PSKeys.HIGHLIGHT_DATA: [], + PSKeys.LINE_NUMBERS: [], + PSKeys.WHISPER_HASH: "", + } + return llm + + +# --------------------------------------------------------------------------- +# 1. 
run_completion() passes process_text to llm.complete() +# --------------------------------------------------------------------------- + +class TestRunCompletionProcessText: + def test_process_text_passed_to_llm_complete(self, mock_llm): + """process_text callback is forwarded to llm.complete().""" + callback = MagicMock(name="highlight_run") + AnswerPromptService.run_completion( + llm=mock_llm, + prompt="test prompt", + process_text=callback, + ) + mock_llm.complete.assert_called_once() + call_kwargs = mock_llm.complete.call_args + assert call_kwargs.kwargs.get("process_text") is callback or \ + call_kwargs[1].get("process_text") is callback + + def test_process_text_none_by_default(self, mock_llm): + """When process_text not provided, None is passed to llm.complete().""" + AnswerPromptService.run_completion( + llm=mock_llm, + prompt="test prompt", + ) + call_kwargs = mock_llm.complete.call_args + # Check both positional and keyword args + pt = call_kwargs.kwargs.get("process_text", "MISSING") + if pt == "MISSING": + # Might be positional + pt = call_kwargs[1].get("process_text") + assert pt is None + + def test_process_text_none_explicit(self, mock_llm): + """Explicit process_text=None works as before.""" + answer = AnswerPromptService.run_completion( + llm=mock_llm, + prompt="test prompt", + process_text=None, + ) + assert answer == "42" + + +# --------------------------------------------------------------------------- +# 2. 
run_completion() populates metadata from completion dict +# --------------------------------------------------------------------------- + +class TestRunCompletionMetadata: + def test_highlight_metadata_populated_with_process_text(self, mock_llm): + """When process_text is provided and LLM returns highlight data, + metadata is populated correctly.""" + callback = MagicMock(name="highlight_run") + metadata: dict = {} + AnswerPromptService.run_completion( + llm=mock_llm, + prompt="test", + metadata=metadata, + prompt_key="field1", + enable_highlight=True, + enable_word_confidence=True, + process_text=callback, + ) + assert metadata[PSKeys.HIGHLIGHT_DATA]["field1"] == [{"line": 1}] + assert metadata[PSKeys.CONFIDENCE_DATA]["field1"] == {"score": 0.95} + assert metadata[PSKeys.WORD_CONFIDENCE_DATA]["field1"] == {"words": []} + assert metadata[PSKeys.LINE_NUMBERS]["field1"] == [1, 2] + assert metadata[PSKeys.WHISPER_HASH] == "abc123" + + def test_highlight_metadata_empty_without_process_text( + self, mock_llm_no_highlight + ): + """Without process_text, highlight data is empty but no error.""" + metadata: dict = {} + AnswerPromptService.run_completion( + llm=mock_llm_no_highlight, + prompt="test", + metadata=metadata, + prompt_key="field1", + enable_highlight=True, + process_text=None, + ) + assert metadata[PSKeys.HIGHLIGHT_DATA]["field1"] == [] + assert metadata[PSKeys.LINE_NUMBERS]["field1"] == [] + + +# --------------------------------------------------------------------------- +# 3. 
construct_and_run_prompt() passes process_text through +# --------------------------------------------------------------------------- + +class TestConstructAndRunPromptProcessText: + def test_process_text_forwarded(self, mock_llm): + """construct_and_run_prompt passes process_text to run_completion.""" + callback = MagicMock(name="highlight_run") + tool_settings = { + PSKeys.PREAMBLE: "", + PSKeys.POSTAMBLE: "", + PSKeys.GRAMMAR: [], + PSKeys.ENABLE_HIGHLIGHT: True, + } + output = { + PSKeys.NAME: "field1", + PSKeys.PROMPT: "What is the value?", + PSKeys.PROMPTX: "What is the value?", + PSKeys.TYPE: PSKeys.TEXT, + } + answer = AnswerPromptService.construct_and_run_prompt( + tool_settings=tool_settings, + output=output, + llm=mock_llm, + context="some context", + prompt=PSKeys.PROMPTX, + metadata={}, + process_text=callback, + ) + # Verify callback was passed to llm.complete + call_kwargs = mock_llm.complete.call_args + pt = call_kwargs.kwargs.get("process_text") + if pt is None: + pt = call_kwargs[1].get("process_text") + assert pt is callback + assert answer == "42" + + def test_process_text_none_default(self, mock_llm): + """construct_and_run_prompt defaults process_text to None.""" + tool_settings = { + PSKeys.PREAMBLE: "", + PSKeys.POSTAMBLE: "", + PSKeys.GRAMMAR: [], + } + output = { + PSKeys.NAME: "field1", + PSKeys.PROMPT: "What?", + PSKeys.PROMPTX: "What?", + PSKeys.TYPE: PSKeys.TEXT, + } + AnswerPromptService.construct_and_run_prompt( + tool_settings=tool_settings, + output=output, + llm=mock_llm, + context="ctx", + prompt=PSKeys.PROMPTX, + metadata={}, + ) + call_kwargs = mock_llm.complete.call_args + pt = call_kwargs.kwargs.get("process_text") + if pt is None and "process_text" not in (call_kwargs.kwargs or {}): + pt = call_kwargs[1].get("process_text") + assert pt is None + + +# --------------------------------------------------------------------------- +# 4. 
_handle_answer_prompt() initializes highlight plugin +# --------------------------------------------------------------------------- + +class TestHandleAnswerPromptHighlight: + """Test highlight plugin integration in LegacyExecutor._handle_answer_prompt.""" + + def _make_context(self, enable_highlight=False): + """Build a minimal ExecutionContext for answer_prompt.""" + from unstract.sdk1.execution.context import ExecutionContext + + prompt_output = { + PSKeys.NAME: "field1", + PSKeys.PROMPT: "What is X?", + PSKeys.PROMPTX: "What is X?", + PSKeys.TYPE: PSKeys.TEXT, + PSKeys.CHUNK_SIZE: 0, + PSKeys.CHUNK_OVERLAP: 0, + PSKeys.LLM: "llm-123", + PSKeys.EMBEDDING: "emb-123", + PSKeys.VECTOR_DB: "vdb-123", + PSKeys.X2TEXT_ADAPTER: "x2t-123", + PSKeys.RETRIEVAL_STRATEGY: "simple", + } + return ExecutionContext( + executor_name="legacy", + operation="answer_prompt", + run_id="run-001", + execution_source="ide", + organization_id="org-1", + executor_params={ + PSKeys.TOOL_SETTINGS: { + PSKeys.PREAMBLE: "", + PSKeys.POSTAMBLE: "", + PSKeys.GRAMMAR: [], + PSKeys.ENABLE_HIGHLIGHT: enable_highlight, + }, + PSKeys.OUTPUTS: [prompt_output], + PSKeys.TOOL_ID: "tool-1", + PSKeys.FILE_HASH: "hash123", + PSKeys.FILE_PATH: "/data/doc.txt", + PSKeys.FILE_NAME: "doc.txt", + PSKeys.PLATFORM_SERVICE_API_KEY: "key-123", + }, + ) + + def _get_executor(self): + from executor.executors.legacy_executor import LegacyExecutor + from unstract.sdk1.execution.registry import ExecutorRegistry + + ExecutorRegistry.clear() + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry.register(LegacyExecutor) + return ExecutorRegistry.get("legacy") + + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_highlight_plugin_initialized_when_enabled( + self, mock_index_key, mock_shim_cls + ): + """When enable_highlight=True and plugin available, highlight is used.""" + 
mock_shim_cls.return_value = MagicMock() + + # Mock highlight plugin + mock_highlight_cls = MagicMock() + mock_highlight_instance = MagicMock() + mock_highlight_cls.return_value = mock_highlight_instance + + # Mock LLM + mock_llm = MagicMock() + mock_llm.complete.return_value = { + PSKeys.RESPONSE: MagicMock(text="result"), + PSKeys.HIGHLIGHT_DATA: [{"line": 5}], + PSKeys.CONFIDENCE_DATA: {"score": 0.9}, + PSKeys.LINE_NUMBERS: [5], + PSKeys.WHISPER_HASH: "hash1", + } + mock_llm.get_usage_reason.return_value = "extraction" + mock_llm.get_metrics.return_value = {} + + mock_fs = MagicMock() + mock_llm_cls = MagicMock(return_value=mock_llm) + + executor = self._get_executor() + ctx = self._make_context(enable_highlight=True) + + with ( + patch.object( + executor, "_get_prompt_deps", + return_value=( + AnswerPromptService, + MagicMock( + retrieve_complete_context=MagicMock( + return_value=["context chunk"] + ) + ), + MagicMock( + is_variables_present=MagicMock(return_value=False) + ), + None, # Index + mock_llm_cls, + MagicMock(), # EmbeddingCompat + MagicMock(), # VectorDB + ), + ), + patch( + "executor.executors.plugins.loader.ExecutorPluginLoader.get", + return_value=mock_highlight_cls, + ), + patch( + "executor.executors.file_utils.FileUtils.get_fs_instance", + return_value=mock_fs, + ), + ): + result = executor._handle_answer_prompt(ctx) + + assert result.success + # Verify highlight plugin was instantiated with correct args + mock_highlight_cls.assert_called_once_with( + file_path="/data/doc.txt", + fs_instance=mock_fs, + enable_word_confidence=False, + ) + # Verify process_text was the highlight instance's run method + llm_complete_call = mock_llm.complete.call_args + assert llm_complete_call.kwargs.get("process_text") is \ + mock_highlight_instance.run + + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def 
test_highlight_skipped_when_plugin_not_installed( + self, mock_index_key, mock_shim_cls + ): + """When enable_highlight=True but plugin not installed, process_text=None.""" + mock_shim = MagicMock() + mock_shim_cls.return_value = mock_shim + + mock_llm = MagicMock() + mock_llm.complete.return_value = { + PSKeys.RESPONSE: MagicMock(text="result"), + PSKeys.HIGHLIGHT_DATA: [], + PSKeys.LINE_NUMBERS: [], + PSKeys.WHISPER_HASH: "", + } + mock_llm.get_usage_reason.return_value = "extraction" + mock_llm.get_metrics.return_value = {} + + executor = self._get_executor() + ctx = self._make_context(enable_highlight=True) + + mock_llm_cls = MagicMock(return_value=mock_llm) + with ( + patch.object( + executor, "_get_prompt_deps", + return_value=( + AnswerPromptService, + MagicMock( + retrieve_complete_context=MagicMock( + return_value=["chunk"] + ) + ), + MagicMock( + is_variables_present=MagicMock(return_value=False) + ), + None, + mock_llm_cls, + MagicMock(), + MagicMock(), + ), + ), + patch( + "executor.executors.plugins.loader.ExecutorPluginLoader.get", + return_value=None, # Plugin not installed + ), + ): + result = executor._handle_answer_prompt(ctx) + + assert result.success + # process_text should be None since plugin not available + llm_complete_call = mock_llm.complete.call_args + assert llm_complete_call.kwargs.get("process_text") is None + + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_highlight_skipped_when_disabled( + self, mock_index_key, mock_shim_cls + ): + """When enable_highlight=False, plugin loader is not even called.""" + mock_shim = MagicMock() + mock_shim_cls.return_value = mock_shim + + mock_llm = MagicMock() + mock_llm.complete.return_value = { + PSKeys.RESPONSE: MagicMock(text="result"), + PSKeys.HIGHLIGHT_DATA: [], + PSKeys.LINE_NUMBERS: [], + PSKeys.WHISPER_HASH: "", + } + mock_llm.get_usage_reason.return_value = 
"extraction" + mock_llm.get_metrics.return_value = {} + + executor = self._get_executor() + ctx = self._make_context(enable_highlight=False) + + mock_llm_cls = MagicMock(return_value=mock_llm) + with ( + patch.object( + executor, "_get_prompt_deps", + return_value=( + AnswerPromptService, + MagicMock( + retrieve_complete_context=MagicMock( + return_value=["chunk"] + ) + ), + MagicMock( + is_variables_present=MagicMock(return_value=False) + ), + None, + mock_llm_cls, + MagicMock(), + MagicMock(), + ), + ), + patch( + "executor.executors.plugins.loader.ExecutorPluginLoader.get", + ) as mock_plugin_get, + ): + result = executor._handle_answer_prompt(ctx) + + assert result.success + # Plugin loader should NOT have been called + mock_plugin_get.assert_not_called() + # process_text should be None + llm_complete_call = mock_llm.complete.call_args + assert llm_complete_call.kwargs.get("process_text") is None + + +# --------------------------------------------------------------------------- +# 5. 
Multiple prompts share same highlight instance +# --------------------------------------------------------------------------- + +class TestHighlightMultiplePrompts: + """Verify that one highlight instance is shared across all prompts.""" + + def _make_multi_prompt_context(self): + from unstract.sdk1.execution.context import ExecutionContext + + prompts = [] + for name in ["field1", "field2", "field3"]: + prompts.append({ + PSKeys.NAME: name, + PSKeys.PROMPT: f"What is {name}?", + PSKeys.PROMPTX: f"What is {name}?", + PSKeys.TYPE: PSKeys.TEXT, + PSKeys.CHUNK_SIZE: 0, + PSKeys.CHUNK_OVERLAP: 0, + PSKeys.LLM: "llm-123", + PSKeys.EMBEDDING: "emb-123", + PSKeys.VECTOR_DB: "vdb-123", + PSKeys.X2TEXT_ADAPTER: "x2t-123", + PSKeys.RETRIEVAL_STRATEGY: "simple", + }) + return ExecutionContext( + executor_name="legacy", + operation="answer_prompt", + run_id="run-002", + execution_source="tool", + organization_id="org-1", + executor_params={ + PSKeys.TOOL_SETTINGS: { + PSKeys.PREAMBLE: "", + PSKeys.POSTAMBLE: "", + PSKeys.GRAMMAR: [], + PSKeys.ENABLE_HIGHLIGHT: True, + }, + PSKeys.OUTPUTS: prompts, + PSKeys.TOOL_ID: "tool-1", + PSKeys.FILE_HASH: "hash123", + PSKeys.FILE_PATH: "/data/doc.txt", + PSKeys.FILE_NAME: "doc.txt", + PSKeys.PLATFORM_SERVICE_API_KEY: "key-123", + }, + ) + + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_single_highlight_instance_for_all_prompts( + self, mock_index_key, mock_shim_cls + ): + """One highlight instance is created and reused for all prompts.""" + mock_shim_cls.return_value = MagicMock() + + mock_highlight_cls = MagicMock() + mock_highlight_instance = MagicMock() + mock_highlight_cls.return_value = mock_highlight_instance + + mock_llm = MagicMock() + mock_llm.complete.return_value = { + PSKeys.RESPONSE: MagicMock(text="val"), + PSKeys.HIGHLIGHT_DATA: [], + PSKeys.LINE_NUMBERS: [], + PSKeys.WHISPER_HASH: "", + } + 
mock_llm.get_usage_reason.return_value = "extraction" + mock_llm.get_metrics.return_value = {} + + from executor.executors.legacy_executor import LegacyExecutor + from unstract.sdk1.execution.registry import ExecutorRegistry + + ExecutorRegistry.clear() + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry.register(LegacyExecutor) + executor = ExecutorRegistry.get("legacy") + ctx = self._make_multi_prompt_context() + + mock_llm_cls = MagicMock(return_value=mock_llm) + with ( + patch.object( + executor, "_get_prompt_deps", + return_value=( + AnswerPromptService, + MagicMock( + retrieve_complete_context=MagicMock( + return_value=["chunk"] + ) + ), + MagicMock( + is_variables_present=MagicMock(return_value=False) + ), + None, + mock_llm_cls, + MagicMock(), + MagicMock(), + ), + ), + patch( + "executor.executors.plugins.loader.ExecutorPluginLoader.get", + return_value=mock_highlight_cls, + ), + patch( + "executor.executors.file_utils.FileUtils.get_fs_instance", + return_value=MagicMock(), + ), + ): + result = executor._handle_answer_prompt(ctx) + + assert result.success + # highlight_cls should be instantiated exactly ONCE + assert mock_highlight_cls.call_count == 1 + # llm.complete should be called 3 times (once per prompt) + assert mock_llm.complete.call_count == 3 + # Each call should use the same process_text + for c in mock_llm.complete.call_args_list: + assert c.kwargs.get("process_text") is mock_highlight_instance.run diff --git a/workers/tests/test_sanity_phase6d.py b/workers/tests/test_sanity_phase6d.py new file mode 100644 index 0000000000..cd40c1b685 --- /dev/null +++ b/workers/tests/test_sanity_phase6d.py @@ -0,0 +1,553 @@ +"""Phase 6D Sanity — LegacyExecutor plugin integration. + +Verifies: +1. TABLE type raises LegacyExecutorError with routing guidance +2. LINE_ITEM type raises LegacyExecutorError (not supported) +3. Challenge plugin invoked when enable_challenge=True + plugin installed +4. 
Challenge skipped when plugin not installed (graceful degradation) +5. Challenge skipped when enable_challenge=False +6. Challenge skipped when challenge_llm not configured +7. Evaluation plugin invoked when eval_settings.evaluate=True + plugin installed +8. Evaluation skipped when plugin not installed +9. Evaluation skipped when eval_settings.evaluate=False +10. Challenge runs before evaluation (order matters) +11. Challenge mutates structured_output (via mock) +""" + +from unittest.mock import MagicMock, patch + +import pytest +from executor.executors.answer_prompt import AnswerPromptService +from executor.executors.constants import PromptServiceConstants as PSKeys +from executor.executors.exceptions import LegacyExecutorError +from unstract.sdk1.execution.result import ExecutionResult + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_context( + output_type="TEXT", + enable_highlight=False, + enable_challenge=False, + challenge_llm="", + eval_settings=None, +): + """Build a minimal ExecutionContext for answer_prompt tests.""" + from unstract.sdk1.execution.context import ExecutionContext + + prompt_output = { + PSKeys.NAME: "field1", + PSKeys.PROMPT: "What is X?", + PSKeys.PROMPTX: "What is X?", + PSKeys.TYPE: output_type, + PSKeys.CHUNK_SIZE: 0, + PSKeys.CHUNK_OVERLAP: 0, + PSKeys.LLM: "llm-123", + PSKeys.EMBEDDING: "emb-123", + PSKeys.VECTOR_DB: "vdb-123", + PSKeys.X2TEXT_ADAPTER: "x2t-123", + PSKeys.RETRIEVAL_STRATEGY: "simple", + } + if eval_settings: + prompt_output[PSKeys.EVAL_SETTINGS] = eval_settings + + tool_settings = { + PSKeys.PREAMBLE: "", + PSKeys.POSTAMBLE: "", + PSKeys.GRAMMAR: [], + PSKeys.ENABLE_HIGHLIGHT: enable_highlight, + PSKeys.ENABLE_CHALLENGE: enable_challenge, + } + if challenge_llm: + tool_settings[PSKeys.CHALLENGE_LLM] = challenge_llm + + return ExecutionContext( + executor_name="legacy", + 
operation="answer_prompt", + run_id="run-001", + execution_source="ide", + organization_id="org-1", + executor_params={ + PSKeys.TOOL_SETTINGS: tool_settings, + PSKeys.OUTPUTS: [prompt_output], + PSKeys.TOOL_ID: "tool-1", + PSKeys.FILE_HASH: "hash123", + PSKeys.FILE_PATH: "/data/doc.txt", + PSKeys.FILE_NAME: "doc.txt", + PSKeys.PLATFORM_SERVICE_API_KEY: "key-123", + }, + ) + + +def _get_executor(): + from executor.executors.legacy_executor import LegacyExecutor + from unstract.sdk1.execution.registry import ExecutorRegistry + + ExecutorRegistry.clear() + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry.register(LegacyExecutor) + return ExecutorRegistry.get("legacy") + + +def _mock_llm(): + """Create a mock LLM that returns a realistic completion dict.""" + llm = MagicMock() + llm.complete.return_value = { + PSKeys.RESPONSE: MagicMock(text="42"), + PSKeys.HIGHLIGHT_DATA: [], + PSKeys.LINE_NUMBERS: [], + PSKeys.WHISPER_HASH: "", + } + llm.get_usage_reason.return_value = "extraction" + llm.get_metrics.return_value = {} + return llm + + +def _standard_patches(executor, mock_llm_instance): + """Return common patches for _handle_answer_prompt tests.""" + mock_llm_cls = MagicMock(return_value=mock_llm_instance) + return { + "_get_prompt_deps": patch.object( + executor, "_get_prompt_deps", + return_value=( + AnswerPromptService, + MagicMock( + retrieve_complete_context=MagicMock( + return_value=["context chunk"] + ) + ), + MagicMock( + is_variables_present=MagicMock(return_value=False) + ), + None, # Index + mock_llm_cls, + MagicMock(), # EmbeddingCompat + MagicMock(), # VectorDB + ), + ), + "shim": patch( + "executor.executors.legacy_executor.ExecutorToolShim", + return_value=MagicMock(), + ), + "index_key": patch( + "unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1", + ), + } + + +# --------------------------------------------------------------------------- +# 1. 
TABLE type raises with routing guidance +# --------------------------------------------------------------------------- + +class TestTableLineItemGuard: + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_table_type_delegates_to_table_executor( + self, mock_key, mock_shim_cls + ): + """TABLE prompts are delegated to TableExtractorExecutor in-process.""" + mock_shim_cls.return_value = MagicMock() + executor = _get_executor() + ctx = _make_context(output_type=PSKeys.TABLE) # "table" + llm = _mock_llm() + patches = _standard_patches(executor, llm) + + mock_table_executor = MagicMock() + mock_table_executor.execute.return_value = ExecutionResult( + success=True, + data={"output": {"table_data": "extracted"}, "metadata": {"metrics": {}}}, + ) + + with patches["_get_prompt_deps"], patches["shim"], patches["index_key"]: + with patch( + "unstract.sdk1.execution.registry.ExecutorRegistry.get", + return_value=mock_table_executor, + ): + result = executor._handle_answer_prompt(ctx) + + assert result.success + assert result.data["output"]["field1"] == {"table_data": "extracted"} + mock_table_executor.execute.assert_called_once() + # Verify the sub-context was built with table executor params + sub_ctx = mock_table_executor.execute.call_args[0][0] + assert sub_ctx.executor_name == "table" + assert sub_ctx.operation == "table_extract" + + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_table_type_raises_when_plugin_missing( + self, mock_key, mock_shim_cls + ): + """TABLE prompts raise error when table executor plugin is not installed.""" + mock_shim_cls.return_value = MagicMock() + executor = _get_executor() + ctx = _make_context(output_type=PSKeys.TABLE) # "table" + llm = _mock_llm() + patches = _standard_patches(executor, llm) + + 
with patches["_get_prompt_deps"], patches["shim"], patches["index_key"]: + with patch( + "unstract.sdk1.execution.registry.ExecutorRegistry.get", + side_effect=KeyError("No executor registered with name 'table'"), + ): + with pytest.raises(LegacyExecutorError, match="table executor plugin"): + executor._handle_answer_prompt(ctx) + + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_line_item_type_raises_not_supported( + self, mock_key, mock_shim_cls + ): + mock_shim_cls.return_value = MagicMock() + executor = _get_executor() + ctx = _make_context(output_type=PSKeys.LINE_ITEM) # "line-item" + llm = _mock_llm() + patches = _standard_patches(executor, llm) + + with patches["_get_prompt_deps"], patches["shim"], patches["index_key"]: + with pytest.raises(LegacyExecutorError, match="not supported"): + executor._handle_answer_prompt(ctx) + + +# --------------------------------------------------------------------------- +# 2. 
Challenge plugin integration +# --------------------------------------------------------------------------- + +class TestChallengeIntegration: + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_challenge_invoked_when_enabled_and_installed( + self, mock_key, mock_shim_cls + ): + """Challenge plugin is instantiated and run() called.""" + mock_shim_cls.return_value = MagicMock() + executor = _get_executor() + ctx = _make_context(enable_challenge=True, challenge_llm="ch-llm-1") + llm = _mock_llm() + mock_challenge_cls = MagicMock() + mock_challenger = MagicMock() + mock_challenge_cls.return_value = mock_challenger + + patches = _standard_patches(executor, llm) + with ( + patches["_get_prompt_deps"], + patches["shim"], + patches["index_key"], + patch( + "executor.executors.plugins.loader.ExecutorPluginLoader.get", + side_effect=lambda name: ( + mock_challenge_cls if name == "challenge" else None + ), + ), + ): + result = executor._handle_answer_prompt(ctx) + + assert result.success + # Challenge class was instantiated with correct args + mock_challenge_cls.assert_called_once() + init_kwargs = mock_challenge_cls.call_args.kwargs + assert init_kwargs["run_id"] == "run-001" + assert init_kwargs["platform_key"] == "key-123" + assert init_kwargs["llm"] is llm + # run() was called + mock_challenger.run.assert_called_once() + + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_challenge_skipped_when_plugin_not_installed( + self, mock_key, mock_shim_cls + ): + """When challenge enabled but plugin missing, no error.""" + mock_shim_cls.return_value = MagicMock() + executor = _get_executor() + ctx = _make_context(enable_challenge=True, challenge_llm="ch-llm-1") + llm = _mock_llm() + + patches = _standard_patches(executor, llm) + with ( 
+ patches["_get_prompt_deps"], + patches["shim"], + patches["index_key"], + patch( + "executor.executors.plugins.loader.ExecutorPluginLoader.get", + return_value=None, + ), + ): + result = executor._handle_answer_prompt(ctx) + + assert result.success + + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_challenge_skipped_when_disabled( + self, mock_key, mock_shim_cls + ): + """When enable_challenge=False, plugin loader not called for challenge.""" + mock_shim_cls.return_value = MagicMock() + executor = _get_executor() + ctx = _make_context(enable_challenge=False) + llm = _mock_llm() + + patches = _standard_patches(executor, llm) + with ( + patches["_get_prompt_deps"], + patches["shim"], + patches["index_key"], + patch( + "executor.executors.plugins.loader.ExecutorPluginLoader.get", + ) as mock_get, + ): + result = executor._handle_answer_prompt(ctx) + + assert result.success + # Plugin loader should NOT have been called for "challenge" + for c in mock_get.call_args_list: + assert c.args[0] != "challenge", ( + "ExecutorPluginLoader.get('challenge') should not be called" + ) + + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_challenge_skipped_when_no_challenge_llm( + self, mock_key, mock_shim_cls + ): + """When enable_challenge=True but no challenge_llm, skip challenge.""" + mock_shim_cls.return_value = MagicMock() + executor = _get_executor() + # enable_challenge=True but challenge_llm="" (empty) + ctx = _make_context(enable_challenge=True, challenge_llm="") + llm = _mock_llm() + mock_challenge_cls = MagicMock() + + patches = _standard_patches(executor, llm) + with ( + patches["_get_prompt_deps"], + patches["shim"], + patches["index_key"], + patch( + "executor.executors.plugins.loader.ExecutorPluginLoader.get", + 
return_value=mock_challenge_cls, + ), + ): + result = executor._handle_answer_prompt(ctx) + + assert result.success + # Challenge class should NOT be instantiated (no LLM ID) + mock_challenge_cls.assert_not_called() + + +# --------------------------------------------------------------------------- +# 3. Evaluation plugin integration +# --------------------------------------------------------------------------- + +class TestEvaluationIntegration: + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_evaluation_invoked_when_enabled_and_installed( + self, mock_key, mock_shim_cls + ): + """Evaluation plugin is instantiated and run() called.""" + mock_shim_cls.return_value = MagicMock() + executor = _get_executor() + ctx = _make_context( + eval_settings={PSKeys.EVAL_SETTINGS_EVALUATE: True} + ) + llm = _mock_llm() + mock_eval_cls = MagicMock() + mock_evaluator = MagicMock() + mock_eval_cls.return_value = mock_evaluator + + patches = _standard_patches(executor, llm) + with ( + patches["_get_prompt_deps"], + patches["shim"], + patches["index_key"], + patch( + "executor.executors.plugins.loader.ExecutorPluginLoader.get", + side_effect=lambda name: ( + mock_eval_cls if name == "evaluation" else None + ), + ), + ): + result = executor._handle_answer_prompt(ctx) + + assert result.success + mock_eval_cls.assert_called_once() + init_kwargs = mock_eval_cls.call_args.kwargs + assert init_kwargs["platform_key"] == "key-123" + assert init_kwargs["response"] == "42" # from mock LLM + mock_evaluator.run.assert_called_once() + + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_evaluation_skipped_when_plugin_not_installed( + self, mock_key, mock_shim_cls + ): + """When evaluation enabled but plugin missing, no error.""" + 
mock_shim_cls.return_value = MagicMock() + executor = _get_executor() + ctx = _make_context( + eval_settings={PSKeys.EVAL_SETTINGS_EVALUATE: True} + ) + llm = _mock_llm() + + patches = _standard_patches(executor, llm) + with ( + patches["_get_prompt_deps"], + patches["shim"], + patches["index_key"], + patch( + "executor.executors.plugins.loader.ExecutorPluginLoader.get", + return_value=None, + ), + ): + result = executor._handle_answer_prompt(ctx) + + assert result.success + + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_evaluation_skipped_when_not_enabled( + self, mock_key, mock_shim_cls + ): + """When no eval_settings or evaluate=False, evaluation skipped.""" + mock_shim_cls.return_value = MagicMock() + executor = _get_executor() + # No eval_settings at all + ctx = _make_context() + llm = _mock_llm() + + patches = _standard_patches(executor, llm) + with ( + patches["_get_prompt_deps"], + patches["shim"], + patches["index_key"], + patch( + "executor.executors.plugins.loader.ExecutorPluginLoader.get", + ) as mock_get, + ): + result = executor._handle_answer_prompt(ctx) + + assert result.success + # Plugin loader should NOT have been called for "evaluation" + for c in mock_get.call_args_list: + assert c.args[0] != "evaluation", ( + "ExecutorPluginLoader.get('evaluation') should not be called" + ) + + +# --------------------------------------------------------------------------- +# 4. 
Challenge runs before evaluation (ordering) +# --------------------------------------------------------------------------- + +class TestChallengeBeforeEvaluation: + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_challenge_runs_before_evaluation( + self, mock_key, mock_shim_cls + ): + """Challenge mutates structured_output before evaluation reads it.""" + mock_shim_cls.return_value = MagicMock() + executor = _get_executor() + ctx = _make_context( + enable_challenge=True, + challenge_llm="ch-llm-1", + eval_settings={PSKeys.EVAL_SETTINGS_EVALUATE: True}, + ) + llm = _mock_llm() + + # Track call order + call_order = [] + + mock_challenge_cls = MagicMock() + mock_challenger = MagicMock() + mock_challenger.run.side_effect = lambda: call_order.append("challenge") + mock_challenge_cls.return_value = mock_challenger + + mock_eval_cls = MagicMock() + mock_evaluator = MagicMock() + mock_evaluator.run.side_effect = lambda: call_order.append("evaluation") + mock_eval_cls.return_value = mock_evaluator + + def plugin_get(name): + if name == "challenge": + return mock_challenge_cls + if name == "evaluation": + return mock_eval_cls + return None + + patches = _standard_patches(executor, llm) + with ( + patches["_get_prompt_deps"], + patches["shim"], + patches["index_key"], + patch( + "executor.executors.plugins.loader.ExecutorPluginLoader.get", + side_effect=plugin_get, + ), + ): + result = executor._handle_answer_prompt(ctx) + + assert result.success + assert call_order == ["challenge", "evaluation"] + + +# --------------------------------------------------------------------------- +# 5. 
Challenge mutates structured_output +# --------------------------------------------------------------------------- + +class TestChallengeMutation: + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_challenge_mutates_structured_output( + self, mock_key, mock_shim_cls + ): + """Challenge plugin can mutate structured_output dict.""" + mock_shim_cls.return_value = MagicMock() + executor = _get_executor() + ctx = _make_context(enable_challenge=True, challenge_llm="ch-llm-1") + llm = _mock_llm() + + def challenge_run_side_effect(): + # Simulate challenge replacing the answer with improved version + # Access the structured_output passed to constructor + so = mock_challenge_cls.call_args.kwargs["structured_output"] + so["field1"] = "improved_42" + + mock_challenge_cls = MagicMock() + mock_challenger = MagicMock() + mock_challenger.run.side_effect = challenge_run_side_effect + mock_challenge_cls.return_value = mock_challenger + + patches = _standard_patches(executor, llm) + with ( + patches["_get_prompt_deps"], + patches["shim"], + patches["index_key"], + patch( + "executor.executors.plugins.loader.ExecutorPluginLoader.get", + side_effect=lambda name: ( + mock_challenge_cls if name == "challenge" else None + ), + ), + ): + result = executor._handle_answer_prompt(ctx) + + assert result.success + # The structured_output should contain the mutated value + assert result.data[PSKeys.OUTPUT]["field1"] == "improved_42" diff --git a/workers/tests/test_sanity_phase6e.py b/workers/tests/test_sanity_phase6e.py new file mode 100644 index 0000000000..302540b666 --- /dev/null +++ b/workers/tests/test_sanity_phase6e.py @@ -0,0 +1,215 @@ +"""Phase 6E Sanity — TableExtractorExecutor + TABLE_EXTRACT operation. + +Verifies: +1. Operation.TABLE_EXTRACT enum exists with value "table_extract" +2. tasks.py log_component builder handles table_extract operation +3. 
TableExtractorExecutor mock — registration via entry point +4. TableExtractorExecutor mock — dispatch to correct queue +5. LegacyExecutor excludes table_extract from its _OPERATION_MAP +6. Cloud executor entry point name matches pyproject.toml +""" + +from unittest.mock import MagicMock + + +from unstract.sdk1.execution.context import ExecutionContext, Operation +from unstract.sdk1.execution.dispatcher import ExecutionDispatcher +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + + +# --------------------------------------------------------------------------- +# 1. Operation enum +# --------------------------------------------------------------------------- + +class TestTableExtractOperation: + def test_table_extract_enum_exists(self): + assert hasattr(Operation, "TABLE_EXTRACT") + assert Operation.TABLE_EXTRACT.value == "table_extract" + + def test_table_extract_in_operation_values(self): + values = {op.value for op in Operation} + assert "table_extract" in values + + +# --------------------------------------------------------------------------- +# 2. tasks.py log_component for table_extract +# --------------------------------------------------------------------------- + +class TestTasksLogComponent: + def test_table_extract_log_component(self): + """tasks.py builds correct log_component for table_extract.""" + + # Build a mock context dict + ctx_dict = { + "executor_name": "table", + "operation": "table_extract", + "run_id": "run-001", + "execution_source": "tool", + "organization_id": "org-1", + "executor_params": { + "tool_id": "tool-1", + "file_name": "invoice.pdf", + }, + "request_id": "req-1", + "log_events_id": "evt-1", + } + + # We just need to verify the log_component is built correctly. + # Deserialize the context and check the branch. 
+ context = ExecutionContext.from_dict(ctx_dict) + params = context.executor_params + + # Simulate the tasks.py logic + if context.log_events_id: + if context.operation == "table_extract": + component = { + "tool_id": params.get("tool_id", ""), + "run_id": context.run_id, + "doc_name": str(params.get("file_name", "")), + "operation": context.operation, + } + assert component == { + "tool_id": "tool-1", + "run_id": "run-001", + "doc_name": "invoice.pdf", + "operation": "table_extract", + } + + +# --------------------------------------------------------------------------- +# 3. Mock TableExtractorExecutor — entry point registration +# --------------------------------------------------------------------------- + +class TestTableExtractorExecutorRegistration: + def test_mock_table_executor_discovered_via_entry_point(self): + """Simulate cloud executor discovery via entry point.""" + from unstract.sdk1.execution.executor import BaseExecutor + + # Create a mock TableExtractorExecutor + @ExecutorRegistry.register + class MockTableExtractorExecutor(BaseExecutor): + @property + def name(self) -> str: + return "table" + + def execute(self, context): + if context.operation != "table_extract": + return ExecutionResult.failure( + error=f"Unsupported: {context.operation}" + ) + return ExecutionResult( + success=True, + data={"output": "table_data", "metadata": {}}, + ) + + try: + # Verify it was registered + assert "table" in ExecutorRegistry.list_executors() + executor = ExecutorRegistry.get("table") + assert executor.name == "table" + + # Verify it handles table_extract + ctx = ExecutionContext( + executor_name="table", + operation="table_extract", + run_id="run-1", + execution_source="tool", + executor_params={}, + ) + result = executor.execute(ctx) + assert result.success + assert result.data["output"] == "table_data" + + # Verify it rejects unsupported operations + ctx2 = ExecutionContext( + executor_name="table", + operation="answer_prompt", + run_id="run-2", + 
execution_source="tool", + executor_params={}, + ) + result2 = executor.execute(ctx2) + assert not result2.success + finally: + # Cleanup + ExecutorRegistry.clear() + + +# --------------------------------------------------------------------------- +# 4. Queue routing for table executor +# --------------------------------------------------------------------------- + +class TestTableQueueRouting: + def test_table_executor_routes_to_correct_queue(self): + """executor_name='table' routes to celery_executor_table queue.""" + queue = ExecutionDispatcher._get_queue("table") + assert queue == "celery_executor_table" + + def test_dispatch_sends_to_table_queue(self): + """ExecutionDispatcher sends table_extract to correct queue.""" + mock_app = MagicMock() + mock_result = MagicMock() + mock_result.get.return_value = ExecutionResult( + success=True, data={"output": "ok"} + ).to_dict() + mock_app.send_task.return_value = mock_result + + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = ExecutionContext( + executor_name="table", + operation="table_extract", + run_id="run-1", + execution_source="tool", + executor_params={"table_settings": {}}, + ) + dispatcher.dispatch(ctx) + + mock_app.send_task.assert_called_once() + call_kwargs = mock_app.send_task.call_args + assert call_kwargs.kwargs.get("queue") == "celery_executor_table" + + +# --------------------------------------------------------------------------- +# 5. 
LegacyExecutor does NOT handle table_extract +# --------------------------------------------------------------------------- + +class TestLegacyExcludesTable: + def test_table_extract_not_in_legacy_operation_map(self): + """LegacyExecutor._OPERATION_MAP should NOT contain table_extract.""" + from executor.executors.legacy_executor import LegacyExecutor + + assert "table_extract" not in LegacyExecutor._OPERATION_MAP + + def test_legacy_returns_failure_for_table_extract(self): + """LegacyExecutor.execute() returns failure for table_extract.""" + from executor.executors.legacy_executor import LegacyExecutor + + ExecutorRegistry.clear() + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry.register(LegacyExecutor) + executor = ExecutorRegistry.get("legacy") + + ctx = ExecutionContext( + executor_name="legacy", + operation="table_extract", + run_id="run-1", + execution_source="tool", + executor_params={}, + ) + result = executor.execute(ctx) + assert not result.success + assert "does not support" in result.error + + +# --------------------------------------------------------------------------- +# 6. Entry point name verification +# --------------------------------------------------------------------------- + +class TestEntryPointConfig: + def test_entry_point_name_is_table(self): + """The pyproject.toml entry point name should be 'table'.""" + # This is a documentation/verification test — the entry point + # in pyproject.toml maps 'table' to TableExtractorExecutor. + # Verify the queue name matches. + assert ExecutionDispatcher._get_queue("table") == "celery_executor_table" diff --git a/workers/tests/test_sanity_phase6f.py b/workers/tests/test_sanity_phase6f.py new file mode 100644 index 0000000000..cf565e692f --- /dev/null +++ b/workers/tests/test_sanity_phase6f.py @@ -0,0 +1,191 @@ +"""Phase 6F Sanity — SmartTableExtractorExecutor + SMART_TABLE_EXTRACT operation. + +Verifies: +1. 
Operation.SMART_TABLE_EXTRACT enum exists with value "smart_table_extract" +2. tasks.py log_component builder handles smart_table_extract operation +3. Mock SmartTableExtractorExecutor — registration and execution +4. Queue routing: executor_name="smart_table" → celery_executor_smart_table +5. LegacyExecutor does NOT handle smart_table_extract +6. Dispatch sends to correct queue +""" + +from unittest.mock import MagicMock + + +from unstract.sdk1.execution.context import ExecutionContext, Operation +from unstract.sdk1.execution.dispatcher import ExecutionDispatcher +from unstract.sdk1.execution.executor import BaseExecutor +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + + +# --------------------------------------------------------------------------- +# 1. Operation enum +# --------------------------------------------------------------------------- + +class TestSmartTableExtractOperation: + def test_smart_table_extract_enum_exists(self): + assert hasattr(Operation, "SMART_TABLE_EXTRACT") + assert Operation.SMART_TABLE_EXTRACT.value == "smart_table_extract" + + def test_smart_table_extract_in_operation_values(self): + values = {op.value for op in Operation} + assert "smart_table_extract" in values + + +# --------------------------------------------------------------------------- +# 2. 
# tasks.py log_component for smart_table_extract
# ---------------------------------------------------------------------------

class TestTasksLogComponent:
    def test_smart_table_extract_log_component(self):
        """Rebuild the log component for a smart_table_extract context.

        The component dict is assembled by hand from the deserialized
        context, mirroring the branch that tasks.py is expected to share
        between table_extract and smart_table_extract. tasks.py itself is
        not invoked by this test.
        """
        context = ExecutionContext.from_dict(
            {
                "executor_name": "smart_table",
                "operation": "smart_table_extract",
                "run_id": "run-001",
                "execution_source": "tool",
                "organization_id": "org-1",
                "executor_params": {
                    "tool_id": "tool-1",
                    "file_name": "data.xlsx",
                },
                "request_id": "req-1",
                "log_events_id": "evt-1",
            }
        )
        executor_params = context.executor_params

        # The operation must be one of the two that share the table branch.
        assert context.operation in ("table_extract", "smart_table_extract")

        expected = {
            "tool_id": "tool-1",
            "run_id": "run-001",
            "doc_name": "data.xlsx",
            "operation": "smart_table_extract",
        }
        actual = dict(
            tool_id=executor_params.get("tool_id", ""),
            run_id=context.run_id,
            doc_name=str(executor_params.get("file_name", "")),
            operation=context.operation,
        )
        assert actual == expected


# ---------------------------------------------------------------------------
# 3.
# Mock SmartTableExtractorExecutor — registration and execution
# ---------------------------------------------------------------------------

class TestSmartTableExtractorRegistration:
    def test_mock_smart_table_executor_registers_and_executes(self):
        """Register a stand-in "smart_table" executor and run it.

        Checks that the executor is discoverable through the registry,
        succeeds for smart_table_extract, and fails for any other
        operation. The registry is cleared afterwards regardless of the
        outcome.
        """

        @ExecutorRegistry.register
        class MockSmartTableExecutor(BaseExecutor):
            @property
            def name(self) -> str:
                return "smart_table"

            def execute(self, context):
                if context.operation == "smart_table_extract":
                    return ExecutionResult(
                        success=True,
                        data={
                            "output": [{"col1": "val1"}],
                            "metadata": {"total_records": 1},
                        },
                    )
                return ExecutionResult.failure(
                    error=f"Unsupported: {context.operation}"
                )

        def make_context(operation, run_id):
            # Every context in this test differs only by operation and run id.
            return ExecutionContext(
                executor_name="smart_table",
                operation=operation,
                run_id=run_id,
                execution_source="tool",
                executor_params={},
            )

        try:
            assert "smart_table" in ExecutorRegistry.list_executors()
            smart_table = ExecutorRegistry.get("smart_table")
            assert smart_table.name == "smart_table"

            supported = smart_table.execute(
                make_context("smart_table_extract", "run-1")
            )
            assert supported.success
            assert supported.data["output"] == [{"col1": "val1"}]
            assert supported.data["metadata"]["total_records"] == 1

            # Rejects unsupported operations
            unsupported = smart_table.execute(
                make_context("answer_prompt", "run-2")
            )
            assert not unsupported.success
        finally:
            ExecutorRegistry.clear()


# ---------------------------------------------------------------------------
# 4.
# Queue routing
# ---------------------------------------------------------------------------

class TestSmartTableQueueRouting:
    def test_smart_table_routes_to_correct_queue(self):
        """The "smart_table" executor name maps to its dedicated queue."""
        assert (
            ExecutionDispatcher._get_queue("smart_table")
            == "celery_executor_smart_table"
        )

    def test_dispatch_sends_to_smart_table_queue(self):
        """dispatch() passes the smart_table queue to send_task."""
        # The mocked Celery app hands back a canned, serialized success result.
        async_result = MagicMock()
        async_result.get.return_value = ExecutionResult(
            success=True, data={"output": "ok"}
        ).to_dict()
        celery_app = MagicMock()
        celery_app.send_task.return_value = async_result

        context = ExecutionContext(
            executor_name="smart_table",
            operation="smart_table_extract",
            run_id="run-1",
            execution_source="tool",
            executor_params={"table_settings": {}},
        )
        ExecutionDispatcher(celery_app=celery_app).dispatch(context)

        celery_app.send_task.assert_called_once()
        sent = celery_app.send_task.call_args
        assert sent.kwargs.get("queue") == "celery_executor_smart_table"


# ---------------------------------------------------------------------------
# 5.
LegacyExecutor does NOT handle smart_table_extract +# --------------------------------------------------------------------------- + +class TestLegacyExcludesSmartTable: + def test_smart_table_extract_not_in_legacy_operation_map(self): + from executor.executors.legacy_executor import LegacyExecutor + assert "smart_table_extract" not in LegacyExecutor._OPERATION_MAP + + def test_legacy_returns_failure_for_smart_table_extract(self): + from executor.executors.legacy_executor import LegacyExecutor + + ExecutorRegistry.clear() + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry.register(LegacyExecutor) + executor = ExecutorRegistry.get("legacy") + + ctx = ExecutionContext( + executor_name="legacy", + operation="smart_table_extract", + run_id="run-1", + execution_source="tool", + executor_params={}, + ) + result = executor.execute(ctx) + assert not result.success + assert "does not support" in result.error diff --git a/workers/tests/test_sanity_phase6g.py b/workers/tests/test_sanity_phase6g.py new file mode 100644 index 0000000000..fe8da04832 --- /dev/null +++ b/workers/tests/test_sanity_phase6g.py @@ -0,0 +1,296 @@ +"""Phase 6G Sanity — SimplePromptStudioExecutor + SPS operations. + +Verifies: +1. Operation.SPS_ANSWER_PROMPT enum exists with value "sps_answer_prompt" +2. Operation.SPS_INDEX enum exists with value "sps_index" +3. Mock SimplePromptStudioExecutor — registration and execution +4. Queue routing: executor_name="simple_prompt_studio" → celery_executor_simple_prompt_studio +5. LegacyExecutor does NOT handle sps_answer_prompt or sps_index +6. Dispatch sends to correct queue +7. 
SimplePromptStudioExecutor rejects unsupported operations +""" + +from unittest.mock import MagicMock + + +from unstract.sdk1.execution.context import ExecutionContext, Operation +from unstract.sdk1.execution.dispatcher import ExecutionDispatcher +from unstract.sdk1.execution.executor import BaseExecutor +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + + +# --------------------------------------------------------------------------- +# 1. Operation enums +# --------------------------------------------------------------------------- + +class TestSPSOperations: + def test_sps_answer_prompt_enum_exists(self): + assert hasattr(Operation, "SPS_ANSWER_PROMPT") + assert Operation.SPS_ANSWER_PROMPT.value == "sps_answer_prompt" + + def test_sps_index_enum_exists(self): + assert hasattr(Operation, "SPS_INDEX") + assert Operation.SPS_INDEX.value == "sps_index" + + def test_sps_operations_in_operation_values(self): + values = {op.value for op in Operation} + assert "sps_answer_prompt" in values + assert "sps_index" in values + + +# --------------------------------------------------------------------------- +# 2. 
# Mock SimplePromptStudioExecutor — registration and execution
# ---------------------------------------------------------------------------

class TestSimplePromptStudioRegistration:
    def test_mock_sps_executor_registers_and_executes(self):
        """Register a stand-in "simple_prompt_studio" executor and run it.

        Covers both supported operations (sps_answer_prompt, sps_index)
        and one unsupported operation. The registry is cleared afterwards
        regardless of the outcome.
        """

        @ExecutorRegistry.register
        class MockSPSExecutor(BaseExecutor):
            # Operation name -> name of the handler method on this class.
            _OPERATION_MAP = {
                "sps_answer_prompt": "_handle_answer_prompt",
                "sps_index": "_handle_index",
            }

            @property
            def name(self) -> str:
                return "simple_prompt_studio"

            def execute(self, context):
                handler_name = self._OPERATION_MAP.get(context.operation)
                if not handler_name:
                    return ExecutionResult.failure(
                        error=f"Unsupported: {context.operation}"
                    )
                return getattr(self, handler_name)(context)

            def _handle_answer_prompt(self, context):
                return ExecutionResult(
                    success=True,
                    data={
                        "output": {"invoice_number": "INV-001"},
                        "metadata": {},
                    },
                )

            def _handle_index(self, context):
                return ExecutionResult(
                    success=True,
                    data={"output": "indexed", "metadata": {}},
                )

        def make_context(operation, run_id):
            # Every context in this test differs only by operation and run id.
            return ExecutionContext(
                executor_name="simple_prompt_studio",
                operation=operation,
                run_id=run_id,
                execution_source="tool",
                executor_params={},
            )

        try:
            assert "simple_prompt_studio" in ExecutorRegistry.list_executors()
            executor = ExecutorRegistry.get("simple_prompt_studio")
            assert executor.name == "simple_prompt_studio"

            # sps_answer_prompt
            result = executor.execute(make_context("sps_answer_prompt", "run-1"))
            assert result.success
            assert result.data["output"] == {"invoice_number": "INV-001"}

            # sps_index
            result2 = executor.execute(make_context("sps_index", "run-2"))
            assert result2.success
            assert result2.data["output"] == "indexed"

            # Rejects unsupported operations
            result3 = executor.execute(make_context("extract", "run-3"))
            assert not result3.success
        finally:
            ExecutorRegistry.clear()


# ---------------------------------------------------------------------------
# 3. Queue routing
# ---------------------------------------------------------------------------

class TestSPSQueueRouting:
    @staticmethod
    def _dispatched_queue(operation, executor_params, result_data):
        """Dispatch one SPS context through a mocked Celery app.

        Returns the ``queue`` keyword that the dispatcher passed to
        ``send_task``, after asserting send_task was called exactly once.
        """
        mock_result = MagicMock()
        mock_result.get.return_value = ExecutionResult(
            success=True, data=result_data
        ).to_dict()
        mock_app = MagicMock()
        mock_app.send_task.return_value = mock_result

        dispatcher = ExecutionDispatcher(celery_app=mock_app)
        ctx = ExecutionContext(
            executor_name="simple_prompt_studio",
            operation=operation,
            run_id="run-1",
            execution_source="tool",
            executor_params=executor_params,
        )
        dispatcher.dispatch(ctx)

        mock_app.send_task.assert_called_once()
        return mock_app.send_task.call_args.kwargs.get("queue")

    def test_sps_routes_to_correct_queue(self):
        """The "simple_prompt_studio" executor name maps to its own queue."""
        queue = ExecutionDispatcher._get_queue("simple_prompt_studio")
        assert queue == "celery_executor_simple_prompt_studio"

    def test_dispatch_sends_to_sps_queue(self):
        """sps_answer_prompt is sent to the simple_prompt_studio queue."""
        queue = self._dispatched_queue(
            operation="sps_answer_prompt",
            executor_params={"tool_settings": {}, "output": {}},
            result_data={"output": {"field": "value"}},
        )
        assert queue == "celery_executor_simple_prompt_studio"

    def test_dispatch_sps_index_to_correct_queue(self, tmp_path):
        """sps_index is sent to the simple_prompt_studio queue."""
        queue = self._dispatched_queue(
            operation="sps_index",
            executor_params={
                "output": {},
                "file_path": str(tmp_path / "test.pdf"),
            },
            result_data={"output": "indexed"},
        )
        assert queue == "celery_executor_simple_prompt_studio"


# ---------------------------------------------------------------------------
# 4. LegacyExecutor does NOT handle SPS operations
# ---------------------------------------------------------------------------

class TestLegacyExcludesSPS:
    @staticmethod
    def _execute_on_legacy(operation):
        """Run *operation* on a freshly registered LegacyExecutor.

        The registry is reset before registration and cleared again
        afterwards, so LegacyExecutor does not stay registered for tests
        that run later.
        """
        from executor.executors.legacy_executor import LegacyExecutor

        # Register unconditionally: a membership guard directly after
        # clear() is redundant, since the registry has just been emptied.
        ExecutorRegistry.clear()
        ExecutorRegistry.register(LegacyExecutor)
        try:
            executor = ExecutorRegistry.get("legacy")
            ctx = ExecutionContext(
                executor_name="legacy",
                operation=operation,
                run_id="run-1",
                execution_source="tool",
                executor_params={},
            )
            return executor.execute(ctx)
        finally:
            ExecutorRegistry.clear()

    def test_sps_answer_prompt_not_in_legacy_operation_map(self):
        from executor.executors.legacy_executor import LegacyExecutor

        assert "sps_answer_prompt" not in LegacyExecutor._OPERATION_MAP

    def test_sps_index_not_in_legacy_operation_map(self):
        from executor.executors.legacy_executor import LegacyExecutor

        assert "sps_index" not in LegacyExecutor._OPERATION_MAP

    def test_legacy_returns_failure_for_sps_answer_prompt(self):
        """LegacyExecutor reports sps_answer_prompt as unsupported."""
        result = self._execute_on_legacy("sps_answer_prompt")
        assert not result.success
        assert "does not support" in result.error

    def test_legacy_returns_failure_for_sps_index(self):
        """LegacyExecutor reports sps_index as unsupported."""
        result = self._execute_on_legacy("sps_index")
        assert not result.success
        assert "does not support" in result.error


# ---------------------------------------------------------------------------
# 5.
tasks.py log_component for SPS operations +# --------------------------------------------------------------------------- + +class TestTasksLogComponent: + def test_sps_answer_prompt_uses_default_log_component(self): + """SPS operations use the default log_component branch in tasks.py.""" + ctx_dict = { + "executor_name": "simple_prompt_studio", + "operation": "sps_answer_prompt", + "run_id": "run-001", + "execution_source": "tool", + "organization_id": "org-1", + "executor_params": { + "tool_id": "tool-1", + "file_name": "invoice.pdf", + }, + "request_id": "req-1", + "log_events_id": "evt-1", + } + context = ExecutionContext.from_dict(ctx_dict) + params = context.executor_params + + # SPS operations fall through to the default branch + assert context.operation not in ("ide_index", "structure_pipeline", + "table_extract", "smart_table_extract") + component = { + "tool_id": params.get("tool_id", ""), + "run_id": context.run_id, + "doc_name": str(params.get("file_name", "")), + "operation": context.operation, + } + assert component == { + "tool_id": "tool-1", + "run_id": "run-001", + "doc_name": "invoice.pdf", + "operation": "sps_answer_prompt", + } + + def test_sps_index_uses_default_log_component(self): + """SPS index also uses the default log_component branch.""" + ctx_dict = { + "executor_name": "simple_prompt_studio", + "operation": "sps_index", + "run_id": "run-002", + "execution_source": "tool", + "executor_params": { + "tool_id": "tool-2", + "file_name": "contract.pdf", + }, + "request_id": "req-2", + "log_events_id": "evt-2", + } + context = ExecutionContext.from_dict(ctx_dict) + params = context.executor_params + + assert context.operation not in ("ide_index", "structure_pipeline", + "table_extract", "smart_table_extract") + component = { + "tool_id": params.get("tool_id", ""), + "run_id": context.run_id, + "doc_name": str(params.get("file_name", "")), + "operation": context.operation, + } + assert component["operation"] == "sps_index" diff --git 
a/workers/tests/test_sanity_phase6h.py b/workers/tests/test_sanity_phase6h.py new file mode 100644 index 0000000000..3b0ed2039c --- /dev/null +++ b/workers/tests/test_sanity_phase6h.py @@ -0,0 +1,267 @@ +"""Phase 6H Sanity — AgenticPromptStudioExecutor + agentic operations. + +Verifies: +1. All 8 agentic Operation enums exist +2. AGENTIC_EXTRACTION removed from Operation enum +3. Mock AgenticPromptStudioExecutor — registration and all 8 operations +4. Queue routing: executor_name="agentic" → celery_executor_agentic +5. LegacyExecutor does NOT handle any agentic operations +6. Dispatch sends to correct queue +7. Structure tool routes to agentic executor (not legacy) +""" + +from unittest.mock import MagicMock, patch + +import pytest + +from unstract.sdk1.execution.context import ExecutionContext, Operation +from unstract.sdk1.execution.dispatcher import ExecutionDispatcher +from unstract.sdk1.execution.executor import BaseExecutor +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + + +AGENTIC_OPERATIONS = [ + "agentic_extract", + "agentic_summarize", + "agentic_uniformize", + "agentic_finalize", + "agentic_generate_prompt", + "agentic_generate_prompt_pipeline", + "agentic_compare", + "agentic_tune_field", +] + + +# --------------------------------------------------------------------------- +# 1. 
# Operation enums
# ---------------------------------------------------------------------------

class TestAgenticOperations:
    @pytest.mark.parametrize("op", AGENTIC_OPERATIONS)
    def test_agentic_operation_enum_exists(self, op):
        """Each agentic operation string is a value of the Operation enum."""
        assert op in {member.value for member in Operation}

    def test_agentic_extraction_removed(self):
        """Neither the AGENTIC_EXTRACTION member nor its value remains."""
        assert not hasattr(Operation, "AGENTIC_EXTRACTION")
        assert "agentic_extraction" not in {member.value for member in Operation}


# ---------------------------------------------------------------------------
# 2. Mock AgenticPromptStudioExecutor — registration and all operations
# ---------------------------------------------------------------------------

class TestAgenticExecutorRegistration:
    def test_mock_agentic_executor_registers_and_routes_all_ops(self):
        """Register a stand-in "agentic" executor and run every operation.

        Each entry of AGENTIC_OPERATIONS must succeed and echo its
        operation name; an operation outside that list must fail. The
        registry is cleared afterwards regardless of the outcome.
        """

        @ExecutorRegistry.register
        class MockAgenticExecutor(BaseExecutor):
            _OPERATION_MAP = {op: f"_handle_{op}" for op in AGENTIC_OPERATIONS}

            @property
            def name(self) -> str:
                return "agentic"

            def execute(self, context):
                if context.operation in self._OPERATION_MAP:
                    return ExecutionResult(
                        success=True,
                        data={
                            "output": {"operation": context.operation},
                            "metadata": {},
                        },
                    )
                return ExecutionResult.failure(
                    error=f"Unsupported: {context.operation}"
                )

        def make_context(operation, run_id):
            # Every context in this test differs only by operation and run id.
            return ExecutionContext(
                executor_name="agentic",
                operation=operation,
                run_id=run_id,
                execution_source="tool",
                executor_params={},
            )

        try:
            assert "agentic" in ExecutorRegistry.list_executors()
            agentic = ExecutorRegistry.get("agentic")
            assert agentic.name == "agentic"

            # Every listed operation routes successfully
            for op in AGENTIC_OPERATIONS:
                outcome = agentic.execute(make_context(op, f"run-{op}"))
                assert outcome.success, f"Operation {op} failed"
                assert outcome.data["output"]["operation"] == op

            # Rejects unsupported operations
            rejected = agentic.execute(
                make_context("answer_prompt", "run-unsupported")
            )
            assert not rejected.success
        finally:
            ExecutorRegistry.clear()


# ---------------------------------------------------------------------------
# 3. Queue routing
# ---------------------------------------------------------------------------

class TestAgenticQueueRouting:
    def test_agentic_routes_to_correct_queue(self):
        """The "agentic" executor name maps to its dedicated queue."""
        assert ExecutionDispatcher._get_queue("agentic") == "celery_executor_agentic"

    @pytest.mark.parametrize("op", AGENTIC_OPERATIONS)
    def test_dispatch_sends_to_agentic_queue(self, op):
        """dispatch() passes the agentic queue to send_task for each op."""
        # The mocked Celery app hands back a canned, serialized success result.
        async_result = MagicMock()
        async_result.get.return_value = ExecutionResult(
            success=True, data={"output": {}}
        ).to_dict()
        celery_app = MagicMock()
        celery_app.send_task.return_value = async_result

        context = ExecutionContext(
            executor_name="agentic",
            operation=op,
            run_id="run-1",
            execution_source="tool",
            executor_params={},
        )
        ExecutionDispatcher(celery_app=celery_app).dispatch(context)

        celery_app.send_task.assert_called_once()
        sent = celery_app.send_task.call_args
        assert sent.kwargs.get("queue") == "celery_executor_agentic"


# ---------------------------------------------------------------------------
# 4.
# LegacyExecutor does NOT handle agentic operations
# ---------------------------------------------------------------------------

class TestLegacyExcludesAgentic:
    @staticmethod
    def _execute_on_legacy(operation):
        """Run *operation* on a freshly registered LegacyExecutor.

        The registry is reset before registration and cleared again
        afterwards, so LegacyExecutor does not stay registered for tests
        that run later.
        """
        from executor.executors.legacy_executor import LegacyExecutor

        # Register unconditionally: a membership guard directly after
        # clear() is redundant, since the registry has just been emptied.
        ExecutorRegistry.clear()
        ExecutorRegistry.register(LegacyExecutor)
        try:
            executor = ExecutorRegistry.get("legacy")
            ctx = ExecutionContext(
                executor_name="legacy",
                operation=operation,
                run_id="run-1",
                execution_source="tool",
                executor_params={},
            )
            return executor.execute(ctx)
        finally:
            ExecutorRegistry.clear()

    @pytest.mark.parametrize("op", AGENTIC_OPERATIONS)
    def test_agentic_op_not_in_legacy_operation_map(self, op):
        """No agentic operation appears in LegacyExecutor._OPERATION_MAP."""
        from executor.executors.legacy_executor import LegacyExecutor

        assert op not in LegacyExecutor._OPERATION_MAP

    def test_legacy_returns_failure_for_agentic_extract(self):
        """LegacyExecutor reports agentic_extract as unsupported."""
        result = self._execute_on_legacy("agentic_extract")
        assert not result.success
        assert "does not support" in result.error

    def test_legacy_returns_failure_for_agentic_summarize(self):
        """LegacyExecutor reports agentic_summarize as unsupported."""
        result = self._execute_on_legacy("agentic_summarize")
        assert not result.success
        assert "does not support" in result.error


# ---------------------------------------------------------------------------
# 5.
# Structure tool routes to agentic executor
# ---------------------------------------------------------------------------

class TestStructureToolAgenticRouting:
    @patch("unstract.sdk1.x2txt.X2Text")
    def test_structure_tool_dispatches_agentic_extract(self, mock_x2text_cls, tmp_path):
        """_run_agentic_extraction dispatches to the "agentic" executor.

        X2Text is patched and the dispatcher is a mock, so the test only
        inspects the ExecutionContext handed to dispatch().
        """
        from file_processing.structure_tool_task import _run_agentic_extraction

        # Dispatcher stub that always reports a successful extraction.
        dispatcher = MagicMock()
        dispatcher.dispatch.return_value = ExecutionResult(
            success=True, data={"output": {"field": "value"}}
        )

        # Patched X2Text: instances yield a canned extraction result.
        x2text = MagicMock()
        x2text.process.return_value = MagicMock(extracted_text="extracted text")
        mock_x2text_cls.return_value = x2text

        input_file = tmp_path / "test.pdf"
        output_dir = tmp_path / "output"
        _run_agentic_extraction(
            tool_metadata={"name": "test"},
            input_file_path=str(input_file),
            output_dir_path=str(output_dir),
            tool_instance_metadata={},
            dispatcher=dispatcher,
            shim=MagicMock(),
            file_execution_id="exec-001",
            organization_id="org-001",
            source_file_name="test.pdf",
            fs=MagicMock(),
        )

        # Exactly one dispatch, carrying the agentic routing fields.
        dispatcher.dispatch.assert_called_once()
        sent_context = dispatcher.dispatch.call_args[0][0]
        assert sent_context.executor_name == "agentic"
        assert sent_context.operation == "agentic_extract"
        assert sent_context.organization_id == "org-001"


# ---------------------------------------------------------------------------
# 6.
tasks.py log_component for agentic operations +# --------------------------------------------------------------------------- + +class TestTasksLogComponent: + @pytest.mark.parametrize("op", AGENTIC_OPERATIONS) + def test_agentic_ops_use_default_log_component(self, op): + """Agentic operations fall through to default log_component.""" + ctx_dict = { + "executor_name": "agentic", + "operation": op, + "run_id": "run-001", + "execution_source": "tool", + "executor_params": { + "tool_id": "tool-1", + "file_name": "doc.pdf", + }, + "request_id": "req-1", + "log_events_id": "evt-1", + } + context = ExecutionContext.from_dict(ctx_dict) + + # Agentic ops should NOT match ide_index, structure_pipeline, + # or table_extract/smart_table_extract branches + assert context.operation not in ( + "ide_index", "structure_pipeline", + "table_extract", "smart_table_extract", + ) diff --git a/workers/tests/test_sanity_phase6i.py b/workers/tests/test_sanity_phase6i.py new file mode 100644 index 0000000000..635dfa7ca3 --- /dev/null +++ b/workers/tests/test_sanity_phase6i.py @@ -0,0 +1,272 @@ +"""Phase 6I Sanity — Backend Summarizer Migration. + +Verifies: +1. Summarize operation exists and routes through LegacyExecutor +2. Summarize executor_params contract matches _handle_summarize expectations +3. Dispatch routes summarize to celery_executor_legacy queue +4. Summarize result has expected shape (data.data = summary text) +5. 
Full Celery chain for summarize operation +""" + +from unittest.mock import MagicMock, patch + +import pytest + +from unstract.sdk1.execution.context import ExecutionContext, Operation +from unstract.sdk1.execution.dispatcher import ExecutionDispatcher +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + + +# Patches +_PATCH_GET_PROMPT_DEPS = ( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" +) + + +def _register_legacy(): + from executor.executors.legacy_executor import LegacyExecutor + ExecutorRegistry.clear() + ExecutorRegistry.register(LegacyExecutor) + + +# --------------------------------------------------------------------------- +# 1. Summarize operation enum +# --------------------------------------------------------------------------- + +class TestSummarizeOperation: + def test_summarize_enum_exists(self): + assert hasattr(Operation, "SUMMARIZE") + assert Operation.SUMMARIZE.value == "summarize" + + def test_summarize_in_legacy_operation_map(self): + from executor.executors.legacy_executor import LegacyExecutor + assert "summarize" in LegacyExecutor._OPERATION_MAP + + +# --------------------------------------------------------------------------- +# 2. 
# Executor params contract
# ---------------------------------------------------------------------------

class TestSummarizeParamsContract:
    def test_summarize_params_match_handler_expectations(self):
        """Check the summarize payload carries every key the handler reads.

        The payload is a hand-written sample of what the backend
        summarizer sends; the handler itself is not invoked here.
        """
        backend_params = {
            "llm_adapter_instance_id": "llm-uuid",
            "summarize_prompt": "Summarize the document...",
            "context": "This is the full document text...",
            "prompt_keys": ["invoice_number", "total_amount"],
            "PLATFORM_SERVICE_API_KEY": "platform-key-123",
        }

        # Keys that _handle_summarize is expected to read.
        for required_key in (
            "llm_adapter_instance_id",
            "summarize_prompt",
            "context",
            "prompt_keys",
            "PLATFORM_SERVICE_API_KEY",
        ):
            assert required_key in backend_params


# ---------------------------------------------------------------------------
# 3. Queue routing
# ---------------------------------------------------------------------------

class TestSummarizeQueueRouting:
    def test_summarize_routes_to_legacy_queue(self):
        """The "legacy" executor name maps to celery_executor_legacy."""
        assert ExecutionDispatcher._get_queue("legacy") == "celery_executor_legacy"

    def test_dispatch_sends_summarize_to_legacy_queue(self):
        """dispatch() sends summarize to the legacy queue and returns the result."""
        # The mocked Celery app hands back a canned, serialized success result.
        async_result = MagicMock()
        async_result.get.return_value = ExecutionResult(
            success=True, data={"data": "Summary text here"}
        ).to_dict()
        celery_app = MagicMock()
        celery_app.send_task.return_value = async_result

        context = ExecutionContext(
            executor_name="legacy",
            operation="summarize",
            run_id="run-summarize",
            execution_source="ide",
            organization_id="org-1",
            executor_params={
                "llm_adapter_instance_id": "llm-1",
                "summarize_prompt": "Summarize...",
                "context": "Document text",
                "prompt_keys": ["field1"],
                "PLATFORM_SERVICE_API_KEY": "key-1",
            },
        )
        outcome = ExecutionDispatcher(celery_app=celery_app).dispatch(context)

        celery_app.send_task.assert_called_once()
        sent = celery_app.send_task.call_args
        assert sent.kwargs.get("queue") == "celery_executor_legacy"
        assert outcome.success
        assert outcome.data["data"] == "Summary text here"


# ---------------------------------------------------------------------------
# 4. Result shape
# ---------------------------------------------------------------------------

class TestSummarizeResultShape:
    @patch(_PATCH_GET_PROMPT_DEPS)
    def test_summarize_returns_data_key(self, mock_deps):
        """A successful summarize exposes the summary text at data["data"]."""
        llm_cls = MagicMock()
        llm_cls.return_value = MagicMock()

        # Seven-slot dependency tuple returned by the patched _get_prompt_deps.
        mock_deps.return_value = (
            MagicMock(),  # RetrievalService
            MagicMock(),  # PostProcessor
            MagicMock(),  # VariableReplacement
            MagicMock(),  # JsonRepair
            llm_cls,  # LLM
            MagicMock(),  # Embedding
            MagicMock(),  # VectorDB
        )

        # run_completion is stubbed so no real completion is attempted.
        completion_patch = patch(
            "executor.executors.answer_prompt.AnswerPromptService.run_completion",
            return_value="This is the summary.",
        )
        with completion_patch:
            _register_legacy()
            legacy = ExecutorRegistry.get("legacy")

            context = ExecutionContext(
                executor_name="legacy",
                operation="summarize",
                run_id="run-result-shape",
                execution_source="ide",
                organization_id="org-1",
                executor_params={
                    "llm_adapter_instance_id": "llm-1",
                    "summarize_prompt": "Summarize the document.",
                    "context": "Full document text here.",
                    "prompt_keys": ["total"],
                    "PLATFORM_SERVICE_API_KEY": "key-1",
                },
            )
            outcome = legacy.execute(context)

        assert outcome.success
        assert outcome.data["data"] == "This is the summary."

    @patch(_PATCH_GET_PROMPT_DEPS)
    def test_summarize_missing_context_returns_failure(self, mock_deps):
        """An empty context param yields a failure whose error names it."""
        _register_legacy()
        legacy = ExecutorRegistry.get("legacy")

        outcome = legacy.execute(
            ExecutionContext(
                executor_name="legacy",
                operation="summarize",
                run_id="run-missing-ctx",
                execution_source="ide",
                executor_params={
                    "llm_adapter_instance_id": "llm-1",
                    "summarize_prompt": "Summarize.",
                    "context": "",  # empty
                    "PLATFORM_SERVICE_API_KEY": "key-1",
                },
            )
        )

        assert not outcome.success
        assert "context" in outcome.error.lower()

    @patch(_PATCH_GET_PROMPT_DEPS)
    def test_summarize_missing_llm_returns_failure(self, mock_deps):
        """An empty llm_adapter_instance_id yields a failure naming it."""
        _register_legacy()
        legacy = ExecutorRegistry.get("legacy")

        outcome = legacy.execute(
            ExecutionContext(
                executor_name="legacy",
                operation="summarize",
                run_id="run-missing-llm",
                execution_source="ide",
                executor_params={
                    "llm_adapter_instance_id": "",  # empty
                    "summarize_prompt": "Summarize.",
                    "context": "Some text",
                    "PLATFORM_SERVICE_API_KEY": "key-1",
                },
            )
        )

        assert not outcome.success
        assert "llm_adapter_instance_id" in outcome.error.lower()


# ---------------------------------------------------------------------------
# 5.
# Full Celery chain
# ---------------------------------------------------------------------------

@pytest.fixture
def eager_app():
    """Yield the executor Celery app switched to eager execution.

    The three overridden settings are captured first and written back
    once the test using the fixture has finished.
    """
    from executor.worker import app

    overridden_keys = (
        "task_always_eager",
        "task_eager_propagates",
        "result_backend",
    )
    original = {key: getattr(app.conf, key) for key in overridden_keys}

    app.conf.update(
        task_always_eager=True,
        task_eager_propagates=False,
        result_backend="cache+memory://",
    )
    yield app
    app.conf.update(original)


class TestSummarizeCeleryChain:
    @patch(_PATCH_GET_PROMPT_DEPS)
    def test_summarize_full_celery_chain(self, mock_deps, eager_app):
        """Run summarize through the execute_extraction task in eager mode."""
        llm_cls = MagicMock()
        llm_cls.return_value = MagicMock()

        # Seven-slot dependency tuple; the LLM class sits in the fifth slot.
        mock_deps.return_value = (
            MagicMock(),
            MagicMock(),
            MagicMock(),
            MagicMock(),
            llm_cls,
            MagicMock(),
            MagicMock(),
        )

        # run_completion is stubbed so no real completion is attempted.
        completion_patch = patch(
            "executor.executors.answer_prompt.AnswerPromptService.run_completion",
            return_value="Celery chain summary.",
        )
        with completion_patch:
            _register_legacy()

            context = ExecutionContext(
                executor_name="legacy",
                operation="summarize",
                run_id="run-celery-summarize",
                execution_source="ide",
                organization_id="org-1",
                executor_params={
                    "llm_adapter_instance_id": "llm-1",
                    "summarize_prompt": "Summarize.",
                    "context": "Document text for celery chain.",
                    "prompt_keys": ["amount"],
                    "PLATFORM_SERVICE_API_KEY": "key-1",
                },
            )

            # Round-trip: serialized context in, serialized result out.
            extraction_task = eager_app.tasks["execute_extraction"]
            raw_result = extraction_task.apply(args=[context.to_dict()]).get()
            outcome = ExecutionResult.from_dict(raw_result)

        assert outcome.success
        assert outcome.data["data"] == "Celery chain summary."
diff --git a/workers/tests/test_sanity_phase6j.py b/workers/tests/test_sanity_phase6j.py new file mode 100644 index 0000000000..c4e7c6631e --- /dev/null +++ b/workers/tests/test_sanity_phase6j.py @@ -0,0 +1,663 @@ +"""Phase 6J — Comprehensive Phase 6 sanity tests. + +Consolidated regression + integration tests for the full Phase 6 +plugin migration. Verifies: + +1. Full Operation enum coverage — every operation has exactly one executor +2. Multi-executor coexistence in ExecutorRegistry +3. End-to-end Celery chain for each cloud executor (mock executors) +4. Cross-cutting highlight plugin works across executors +5. Plugin loader → executor registration → dispatch → result flow +6. Queue routing for all executor names +7. Graceful degradation when cloud plugins missing +8. tasks.py log_component for all operation types +""" + +from unittest.mock import MagicMock, patch + +import pytest + +from unstract.sdk1.execution.context import ExecutionContext, Operation +from unstract.sdk1.execution.dispatcher import ExecutionDispatcher +from unstract.sdk1.execution.executor import BaseExecutor +from unstract.sdk1.execution.orchestrator import ExecutionOrchestrator +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture(autouse=True) +def _clean_registry(): + ExecutorRegistry.clear() + yield + ExecutorRegistry.clear() + + +@pytest.fixture +def eager_app(): + """Configure executor Celery app for eager-mode testing.""" + from executor.worker import app + + original = { + "task_always_eager": app.conf.task_always_eager, + "task_eager_propagates": app.conf.task_eager_propagates, + "result_backend": app.conf.result_backend, + } + app.conf.update( + task_always_eager=True, + task_eager_propagates=False, + 
result_backend="cache+memory://", + ) + yield app + app.conf.update(original) + + +def _register_legacy(): + from executor.executors.legacy_executor import LegacyExecutor + ExecutorRegistry.register(LegacyExecutor) + + +# Mock cloud executors for multi-executor tests +def _register_mock_cloud_executors(): + """Register mock cloud executors alongside LegacyExecutor.""" + + @ExecutorRegistry.register + class MockTableExecutor(BaseExecutor): + @property + def name(self) -> str: + return "table" + + def execute(self, context): + if context.operation != "table_extract": + return ExecutionResult.failure( + error=f"Unsupported: {context.operation}" + ) + return ExecutionResult( + success=True, + data={"output": "table_data", "metadata": {}}, + ) + + @ExecutorRegistry.register + class MockSmartTableExecutor(BaseExecutor): + @property + def name(self) -> str: + return "smart_table" + + def execute(self, context): + if context.operation != "smart_table_extract": + return ExecutionResult.failure( + error=f"Unsupported: {context.operation}" + ) + return ExecutionResult( + success=True, + data={"output": "smart_table_data", "metadata": {}}, + ) + + @ExecutorRegistry.register + class MockSPSExecutor(BaseExecutor): + @property + def name(self) -> str: + return "simple_prompt_studio" + + def execute(self, context): + if context.operation not in ("sps_answer_prompt", "sps_index"): + return ExecutionResult.failure( + error=f"Unsupported: {context.operation}" + ) + return ExecutionResult( + success=True, + data={"output": f"sps_{context.operation}", "metadata": {}}, + ) + + @ExecutorRegistry.register + class MockAgenticExecutor(BaseExecutor): + _OPS = { + "agentic_extract", "agentic_summarize", "agentic_uniformize", + "agentic_finalize", "agentic_generate_prompt", + "agentic_generate_prompt_pipeline", "agentic_compare", + "agentic_tune_field", + } + + @property + def name(self) -> str: + return "agentic" + + def execute(self, context): + if context.operation not in self._OPS: + 
return ExecutionResult.failure( + error=f"Unsupported: {context.operation}" + ) + return ExecutionResult( + success=True, + data={"output": f"agentic_{context.operation}", "metadata": {}}, + ) + + +# --------------------------------------------------------------------------- +# 1. Full Operation enum coverage — every operation has exactly one executor +# --------------------------------------------------------------------------- + +# Map of every Operation value to the executor that handles it +OPERATION_TO_EXECUTOR = { + # LegacyExecutor (OSS) + "extract": "legacy", + "index": "legacy", + "answer_prompt": "legacy", + "single_pass_extraction": "legacy", + "summarize": "legacy", + "ide_index": "legacy", + "structure_pipeline": "legacy", + # Cloud executors + "table_extract": "table", + "smart_table_extract": "smart_table", + "sps_answer_prompt": "simple_prompt_studio", + "sps_index": "simple_prompt_studio", + "agentic_extract": "agentic", + "agentic_summarize": "agentic", + "agentic_uniformize": "agentic", + "agentic_finalize": "agentic", + "agentic_generate_prompt": "agentic", + "agentic_generate_prompt_pipeline": "agentic", + "agentic_compare": "agentic", + "agentic_tune_field": "agentic", +} + + +class TestOperationEnumCoverage: + def test_every_operation_is_mapped(self): + """Every Operation enum value has an assigned executor.""" + for op in Operation: + assert op.value in OPERATION_TO_EXECUTOR, ( + f"Operation {op.value} not mapped to any executor" + ) + + def test_no_extra_mappings(self): + """No stale mappings for removed operations.""" + valid_ops = {op.value for op in Operation} + for mapped_op in OPERATION_TO_EXECUTOR: + assert mapped_op in valid_ops, ( + f"Mapped operation '{mapped_op}' not in Operation enum" + ) + + def test_operation_count(self): + """Verify total operation count matches expectations.""" + assert len(Operation) == 19 # 7 legacy + 2 table + 2 sps + 8 agentic + + def test_legacy_operations_in_operation_map(self): + """All legacy 
operations are in LegacyExecutor._OPERATION_MAP.""" + from executor.executors.legacy_executor import LegacyExecutor + + for op_value, executor_name in OPERATION_TO_EXECUTOR.items(): + if executor_name == "legacy": + assert op_value in LegacyExecutor._OPERATION_MAP, ( + f"Legacy operation {op_value} missing from _OPERATION_MAP" + ) + + def test_cloud_operations_not_in_legacy_map(self): + """Cloud operations are NOT in LegacyExecutor._OPERATION_MAP.""" + from executor.executors.legacy_executor import LegacyExecutor + + for op_value, executor_name in OPERATION_TO_EXECUTOR.items(): + if executor_name != "legacy": + assert op_value not in LegacyExecutor._OPERATION_MAP, ( + f"Cloud operation {op_value} should NOT be in legacy map" + ) + + +# --------------------------------------------------------------------------- +# 2. Multi-executor coexistence in registry +# --------------------------------------------------------------------------- + +class TestMultiExecutorCoexistence: + def test_all_five_executors_registered(self): + """Legacy + 4 cloud executors all coexist in registry.""" + _register_legacy() + _register_mock_cloud_executors() + + executors = ExecutorRegistry.list_executors() + assert "legacy" in executors + assert "table" in executors + assert "smart_table" in executors + assert "simple_prompt_studio" in executors + assert "agentic" in executors + assert len(executors) == 5 + + def test_each_executor_has_correct_name(self): + _register_legacy() + _register_mock_cloud_executors() + + for name in ["legacy", "table", "smart_table", "simple_prompt_studio", "agentic"]: + executor = ExecutorRegistry.get(name) + assert executor.name == name + + def test_wrong_executor_rejects_operation(self): + """Dispatching a table operation to legacy returns failure.""" + _register_legacy() + _register_mock_cloud_executors() + + legacy = ExecutorRegistry.get("legacy") + ctx = ExecutionContext( + executor_name="legacy", + operation="table_extract", + run_id="run-1", + 
execution_source="tool", + ) + result = legacy.execute(ctx) + assert not result.success + assert "does not support" in result.error + + def test_correct_executor_handles_operation(self): + """Each operation routes to the right executor.""" + _register_legacy() + _register_mock_cloud_executors() + + test_cases = [ + ("table", "table_extract"), + ("smart_table", "smart_table_extract"), + ("simple_prompt_studio", "sps_answer_prompt"), + ("simple_prompt_studio", "sps_index"), + ("agentic", "agentic_extract"), + ("agentic", "agentic_compare"), + ] + for executor_name, operation in test_cases: + executor = ExecutorRegistry.get(executor_name) + ctx = ExecutionContext( + executor_name=executor_name, + operation=operation, + run_id=f"run-{operation}", + execution_source="tool", + ) + result = executor.execute(ctx) + assert result.success, f"{executor_name}/{operation} failed" + + +# --------------------------------------------------------------------------- +# 3. End-to-end Celery chain for cloud executors +# --------------------------------------------------------------------------- + +class TestCeleryChainCloudExecutors: + def test_table_extract_celery_chain(self, eager_app): + """TABLE extraction through full Celery task chain.""" + _register_legacy() + _register_mock_cloud_executors() + + ctx = ExecutionContext( + executor_name="table", + operation="table_extract", + run_id="run-celery-table", + execution_source="tool", + ) + task = eager_app.tasks["execute_extraction"] + result_dict = task.apply(args=[ctx.to_dict()]).get() + result = ExecutionResult.from_dict(result_dict) + + assert result.success + assert result.data["output"] == "table_data" + + def test_smart_table_extract_celery_chain(self, eager_app): + """SMART TABLE extraction through full Celery task chain.""" + _register_legacy() + _register_mock_cloud_executors() + + ctx = ExecutionContext( + executor_name="smart_table", + operation="smart_table_extract", + run_id="run-celery-smart-table", + 
execution_source="tool", + ) + task = eager_app.tasks["execute_extraction"] + result_dict = task.apply(args=[ctx.to_dict()]).get() + result = ExecutionResult.from_dict(result_dict) + + assert result.success + assert result.data["output"] == "smart_table_data" + + def test_sps_answer_prompt_celery_chain(self, eager_app): + """SPS answer_prompt through full Celery task chain.""" + _register_legacy() + _register_mock_cloud_executors() + + ctx = ExecutionContext( + executor_name="simple_prompt_studio", + operation="sps_answer_prompt", + run_id="run-celery-sps", + execution_source="tool", + ) + task = eager_app.tasks["execute_extraction"] + result_dict = task.apply(args=[ctx.to_dict()]).get() + result = ExecutionResult.from_dict(result_dict) + + assert result.success + + def test_agentic_extract_celery_chain(self, eager_app): + """Agentic extraction through full Celery task chain.""" + _register_legacy() + _register_mock_cloud_executors() + + ctx = ExecutionContext( + executor_name="agentic", + operation="agentic_extract", + run_id="run-celery-agentic", + execution_source="tool", + ) + task = eager_app.tasks["execute_extraction"] + result_dict = task.apply(args=[ctx.to_dict()]).get() + result = ExecutionResult.from_dict(result_dict) + + assert result.success + + def test_unregistered_executor_returns_failure(self, eager_app): + """Dispatching to unregistered executor returns failure.""" + _register_legacy() + # Don't register cloud executors + + ctx = ExecutionContext( + executor_name="table", + operation="table_extract", + run_id="run-missing", + execution_source="tool", + ) + task = eager_app.tasks["execute_extraction"] + result_dict = task.apply(args=[ctx.to_dict()]).get() + result = ExecutionResult.from_dict(result_dict) + + assert not result.success + assert "table" in result.error.lower() + + +# --------------------------------------------------------------------------- +# 4. 
Cross-cutting highlight plugin across executors +# --------------------------------------------------------------------------- + +class TestCrossCuttingHighlight: + @patch("importlib.metadata.entry_points", return_value=[]) + def test_highlight_plugin_not_installed_no_error(self, _mock_eps): + """When highlight plugin not installed, extraction still works.""" + from executor.executors.plugins.loader import ExecutorPluginLoader + + ExecutorPluginLoader.clear() + assert ExecutorPluginLoader.get("highlight-data") is None + # No error — graceful degradation + + def test_mock_highlight_plugin_shared_across_executors(self, tmp_path): + """Multiple executors can use the same highlight plugin instance.""" + from executor.executors.plugins.loader import ExecutorPluginLoader + + class FakeHighlight: + def __init__(self, **kwargs): + self.kwargs = kwargs + + def run(self, response, **kwargs): + return {"highlighted": True} + + def get_highlight_data(self): + return {"lines": [1, 2, 3]} + + def get_confidence_data(self): + return {"confidence": 0.95} + + fake_ep = MagicMock() + fake_ep.name = "highlight-data" + fake_ep.load.return_value = FakeHighlight + + with patch( + "importlib.metadata.entry_points", + return_value=[fake_ep], + ): + ExecutorPluginLoader.clear() + cls = ExecutorPluginLoader.get("highlight-data") + assert cls is FakeHighlight + + # Both legacy and agentic contexts can create instances + legacy_hl = cls(file_path=str(tmp_path / "doc.txt"), execution_source="ide") + agentic_hl = cls(file_path=str(tmp_path / "other.txt"), execution_source="tool") + + assert legacy_hl.get_highlight_data() == {"lines": [1, 2, 3]} + assert agentic_hl.get_confidence_data() == {"confidence": 0.95} + + +# --------------------------------------------------------------------------- +# 5. 
Plugin loader → registration → dispatch → result flow +# --------------------------------------------------------------------------- + +class TestPluginDiscoveryToDispatchFlow: + def test_full_discovery_to_dispatch_flow(self): + """Simulate: entry point discovery → register → dispatch → result.""" + # Step 1: "Discover" a cloud executor via entry point + @ExecutorRegistry.register + class DiscoveredExecutor(BaseExecutor): + @property + def name(self): + return "discovered" + + def execute(self, context): + return ExecutionResult( + success=True, + data={"output": "discovered_result"}, + ) + + # Step 2: Verify registration + assert "discovered" in ExecutorRegistry.list_executors() + + # Step 3: Dispatch via mock Celery + mock_app = MagicMock() + mock_result = MagicMock() + mock_result.get.return_value = ExecutionResult( + success=True, data={"output": "discovered_result"} + ).to_dict() + mock_app.send_task.return_value = mock_result + + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = ExecutionContext( + executor_name="discovered", + operation="custom_op", + run_id="run-flow", + execution_source="tool", + ) + result = dispatcher.dispatch(ctx) + + # Step 4: Verify result + assert result.success + assert result.data["output"] == "discovered_result" + + # Step 5: Verify queue routing + call_kwargs = mock_app.send_task.call_args + assert call_kwargs.kwargs["queue"] == "celery_executor_discovered" + + +# --------------------------------------------------------------------------- +# 6. 
Queue routing for all executor names +# --------------------------------------------------------------------------- + +EXECUTOR_QUEUE_MAP = { + "legacy": "celery_executor_legacy", + "table": "celery_executor_table", + "smart_table": "celery_executor_smart_table", + "simple_prompt_studio": "celery_executor_simple_prompt_studio", + "agentic": "celery_executor_agentic", +} + + +class TestQueueRoutingAllExecutors: + @pytest.mark.parametrize( + "executor_name,expected_queue", + list(EXECUTOR_QUEUE_MAP.items()), + ) + def test_queue_name_for_executor(self, executor_name, expected_queue): + assert ExecutionDispatcher._get_queue(executor_name) == expected_queue + + +# --------------------------------------------------------------------------- +# 7. Graceful degradation when cloud plugins missing +# --------------------------------------------------------------------------- + +class TestGracefulDegradation: + def test_legacy_works_without_cloud_executors(self, eager_app): + """Legacy operations work even when no cloud executors installed.""" + _register_legacy() + + # Only legacy should be in registry + assert ExecutorRegistry.list_executors() == ["legacy"] + + # Legacy executor can be retrieved from the registry + executor = ExecutorRegistry.get("legacy") + assert executor is not None + assert executor.name == "legacy" + + def test_cloud_op_on_legacy_returns_meaningful_error(self): + """Attempting a cloud operation on legacy gives clear error.""" + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + for cloud_op in ["table_extract", "smart_table_extract", + "sps_answer_prompt", "agentic_extract"]: + ctx = ExecutionContext( + executor_name="legacy", + operation=cloud_op, + run_id=f"run-{cloud_op}", + execution_source="tool", + ) + result = executor.execute(ctx) + assert not result.success + assert "does not support" in result.error + + def test_missing_executor_via_orchestrator(self): + """Orchestrator returns failure for unregistered executor.""" + 
_register_legacy() + orchestrator = ExecutionOrchestrator() + + ctx = ExecutionContext( + executor_name="table", + operation="table_extract", + run_id="run-no-table", + execution_source="tool", + ) + result = orchestrator.execute(ctx) + assert not result.success + assert "table" in result.error.lower() + + +# --------------------------------------------------------------------------- +# 8. tasks.py log_component for all operation types +# --------------------------------------------------------------------------- + +class TestLogComponentAllOperations: + """Verify tasks.py log_component builder handles all operation types.""" + + def _build_log_component(self, operation, executor_params=None): + """Simulate the tasks.py log_component logic.""" + params = executor_params or { + "tool_id": "t-1", + "file_name": "doc.pdf", + } + ctx = ExecutionContext.from_dict({ + "executor_name": "legacy", + "operation": operation, + "run_id": "run-log", + "execution_source": "tool", + "executor_params": params, + "request_id": "req-1", + "log_events_id": "evt-1", + }) + + # Replicate tasks.py logic + if ctx.operation == "ide_index": + extract_params = params.get("extract_params", {}) + return { + "tool_id": extract_params.get("tool_id", ""), + "run_id": ctx.run_id, + "doc_name": str(extract_params.get("file_name", "")), + "operation": ctx.operation, + } + elif ctx.operation == "structure_pipeline": + answer_params = params.get("answer_params", {}) + pipeline_opts = params.get("pipeline_options", {}) + return { + "tool_id": answer_params.get("tool_id", ""), + "run_id": ctx.run_id, + "doc_name": str(pipeline_opts.get("source_file_name", "")), + "operation": ctx.operation, + } + else: + return { + "tool_id": params.get("tool_id", ""), + "run_id": ctx.run_id, + "doc_name": str(params.get("file_name", "")), + "operation": ctx.operation, + } + + def test_ide_index_extracts_nested_params(self): + comp = self._build_log_component("ide_index", { + "extract_params": {"tool_id": "t-nested", 
"file_name": "nested.pdf"}, + }) + assert comp["tool_id"] == "t-nested" + assert comp["doc_name"] == "nested.pdf" + + def test_structure_pipeline_extracts_nested_params(self): + comp = self._build_log_component("structure_pipeline", { + "answer_params": {"tool_id": "t-pipe"}, + "pipeline_options": {"source_file_name": "pipe.pdf"}, + }) + assert comp["tool_id"] == "t-pipe" + assert comp["doc_name"] == "pipe.pdf" + + def test_table_extract_uses_direct_params(self): + comp = self._build_log_component("table_extract") + assert comp["tool_id"] == "t-1" + assert comp["operation"] == "table_extract" + + def test_smart_table_extract_uses_direct_params(self): + comp = self._build_log_component("smart_table_extract") + assert comp["operation"] == "smart_table_extract" + + @pytest.mark.parametrize("op", [ + "extract", "index", "answer_prompt", "single_pass_extraction", + "summarize", "sps_answer_prompt", "sps_index", + "agentic_extract", "agentic_summarize", "agentic_compare", + ]) + def test_default_branch_for_standard_ops(self, op): + comp = self._build_log_component(op) + assert comp["tool_id"] == "t-1" + assert comp["doc_name"] == "doc.pdf" + assert comp["operation"] == op + + +# --------------------------------------------------------------------------- +# 9. 
ExecutionResult serialization round-trip +# --------------------------------------------------------------------------- + +class TestResultRoundTrip: + def test_success_result_round_trip(self): + original = ExecutionResult( + success=True, + data={"output": {"field": "value"}, "metadata": {"tokens": 100}}, + ) + restored = ExecutionResult.from_dict(original.to_dict()) + assert restored.success == original.success + assert restored.data == original.data + + def test_failure_result_round_trip(self): + original = ExecutionResult.failure(error="Something went wrong") + restored = ExecutionResult.from_dict(original.to_dict()) + assert not restored.success + assert restored.error == "Something went wrong" + + def test_context_round_trip(self): + original = ExecutionContext( + executor_name="agentic", + operation="agentic_extract", + run_id="run-rt", + execution_source="tool", + organization_id="org-1", + executor_params={"key": "value"}, + log_events_id="evt-1", + ) + restored = ExecutionContext.from_dict(original.to_dict()) + assert restored.executor_name == "agentic" + assert restored.operation == "agentic_extract" + assert restored.organization_id == "org-1" + assert restored.executor_params == {"key": "value"} + assert restored.log_events_id == "evt-1" diff --git a/workers/tests/test_usage.py b/workers/tests/test_usage.py new file mode 100644 index 0000000000..fc08ac825b --- /dev/null +++ b/workers/tests/test_usage.py @@ -0,0 +1,312 @@ +"""Phase 2G — Usage tracking tests. + +Verifies: +1. UsageHelper.push_usage_data wraps Audit correctly +2. Invalid kwargs returns False +3. Invalid platform_api_key returns False +4. Audit exceptions are caught and return False +5. format_float_positional formats correctly +6. SDK1 adapters already push usage (integration check) +7. 
answer_prompt handler returns metrics in ExecutionResult +""" + +from unittest.mock import MagicMock, patch + + +from executor.executors.usage import UsageHelper + + +# --------------------------------------------------------------------------- +# 1. push_usage_data success +# --------------------------------------------------------------------------- + + +class TestPushUsageData: + @patch("unstract.sdk1.audit.Audit") + def test_push_success(self, mock_audit_cls): + """Successful push returns True and calls Audit.""" + mock_audit = MagicMock() + mock_audit_cls.return_value = mock_audit + + result = UsageHelper.push_usage_data( + event_type="llm", + kwargs={"run_id": "run-001", "execution_id": "exec-001"}, + platform_api_key="test-key", + token_counter=MagicMock(), + model_name="gpt-4", + ) + + assert result is True + mock_audit.push_usage_data.assert_called_once() + call_kwargs = mock_audit.push_usage_data.call_args + assert call_kwargs.kwargs["platform_api_key"] == "test-key" + assert call_kwargs.kwargs["model_name"] == "gpt-4" + assert call_kwargs.kwargs["event_type"] == "llm" + + @patch("unstract.sdk1.audit.Audit") + def test_push_passes_token_counter(self, mock_audit_cls): + """Token counter is passed through to Audit.""" + mock_audit = MagicMock() + mock_audit_cls.return_value = mock_audit + mock_counter = MagicMock() + + UsageHelper.push_usage_data( + event_type="embedding", + kwargs={"run_id": "run-002"}, + platform_api_key="key-2", + token_counter=mock_counter, + ) + + call_kwargs = mock_audit.push_usage_data.call_args + assert call_kwargs.kwargs["token_counter"] is mock_counter + + +# --------------------------------------------------------------------------- +# 2. 
Invalid kwargs +# --------------------------------------------------------------------------- + + +class TestPushValidation: + def test_none_kwargs_returns_false(self): + result = UsageHelper.push_usage_data( + event_type="llm", + kwargs=None, + platform_api_key="key", + ) + assert result is False + + def test_empty_kwargs_returns_false(self): + result = UsageHelper.push_usage_data( + event_type="llm", + kwargs={}, + platform_api_key="key", + ) + assert result is False + + def test_non_dict_kwargs_returns_false(self): + result = UsageHelper.push_usage_data( + event_type="llm", + kwargs="not a dict", + platform_api_key="key", + ) + assert result is False + + +# --------------------------------------------------------------------------- +# 3. Invalid platform_api_key +# --------------------------------------------------------------------------- + + +class TestPushApiKeyValidation: + def test_none_key_returns_false(self): + result = UsageHelper.push_usage_data( + event_type="llm", + kwargs={"run_id": "r1"}, + platform_api_key=None, + ) + assert result is False + + def test_empty_key_returns_false(self): + result = UsageHelper.push_usage_data( + event_type="llm", + kwargs={"run_id": "r1"}, + platform_api_key="", + ) + assert result is False + + def test_non_string_key_returns_false(self): + result = UsageHelper.push_usage_data( + event_type="llm", + kwargs={"run_id": "r1"}, + platform_api_key=12345, + ) + assert result is False + + +# --------------------------------------------------------------------------- +# 4. 
Audit exceptions are caught +# --------------------------------------------------------------------------- + + +class TestPushErrorHandling: + @patch("unstract.sdk1.audit.Audit") + def test_audit_exception_returns_false(self, mock_audit_cls): + """Audit errors are caught and return False.""" + mock_audit = MagicMock() + mock_audit.push_usage_data.side_effect = Exception("Network error") + mock_audit_cls.return_value = mock_audit + + result = UsageHelper.push_usage_data( + event_type="llm", + kwargs={"run_id": "r1"}, + platform_api_key="key", + token_counter=MagicMock(), + ) + + assert result is False + + @patch("unstract.sdk1.audit.Audit") + def test_import_error_returns_false(self, mock_audit_cls): + """Import errors are caught gracefully.""" + mock_audit_cls.side_effect = ImportError("no module") + + result = UsageHelper.push_usage_data( + event_type="llm", + kwargs={"run_id": "r1"}, + platform_api_key="key", + ) + + assert result is False + + +# --------------------------------------------------------------------------- +# 5. format_float_positional +# --------------------------------------------------------------------------- + + +class TestFormatFloat: + def test_normal_float(self): + assert UsageHelper.format_float_positional(0.0001234) == "0.0001234" + + def test_trailing_zeros_removed(self): + assert UsageHelper.format_float_positional(1.50) == "1.5" + + def test_integer_value(self): + assert UsageHelper.format_float_positional(42.0) == "42" + + def test_zero(self): + assert UsageHelper.format_float_positional(0.0) == "0" + + def test_small_value(self): + result = UsageHelper.format_float_positional(0.00000001) + assert "0.00000001" == result + + def test_custom_precision(self): + result = UsageHelper.format_float_positional(1.123456789, precision=3) + assert result == "1.123" + + +# --------------------------------------------------------------------------- +# 6. 
SDK1 adapters already push usage +# --------------------------------------------------------------------------- + + +class TestAdapterUsageTracking: + def test_llm_calls_audit_push(self): + """Verify the LLM adapter imports and calls Audit.push_usage_data. + + This is a static analysis check — we verify the SDK1 LLM module + references Audit.push_usage_data, confirming adapters handle + usage tracking internally. + """ + import inspect + + from unstract.sdk1.llm import LLM + + source = inspect.getsource(LLM) + assert "push_usage_data" in source + assert "Audit" in source + + +# --------------------------------------------------------------------------- +# 7. answer_prompt handler returns metrics +# --------------------------------------------------------------------------- + + +class TestMetricsInResult: + @patch( + "unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-test", + ) + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_answer_prompt_returns_metrics( + self, mock_shim_cls, mock_get_deps, _mock_idx, tmp_path + ): + """answer_prompt result includes metrics dict.""" + from unstract.sdk1.execution.context import ExecutionContext + from unstract.sdk1.execution.registry import ExecutorRegistry + + ExecutorRegistry.clear() + from executor.executors.legacy_executor import LegacyExecutor + + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry.register(LegacyExecutor) + + executor = ExecutorRegistry.get("legacy") + + # Mock all dependencies + mock_llm = MagicMock() + mock_llm.get_metrics.return_value = {"total_tokens": 100} + mock_llm.get_usage_reason.return_value = "extraction" + mock_llm.complete.return_value = { + "response": MagicMock(text="test answer"), + "highlight_data": [], + "confidence_data": None, + "word_confidence_data": None, + "line_numbers": [], + "whisper_hash": "", + } + + mock_llm_cls 
= MagicMock(return_value=mock_llm) + mock_index = MagicMock() + mock_index.return_value.generate_index_key.return_value = "doc-123" + + mock_get_deps.return_value = ( + MagicMock(), # AnswerPromptService — use real for construct + MagicMock(), # RetrievalService + MagicMock(), # VariableReplacementService + mock_index, # Index + mock_llm_cls, # LLM + MagicMock(), # EmbeddingCompat + MagicMock(), # VectorDB + ) + + # Patch AnswerPromptService methods at their real location + with patch( + "executor.executors.answer_prompt.AnswerPromptService.extract_variable", + return_value="test prompt", + ), patch( + "executor.executors.answer_prompt.AnswerPromptService.construct_and_run_prompt", + return_value="test answer", + ): + ctx = ExecutionContext( + executor_name="legacy", + operation="answer_prompt", + run_id="run-metrics-001", + execution_source="tool", + organization_id="org-test", + request_id="req-metrics-001", + executor_params={ + "tool_settings": {}, + "outputs": [ + { + "name": "field1", + "prompt": "What is X?", + "chunk-size": 512, + "chunk-overlap": 64, + "vector-db": "vdb-1", + "embedding": "emb-1", + "x2text_adapter": "x2t-1", + "llm": "llm-1", + "type": "text", + "retrieval-strategy": "simple", + "similarity-top-k": 5, + }, + ], + "tool_id": "tool-1", + "file_hash": "hash123", + "file_path": str(tmp_path / "test.txt"), + "file_name": "test.txt", + "PLATFORM_SERVICE_API_KEY": "test-key", + }, + ) + result = executor.execute(ctx) + + assert result.success is True + assert "metrics" in result.data + assert "field1" in result.data["metrics"] + + ExecutorRegistry.clear() diff --git a/workers/uv.lock b/workers/uv.lock index 02ea3d3ffd..4d540d78bc 100644 --- a/workers/uv.lock +++ b/workers/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.12" resolution-markers = [ "python_full_version >= '3.14'", @@ -3090,7 +3090,7 @@ dependencies = [ { name = "pinecone-plugin-interface" }, { name = "python-dateutil" }, { name = 
"typing-extensions" }, - { name = "urllib3", marker = "python_full_version < '4.0'" }, + { name = "urllib3", marker = "python_full_version < '4'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/bc/9d/07a7f2136ce04cabd21d69c057dc2915867082b0047e6873e424388d4475/pinecone-7.0.1.tar.gz", hash = "sha256:49ff7b0f5be4a2ddec5aaa709758a9f2df56baa58ad46507d081409e246a81ec", size = 207930, upload-time = "2025-05-21T19:39:01.218Z" } wheels = [