Skip to content

Commit cf63aba

Browse files
Copilot and Mte90 committed
Fix thread pool deadlock by using separate executors for file and embedding tasks
Co-authored-by: Mte90 <403283+Mte90@users.noreply.github.com>
1 parent 1dccff7 commit cf63aba

File tree

1 file changed

+12
-4
lines changed

1 file changed

+12
-4
lines changed

ai/analyzer.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,16 @@
5656
# With SDK timeout of 15s and max_retries=2, this allows 3 × 15s = 45s + 15s buffer = 60s
5757
EMBEDDING_TIMEOUT = 60 # Timeout in seconds for each embedding API call (including retries)
5858
FILE_PROCESSING_TIMEOUT = 300 # Timeout in seconds for processing a single file (5 minutes)
59-
_THREADPOOL_WORKERS = max(16, EMBEDDING_CONCURRENCY + 8)
60-
_EXECUTOR = concurrent.futures.ThreadPoolExecutor(max_workers=_THREADPOOL_WORKERS)
59+
60+
# Separate executors to avoid deadlock:
61+
# - File executor: runs _process_file_sync tasks (one per file)
62+
# - Embedding executor: runs _get_embedding_with_semaphore tasks (multiple per file)
63+
# Without separation, all file threads can block waiting for embedding tasks that can't run
64+
# because all threads are occupied by file tasks.
65+
_FILE_EXECUTOR_WORKERS = 16
66+
_EMBEDDING_EXECUTOR_WORKERS = max(16, EMBEDDING_CONCURRENCY + 8)
67+
_FILE_EXECUTOR = concurrent.futures.ThreadPoolExecutor(max_workers=_FILE_EXECUTOR_WORKERS)
68+
_EMBEDDING_EXECUTOR = concurrent.futures.ThreadPoolExecutor(max_workers=_EMBEDDING_EXECUTOR_WORKERS)
6169

6270
logger = get_logger(__name__)
6371

@@ -219,7 +227,7 @@ def _process_file_sync(
219227
for idx, chunk_doc in batch:
220228
# Submit task to executor; semaphore will be acquired inside the worker
221229
embedding_start_time = time.time()
222-
future = _EXECUTOR.submit(_get_embedding_with_semaphore, semaphore, chunk_doc.text, rel_path, idx, embedding_model)
230+
future = _EMBEDDING_EXECUTOR.submit(_get_embedding_with_semaphore, semaphore, chunk_doc.text, rel_path, idx, embedding_model)
223231
embedding_futures.append((idx, chunk_doc, future, embedding_start_time))
224232

225233
# Wait for batch to complete and store results
@@ -399,7 +407,7 @@ def analyze_local_path_sync(
399407
counters[0] += 1
400408
file_num = counters[0]
401409

402-
fut = _EXECUTOR.submit(
410+
fut = _FILE_EXECUTOR.submit(
403411
_process_file_sync,
404412
semaphore,
405413
database_path,

0 commit comments

Comments
 (0)