@@ -333,13 +333,16 @@ def _process_file_sync(
333333 rel_path : str ,
334334 cfg : Optional [Dict [str , Any ]],
335335 incremental : bool = True ,
336- processed_count : Optional [list ] = None ,
336+ processed_count : Optional [list ] = None , # [counter: int, lock: threading.Lock]
337337 total_files : int = 0 ,
338338):
339339 """
340340 Synchronous implementation of per-file processing.
341341 Intended to run on a ThreadPoolExecutor worker thread.
342342 Returns a dict: {"stored": bool, "embedded": bool, "skipped": bool}
343+
344+ Args:
345+ processed_count: Thread-safe counter as [count, lock] for progress tracking
343346 """
344347 try :
345348 # read file content
@@ -368,8 +371,10 @@ def _process_file_sync(
368371
369372 # Log file processing with progress
370373 if processed_count is not None and total_files > 0 :
371- current = len (processed_count )
372- logger .info (f"Processing file ({ current } /{ total_files } ): { rel_path } " )
374+ # processed_count is [counter, lock] tuple for thread-safety
375+ with processed_count [1 ]:
376+ current = processed_count [0 ]
377+ logger .info (f"Processing file ({ current } /{ total_files } ): { rel_path } " )
373378 else :
374379 logger .info (f"Processing file: { rel_path } " )
375380
@@ -533,9 +538,8 @@ def analyze_local_path_sync(
533538 total_files = len (file_paths )
534539 logger .info (f"Found { total_files } files to process" )
535540
536- # Thread-safe counter for progress tracking
537- processed_count = []
538- processed_lock = threading .Lock ()
541+ # Thread-safe counter for progress tracking: [count, lock]
542+ processed_count = [0 , threading .Lock ()]
539543
540544 # Process files in chunks to avoid too many futures at once.
541545 CHUNK_SUBMIT = 256
@@ -559,8 +563,10 @@ def analyze_local_path_sync(
559563 for fut in concurrent .futures .as_completed (futures ):
560564 try :
561565 r = fut .result ()
562- with processed_lock :
563- processed_count .append (1 )
566+ with processed_count [1 ]:
567+ processed_count [0 ] += 1
568+ current_processed = processed_count [0 ]
569+
564570 if isinstance (r , dict ):
565571 if r .get ("stored" ):
566572 file_count += 1
@@ -570,7 +576,6 @@ def analyze_local_path_sync(
570576 skipped_count += 1
571577
572578 # Log periodic progress updates (every 10 files)
573- current_processed = len (processed_count )
574579 if current_processed % 10 == 0 :
575580 logger .info (f"Progress: { current_processed } /{ total_files } files processed ({ file_count } stored, { emb_count } with embeddings, { skipped_count } skipped)" )
576581 except Exception :
0 commit comments