@@ -173,43 +173,27 @@ def init_db(database_path: str) -> None:
173173 """
174174 Initialize database schema. Safe to call multiple times.
175175 Creates:
176- - analyses (embedding_count column kept for backward compat but not used as source of truth)
177- - files
176+ - files (stores full content of indexed files)
178177 - chunks (with embedding BLOB column for sqlite-vector)
179178 """
180179 conn = _get_connection (database_path )
181180 try :
182181 cur = conn .cursor ()
183- # analyses table: embedding_count column kept for compatibility but will be computed live
184- cur .execute (
185- """
186- CREATE TABLE IF NOT EXISTS analyses (
187- id INTEGER PRIMARY KEY AUTOINCREMENT,
188- name TEXT NOT NULL,
189- path TEXT NOT NULL,
190- status TEXT NOT NULL,
191- embedding_count INTEGER DEFAULT 0,
192- created_at TEXT DEFAULT (datetime('now'))
193- )
194- """
195- )
196-
182+
197183 # files table (stores full content, used to reconstruct chunks)
198184 cur .execute (
199185 """
200186 CREATE TABLE IF NOT EXISTS files (
201187 id INTEGER PRIMARY KEY AUTOINCREMENT,
202- analysis_id INTEGER NOT NULL,
203188 path TEXT NOT NULL,
204189 content TEXT,
205190 language TEXT,
206191 snippet TEXT,
207- created_at TEXT DEFAULT (datetime('now')),
208- FOREIGN KEY (analysis_id) REFERENCES analyses(id) ON DELETE CASCADE
192+ created_at TEXT DEFAULT (datetime('now'))
209193 )
210194 """
211195 )
212- cur .execute ("CREATE INDEX IF NOT EXISTS idx_files_analysis ON files(analysis_id );" )
196+ cur .execute ("CREATE INDEX IF NOT EXISTS idx_files_path ON files(path );" )
213197
214198 # chunks table: metadata for chunked documents; includes embedding BLOB column
215199 cur .execute (
@@ -231,39 +215,15 @@ def init_db(database_path: str) -> None:
231215 conn .close ()
232216
233217
234- def create_analysis (database_path : str , name : str , path : str , status : str = "pending" ) -> int :
235- conn = _get_connection (database_path )
236- try :
237- cur = conn .cursor ()
238- cur .execute (
239- "INSERT INTO analyses (name, path, status) VALUES (?, ?, ?)" ,
240- (name , path , status ),
241- )
242- conn .commit ()
243- return int (cur .lastrowid )
244- finally :
245- conn .close ()
246-
247-
248- def update_analysis_status (database_path : str , analysis_id : int , status : str ) -> None :
249- conn = _get_connection (database_path )
250- try :
251- cur = conn .cursor ()
252- cur .execute ("UPDATE analyses SET status = ? WHERE id = ?" , (status , analysis_id ))
253- conn .commit ()
254- finally :
255- conn .close ()
256-
257-
258- def store_file (database_path , analysis_id , path , content , language ):
218+ def store_file (database_path , path , content , language ):
259219 """
260220 Insert a file record into the DB using a queued single-writer to avoid
261221 sqlite 'database is locked' errors in multithreaded scenarios.
262222 Returns lastrowid (same as the previous store_file implementation).
263223 """
264224 snippet = (content [:512 ] if content else "" )
265- sql = "INSERT INTO files (analysis_id, path, content, language, snippet) VALUES (?, ?, ?, ?, ?)"
266- params = (analysis_id , path , content , language , snippet )
225+ sql = "INSERT INTO files (path, content, language, snippet) VALUES (?, ?, ?, ?)"
226+ params = (path , content , language , snippet )
267227
268228 writer = _get_writer (database_path )
269229 # We wait for the background writer to complete the insert and then return the row id.
@@ -289,75 +249,66 @@ def insert_chunk_row_with_null_embedding(database_path: str, file_id: int, path:
289249 conn .close ()
290250
291251
def get_project_stats(database_path: str) -> Dict[str, Any]:
    """
    Summarize the contents of a project database.

    Returns a dict with two integer entries:
      - "file_count": number of rows in the files table
      - "embedding_count": number of rows in the chunks table whose
        embedding column is not NULL
    """
    conn = _get_connection(database_path)
    try:
        cursor = conn.cursor()

        def _scalar(query: str) -> int:
            # Each query is a single COUNT(*), so the first column of the
            # only returned row is the value we are after.
            cursor.execute(query)
            return int(cursor.fetchone()[0])

        stats: Dict[str, Any] = {}
        stats["file_count"] = _scalar("SELECT COUNT(*) FROM files")
        stats["embedding_count"] = _scalar(
            "SELECT COUNT(*) FROM chunks WHERE embedding IS NOT NULL"
        )
        return stats
    finally:
        # Always release the connection, even if a query fails.
        conn.close()
331275
332276
def list_files(database_path: str) -> List[Dict[str, Any]]:
    """
    Return every row of the files table, newest id first.

    Each entry is a dict with the keys "id", "path", "snippet",
    "language" and "created_at". The full file content is not selected.
    """
    columns = ("id", "path", "snippet", "language", "created_at")
    query = "SELECT id, path, snippet, language, created_at FROM files ORDER BY id DESC"

    conn = _get_connection(database_path)
    try:
        records = conn.execute(query).fetchall()
        # Rows are read by column name; this assumes _get_connection
        # configures a name-addressable row type (e.g. sqlite3.Row) -- TODO confirm.
        return [{column: record[column] for column in columns} for record in records]
    finally:
        conn.close()
342298
343299
def clear_project_data(database_path: str) -> None:
    """
    Remove every row from the chunks and files tables.

    Intended for use before re-indexing a project. Both deletes are
    committed together with a single commit.
    """
    # Order matters: chunks are deleted before files. Presumably chunks
    # reference files through a foreign key -- verify against the schema.
    statements = ("DELETE FROM chunks", "DELETE FROM files")

    conn = _get_connection(database_path)
    try:
        cursor = conn.cursor()
        for statement in statements:
            cursor.execute(statement)
        conn.commit()
    finally:
        conn.close()
0 commit comments