From 712d5ddaba9aa1c955858607820e4e814ac8baf2 Mon Sep 17 00:00:00 2001 From: hamed musallam Date: Wed, 14 Jan 2026 16:03:16 +0100 Subject: [PATCH 1/2] feat: add spectra parsing API for file upload and URL input --- app/routers/spectra.py | 224 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 197 insertions(+), 27 deletions(-) diff --git a/app/routers/spectra.py b/app/routers/spectra.py index 853be2e..0bc89a4 100644 --- a/app/routers/spectra.py +++ b/app/routers/spectra.py @@ -1,6 +1,9 @@ -from fastapi import APIRouter, HTTPException, status, UploadFile +from fastapi import APIRouter, HTTPException, status, UploadFile, File, Query from app.schemas import HealthCheck import subprocess +import tempfile +import os +import json router = APIRouter( prefix="/spectra", @@ -9,6 +12,9 @@ responses={404: {"description": "Not Found"}}, ) +# Container name for nmr-cli (from docker-compose.yml) +NMR_CLI_CONTAINER = "nmr-converter" + @router.get("/", include_in_schema=False) @router.get( @@ -33,42 +39,206 @@ def get_health() -> HealthCheck: return HealthCheck(status="OK") +CAPTURE_SNAPSHOT_QUERY = Query( + False, alias="Capture snapshot", description="Generate a image snapshot of the spectra") +AUTO_PROCESSING_QUERY = Query( + False, alias="Automatic processing", + description="Enable automatic processing of spectrum (FID → FT spectra)" +) +AUTO_DETECTION_QUERY = Query( + False, alias="Automatic detection", + description="Enable ranges and zones automatic detection" +) + + +def run_command( + file_path: Optional[str] = None, + url: Optional[str] = None, + capture_snapshot: bool = False, + auto_processing: bool = False, + auto_detection: bool = False, +) -> dict: + + cmd = ["nmr-cli", "parse-spectra"] + + if url: + cmd.extend(["-u", url]) + elif file_path: + cmd.extend(["-p", file_path]) + + if capture_snapshot: + cmd.append("-s") + if auto_processing: + cmd.append("-p") + if auto_detection: + cmd.append("-d") + + try: + result = subprocess.run( + ["docker", "exec", NMR_CLI_CONTAINER] + cmd, + capture_output=True, + text=False, + timeout=120 + ) + except subprocess.TimeoutExpired: + raise HTTPException( + status_code=408, detail="Processing timeout exceeded") + except FileNotFoundError: + raise HTTPException( + status_code=500, detail="Docker not found or nmr-converter container not running.") + + if result.returncode != 0: + error_msg = result.stderr.decode( + "utf-8") if result.stderr else "Unknown error" + raise HTTPException( + status_code=422, detail=f"NMR CLI error: {error_msg}") + + # Parse output + try: + return json.loads(result.stdout.decode("utf-8")) + except json.JSONDecodeError as e: + raise HTTPException( + status_code=500, detail=f"Invalid JSON from NMR CLI: {e}") + + +def copy_file_to_container(local_path: str, container_path: str) -> None: + """Copy a file to the nmr-converter container.""" + try: + subprocess.run( + ["docker", "cp", local_path, + f"{NMR_CLI_CONTAINER}:{container_path}"], + check=True, + capture_output=True, + timeout=30 + ) + except subprocess.CalledProcessError as e: + error_msg = e.stderr.decode("utf-8") if e.stderr else "Unknown error" + raise HTTPException( + status_code=500, detail=f"Failed to copy file to container: {error_msg}") + + +def remove_file_from_container(container_path: str) -> None: + """Remove a file from the nmr-converter container.""" + try: + subprocess.run( + ["docker", "exec", NMR_CLI_CONTAINER, "rm", "-f", container_path], + capture_output=True, + timeout=10 + ) + except Exception: + pass + + +# Parse from File Upload +@router.post( + "/parse/file", + tags=["spectra"], + summary="Parse spectra from uploaded file", + response_description="Spectra data in JSON format", + status_code=status.HTTP_200_OK, +) +async def parse_spectra_from_file( + file: UploadFile = File(..., + description="Upload a spectra file"), + capture_snapshot: bool = CAPTURE_SNAPSHOT_QUERY, + auto_processing: bool = AUTO_PROCESSING_QUERY, + auto_detection: bool = AUTO_DETECTION_QUERY, +): + """ + ## Parse spectra from uploaded file + + **Processing Options:** + - `capture_snapshot (s)` : Capture snapshot of the spectra + - `auto_processing (p)` : Enable automatic processing of spectrum (FID → FT spectra) + - `auto_detection (d)` : Enable ranges and zones automatic detection + + Returns: + Spectra data in JSON format + """ + + local_tmp_path = None + container_tmp_path = None + + try: + contents = await file.read() + + with tempfile.NamedTemporaryFile( + delete=False, + suffix=Path(file.filename).suffix + ) as tmp_file: + tmp_file.write(contents) + local_tmp_path = tmp_file.name + + container_tmp_path = f"/tmp/{Path(local_tmp_path).name}" + + # Copy file to nmr-converter container + copy_file_to_container(local_tmp_path, container_tmp_path) + + # Run nmr-cli and get JSON output + return run_command( + file_path=container_tmp_path, + capture_snapshot=capture_snapshot, + auto_processing=auto_processing, + auto_detection=auto_detection, + ) + + except HTTPException: + raise + except Exception as e: + raise HTTPException( + status_code=422, detail=f"Error parsing the spectra file: {e}") + finally: + if local_tmp_path and os.path.exists(local_tmp_path): + os.unlink(local_tmp_path) + if container_tmp_path: + remove_file_from_container(container_tmp_path) + await file.close() + + +# Parse from URL @router.post( - "/parse", + "/parse/url", tags=["spectra"], - summary="Parse the input spectra format and extract metadata", - response_description="", + summary="Parse spectra from URL", + response_description="Spectra data in JSON format", status_code=status.HTTP_200_OK, ) -async def parse_spectra(file: UploadFile): +async def parse_spectra_from_url( + url: str = Query(..., alias="URL"), + capture_snapshot: bool = CAPTURE_SNAPSHOT_QUERY, + auto_processing: bool = AUTO_PROCESSING_QUERY, + auto_detection: bool = AUTO_DETECTION_QUERY, +): """ - ## Parse the spectra file and extract meta-data - Endpoint uses NMR-load-save to read the input spectra file (.jdx,.nmredata,.dx) and extracts metadata + ## Parse spectra from URL + + **Processing Options:** + - `capture_snapshot (s)` : Capture snapshot of the spectra + - `auto_processing (p)` : Enable automatic processing of spectrum (FID → FT spectra) + - `auto_detection (d)` : Enable ranges and zones automatic detection Returns: - data: spectra data in JSON format + Spectra data in JSON format """ + if not url or not url.strip(): + raise HTTPException( + status_code=400, + detail="URL is required", + headers={"X-Error": "No URL provided"}, + ) + try: - contents = file.file.read() - file_path = "/tmp/" + file.filename - with open(file_path, "wb") as f: - f.write(contents) - p = subprocess.Popen( - "npx nmr-cli -p " + file_path, stdout=subprocess.PIPE, shell=True + output = run_command( + url=url.strip(), + capture_snapshot=capture_snapshot, + auto_processing=auto_processing, + auto_detection=auto_detection, ) - (output, err) = p.communicate() - p_status = p.wait() + return output + + except HTTPException: + raise except Exception as e: raise HTTPException( - status_code=422, - detail="Error parsing the structure " - + e.message - + ". Error: " - + err - + ". Status:" - + p_status, - headers={"X-Error": "RDKit molecule input parse error"}, - ) - finally: - file.file.close() + status_code=422, detail=f"Error parsing spectra from URL: {e}") From a7d6b636be889ffa3e3542daa90c6c5fbac5fcc0 Mon Sep 17 00:00:00 2001 From: hamed musallam Date: Wed, 14 Jan 2026 22:10:45 +0100 Subject: [PATCH 2/2] refactor: parse spectra endpoints --- app/routers/spectra.py | 122 +++++++++++++++++++++++------------------ 1 file changed, 70 insertions(+), 52 deletions(-) diff --git a/app/routers/spectra.py b/app/routers/spectra.py index 0bc89a4..ea2c05d 100644 --- a/app/routers/spectra.py +++ b/app/routers/spectra.py @@ -1,9 +1,11 @@ -from fastapi import APIRouter, HTTPException, status, UploadFile, File, Query +from fastapi import APIRouter, HTTPException, status, UploadFile, File, Form from app.schemas import HealthCheck +from pydantic import BaseModel, HttpUrl, Field import subprocess import tempfile import os import json +from pathlib import Path router = APIRouter( prefix="/spectra", @@ -16,6 +18,23 @@ NMR_CLI_CONTAINER = "nmr-converter" +class UrlParseRequest(BaseModel): + """Request model for parsing spectra from URL""" + url: HttpUrl = Field(..., description="URL of the spectra file") + capture_snapshot: bool = Field( + False, + description="Generate an image snapshot of the spectra" + ) + auto_processing: bool = Field( + False, + description="Enable automatic processing of spectrum (FID → FT spectra)" + ) + auto_detection: bool = Field( + False, + description="Enable ranges and zones automatic detection" + ) + + @router.get("/", include_in_schema=False) @router.get( "/health", @@ -39,25 +58,14 @@ def get_health() -> HealthCheck: return HealthCheck(status="OK") -CAPTURE_SNAPSHOT_QUERY = Query( - False, alias="Capture snapshot", description="Generate a image snapshot of the spectra") -AUTO_PROCESSING_QUERY = Query( - False, alias="Automatic processing", - description="Enable automatic processing of spectrum (FID → FT spectra)" -) -AUTO_DETECTION_QUERY = Query( - False, alias="Automatic detection", - description="Enable ranges and zones automatic detection" -) - - def run_command( - file_path: Optional[str] = None, - url: Optional[str] = None, + file_path: str = None, + url: str = None, capture_snapshot: bool = False, auto_processing: bool = False, auto_detection: bool = False, ) -> dict: + """Execute nmr-cli command in Docker container""" cmd = ["nmr-cli", "parse-spectra"] @@ -82,23 +90,31 @@ def run_command( ) except subprocess.TimeoutExpired: raise HTTPException( - status_code=408, detail="Processing timeout exceeded") + status_code=408, + detail="Processing timeout exceeded" + ) except FileNotFoundError: raise HTTPException( - status_code=500, detail="Docker not found or nmr-converter container not running.") + status_code=500, + detail="Docker not found or nmr-converter container not running." + ) if result.returncode != 0: error_msg = result.stderr.decode( "utf-8") if result.stderr else "Unknown error" raise HTTPException( - status_code=422, detail=f"NMR CLI error: {error_msg}") + status_code=422, + detail=f"NMR CLI error: {error_msg}" + ) # Parse output try: return json.loads(result.stdout.decode("utf-8")) except json.JSONDecodeError as e: raise HTTPException( - status_code=500, detail=f"Invalid JSON from NMR CLI: {e}") + status_code=500, + detail=f"Invalid JSON from NMR CLI: {e}" + ) def copy_file_to_container(local_path: str, container_path: str) -> None: @@ -114,7 +130,9 @@ def copy_file_to_container(local_path: str, container_path: str) -> None: except subprocess.CalledProcessError as e: error_msg = e.stderr.decode("utf-8") if e.stderr else "Unknown error" raise HTTPException( - status_code=500, detail=f"Failed to copy file to container: {error_msg}") + status_code=500, + detail=f"Failed to copy file to container: {error_msg}" + ) def remove_file_from_container(container_path: str) -> None: @@ -129,7 +147,6 @@ def remove_file_from_container(container_path: str) -> None: pass -# Parse from File Upload @router.post( "/parse/file", tags=["spectra"], @@ -138,16 +155,26 @@ def remove_file_from_container(container_path: str) -> None: status_code=status.HTTP_200_OK, ) async def parse_spectra_from_file( - file: UploadFile = File(..., - description="Upload a spectra file"), - capture_snapshot: bool = CAPTURE_SNAPSHOT_QUERY, - auto_processing: bool = AUTO_PROCESSING_QUERY, - auto_detection: bool = AUTO_DETECTION_QUERY, + file: UploadFile = File(..., description="Upload a spectra file"), + capture_snapshot: bool = Form( + False, + description="Generate an image snapshot of the spectra" + ), + auto_processing: bool = Form( + False, + description="Enable automatic processing of spectrum (FID → FT spectra)" + ), + auto_detection: bool = Form( + False, + description="Enable ranges and zones automatic detection" + ), ): """ ## Parse spectra from uploaded file - **Processing Options:** + Upload a spectra file along with processing options using multipart/form-data. + + Processing Options: - `capture_snapshot (s)` : Capture snapshot of the spectra - `auto_processing (p)` : Enable automatic processing of spectrum (FID → FT spectra) - `auto_detection (d)` : Enable ranges and zones automatic detection @@ -186,7 +213,9 @@ async def parse_spectra_from_file( raise except Exception as e: raise HTTPException( - status_code=422, detail=f"Error parsing the spectra file: {e}") + status_code=422, + detail=f"Error parsing the spectra file: {e}" + ) finally: if local_tmp_path and os.path.exists(local_tmp_path): os.unlink(local_tmp_path) @@ -195,7 +224,6 @@ async def parse_spectra_from_file( await file.close() -# Parse from URL @router.post( "/parse/url", tags=["spectra"], @@ -203,16 +231,13 @@ async def parse_spectra_from_file( response_description="Spectra data in JSON format", status_code=status.HTTP_200_OK, ) -async def parse_spectra_from_url( - url: str = Query(..., alias="URL"), - capture_snapshot: bool = CAPTURE_SNAPSHOT_QUERY, - auto_processing: bool = AUTO_PROCESSING_QUERY, - auto_detection: bool = AUTO_DETECTION_QUERY, -): +async def parse_spectra_from_url(request: UrlParseRequest): """ - ## Parse spectra from URL + Parse spectra from URL + + Provide a URL to a spectra file along with processing options using JSON body. - **Processing Options:** + Processing Options: - `capture_snapshot (s)` : Capture snapshot of the spectra - `auto_processing (p)` : Enable automatic processing of spectrum (FID → FT spectra) - `auto_detection (d)` : Enable ranges and zones automatic detection @@ -220,25 +245,18 @@ async def parse_spectra_from_url( Returns: Spectra data in JSON format """ - if not url or not url.strip(): - raise HTTPException( - status_code=400, - detail="URL is required", - headers={"X-Error": "No URL provided"}, - ) - try: - output = run_command( - url=url.strip(), - capture_snapshot=capture_snapshot, - auto_processing=auto_processing, - auto_detection=auto_detection, + return run_command( + url=str(request.url), + capture_snapshot=request.capture_snapshot, + auto_processing=request.auto_processing, + auto_detection=request.auto_detection, ) - return output - except HTTPException: raise except Exception as e: raise HTTPException( - status_code=422, detail=f"Error parsing spectra from URL: {e}") + status_code=422, + detail=f"Error parsing spectra from URL: {e}" + )