From caad8590a89a5f0a66a1ef3b0aeee4a1170647e3 Mon Sep 17 00:00:00 2001 From: Sakib Hossain Date: Thu, 9 Apr 2026 18:13:24 +0600 Subject: [PATCH 01/13] Add preprocess helper and file-based prediction endpoint --- app/app.py | 57 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 10 deletions(-) diff --git a/app/app.py b/app/app.py index d9569a8..76d4844 100644 --- a/app/app.py +++ b/app/app.py @@ -1,4 +1,5 @@ -from fastapi import FastAPI, Depends, Request +from pydantic import BaseModel +from fastapi import FastAPI, Depends, Request, UploadFile from fastapi.responses import JSONResponse, FileResponse from fastapi.staticfiles import StaticFiles from fastapi.exceptions import RequestValidationError @@ -12,6 +13,7 @@ import numpy as np import os +# Helper for loading and self-healing the artifacts def load_or_create_models() -> Tuple[Pipeline,np.ndarray]: model_path = Path("models","estimator.pkl") columns_path = Path("models","column_names.pkl") @@ -34,6 +36,18 @@ def load_or_create_models() -> Tuple[Pipeline,np.ndarray]: return pipe,column_names except Exception as e: raise RuntimeError(f"Artifacts could not be loaded: {e}") + +def preprocess_data(value:BaseModel) -> dict: + # Preprocessing + value:dict = value.model_dump(mode="json") + kick = ["u","g","r","i","z"] + final_value = {key:val for key,val in value.items() if key not in kick} + final_value["u_g_color"] = safe_sub("u","g",value) + final_value["g_r_color"] = safe_sub("g","r",value) + final_value["r_i_color"] = safe_sub("r","i",value) + final_value["i_z_color"] = safe_sub("i","z",value) + + return final_value @asynccontextmanager async def lifespan(app:FastAPI): @@ -91,15 +105,7 @@ def home(): def prediction_ops(value:UserInput, dep:Tuple[Pipeline,np.ndarray] = Depends(get_model)): pipe, column_names = dep column_names:List[str] = column_names.tolist() - - # Preprocessing - value:dict = value.model_dump(mode="json") - kick = ["u","g","r","i","z"] - final_value = {key:val for key,val in value.items() if key not in kick} - final_value["u_g_color"] = safe_sub("u","g",value) - final_value["g_r_color"] = safe_sub("g","r",value) - final_value["r_i_color"] = safe_sub("r","i",value) - final_value["i_z_color"] = safe_sub("i","z",value) + final_value:dict = preprocess_data(value) # Order Check and running prediction final_res = [] @@ -127,3 +133,34 @@ def prediction_ops(value:UserInput, dep:Tuple[Pipeline,np.ndarray] = Depends(get status_code=201, content=msg ) +@app.post("/predict/file") +def prediction_via_file_ops(value:UploadFile, dep: Tuple[Pipeline,np.ndarray] = Depends(get_model)): + pipe, column_names = dep + column_names:List[str] = column_names.tolist() + final_value:dict = preprocess_data(value) + + # Order Check and running prediction + final_res = [] + for col in column_names: + if col == "class": + continue + else: + final_res.append(final_value.get(col,None)) + final_res = np.array(final_res).reshape(1,-1) + + pred_label = int(pipe.predict(final_res)) + pred_proba = pipe.predict_proba(final_res).tolist() + + # Postprocessing + label_map = {0: "GALAXY", 1: "STAR", 2: "QSO"} + pred_label = label_map.get(pred_label) + pred_proba = {lmv:round(proba,3) for lmv,proba in zip(label_map.values(), pred_proba)} + + msg = { + "message": "prediction successful", + "prediction": pred_label, + "probabilities": pred_proba + } + return JSONResponse( + status_code=201, content=msg + ) \ No newline at end of file From 3fbbc8e598d2c4620827b1578b13d412fff3480a Mon Sep 17 00:00:00 2001 From: Sakib Hossain Date: Thu, 9 Apr 2026 20:12:57 +0600 Subject: [PATCH 02/13] Add `python-multipart` to `requirements.txt` --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 609c7d4..2b41ee6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -34,6 +34,7 @@ pydantic_core==2.41.5 pyparsing==3.3.1 python-dateutil==2.9.0.post0 python-dotenv==1.2.1 +python-multipart==0.0.24 pytz==2025.2 requests==2.32.5 scikit-learn==1.8.0 From 0786e647531b95cedb4723c630c1cac9924a5575 Mon Sep 17 00:00:00 2001 From: Sakib Hossain Date: Thu, 9 Apr 2026 20:13:20 +0600 Subject: [PATCH 03/13] Add batch file prediction endpoint in `app.py` --- app/app.py | 68 ++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 51 insertions(+), 17 deletions(-) diff --git a/app/app.py b/app/app.py index 76d4844..fecfe09 100644 --- a/app/app.py +++ b/app/app.py @@ -2,15 +2,18 @@ from fastapi import FastAPI, Depends, Request, UploadFile from fastapi.responses import JSONResponse, FileResponse from fastapi.staticfiles import StaticFiles -from fastapi.exceptions import RequestValidationError +from fastapi.exceptions import RequestValidationError, HTTPException from typing import Tuple,List from sklearn.pipeline import Pipeline from models.fit import main from .schema.validation import UserInput from pathlib import Path +from uuid import uuid4 from contextlib import asynccontextmanager -import joblib +import shutil import numpy as np +import pandas as pd +import joblib import os # Helper for loading and self-healing the artifacts @@ -37,6 +40,7 @@ def load_or_create_models() -> Tuple[Pipeline,np.ndarray]: except Exception as e: raise RuntimeError(f"Artifacts could not be loaded: {e}") +# Helper for performing feature engineering def preprocess_data(value:BaseModel) -> dict: # Preprocessing value:dict = value.model_dump(mode="json") @@ -49,6 +53,23 @@ def preprocess_data(value:BaseModel) -> dict: return final_value +# Helper for validating user-provided csv files +def upload_validator(df:pd.DataFrame,col_names:List[str]) -> pd.DataFrame: + if df.columns.tolist() != col_names: + raise HTTPException( + status_code=422, detail="Uploaded csv file does not match the expected " \ + "columns or their order" + ) + + try: + df = df.astype(float) + except Exception as e: + raise HTTPException( + status_code=422, + detail="All values must be numeric (float-compatible)" + ) + return df + @asynccontextmanager async def lifespan(app:FastAPI): # Load pipeline at start @@ -134,30 +155,43 @@ def prediction_ops(value:UserInput, dep:Tuple[Pipeline,np.ndarray] = Depends(get ) @app.post("/predict/file") -def prediction_via_file_ops(value:UploadFile, dep: Tuple[Pipeline,np.ndarray] = Depends(get_model)): +async def prediction_via_file_ops(payload:UploadFile, dep: Tuple[Pipeline,np.ndarray] = Depends(get_model)): pipe, column_names = dep column_names:List[str] = column_names.tolist() - final_value:dict = preprocess_data(value) + column_names = [i for i in column_names if i not in ["class"]] + + ## Save the incoming .csv file on disk + accepted_exts = [".csv"] + UPLOAD_DIR = Path("app","uploads") + UPLOAD_DIR.mkdir(exist_ok=True) + extension = Path(payload.filename).suffix + if extension not in accepted_exts: + raise HTTPException( + status_code=422, + detail=f"Uploaded data must be in '.csv' format, got {extension} instead" + ) + filename = f"{uuid4()}{extension}" + DESTINATION = UPLOAD_DIR / filename - # Order Check and running prediction - final_res = [] - for col in column_names: - if col == "class": - continue - else: - final_res.append(final_value.get(col,None)) - final_res = np.array(final_res).reshape(1,-1) + try: + with open(DESTINATION,"wb") as f: + shutil.copyfileobj(payload.file,f) + except Exception as e: + return f"Error during payload dump procedure: {e}" + + df = pd.read_csv(DESTINATION) + df = upload_validator(df,column_names) - pred_label = int(pipe.predict(final_res)) - pred_proba = pipe.predict_proba(final_res).tolist() + pred_label:list[float] = pipe.predict(df).tolist() + pred_proba:list[list[float]] = pipe.predict_proba(df).tolist() # Postprocessing label_map = {0: "GALAXY", 1: "STAR", 2: "QSO"} - pred_label = label_map.get(pred_label) - pred_proba = {lmv:round(proba,3) for lmv,proba in zip(label_map.values(), pred_proba)} + pred_label:list[str] = [label_map.get(pred) for pred in pred_label] + pred_proba = [[round(r) for r in pred] for pred in pred_proba] msg = { - "message": "prediction successful", + "message": "batch prediction successful", "prediction": pred_label, "probabilities": pred_proba } From 1a961f9adfc2c4a850f57111d376f8ea262ab700 Mon Sep 17 00:00:00 2001 From: Sakib Hossain Date: Thu, 9 Apr 2026 20:14:00 +0600 Subject: [PATCH 04/13] Add `sample_generator.py` for basic testing --- Datasets/sample_generator.py | 55 ++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 Datasets/sample_generator.py diff --git a/Datasets/sample_generator.py b/Datasets/sample_generator.py new file mode 100644 index 0000000..79a2e9b --- /dev/null +++ b/Datasets/sample_generator.py @@ -0,0 +1,55 @@ +import pandas as pd +from pathlib import Path + +DATASET_PATH = Path("Datasets","SDSS_DR18.csv") +OUTPUT_PATH = Path("Datasets","samples.csv") + +def preprocess(df: pd.DataFrame) -> pd.DataFrame: + # Drop irrelevant columns (ignore if already absent) + drop_cols = ["objid", "specobjid", "run", "rerun", "camcol", + "field", "plate", "mjd", "fiberid"] + df = df.drop(columns=[c for c in drop_cols if c in df.columns]) + + # Map string classes to numeric labels + df["class"] = df["class"].map({"GALAXY": 0, "STAR": 1, "QSO": 2}) + + # Feature reduction + df = df[["ra", "dec", "redshift", "u", "g", "r", "i", "z", "psfMag_r", "class"]].copy() + + # Feature engineering — color contrast columns + df["u_g_color"] = df["u"] - df["g"] + df["g_r_color"] = df["g"] - df["r"] + df["r_i_color"] = df["r"] - df["i"] + df["i_z_color"] = df["i"] - df["z"] + df = df.drop(columns=["u", "g", "r", "i", "z"]) + + # Move `class` to the end + popped_class = df.pop("class") + df.insert(len(df.columns), "class", popped_class) + + return df + + +def stratified_sample(df: pd.DataFrame, total_samples: int, class_col: str = "class", random_state: int = 42) -> pd.DataFrame: + class_counts = df[class_col].value_counts(normalize=True) + class_n = (class_counts * total_samples).round().astype(int) + + # Fix rounding drift + diff = total_samples - class_n.sum() + class_n.iloc[0] += diff + + return df.groupby(class_col, group_keys=False).apply( + lambda x: x.sample(n=min(class_n[x.name], len(x)), random_state=random_state) + ) + + +# --- Usage --- +df_raw = pd.read_csv(DATASET_PATH) + +df_processed = preprocess(df_raw) + +sample = stratified_sample(df_processed, total_samples=100) + +sample = sample.drop(columns=["class"]) + +sample.to_csv(OUTPUT_PATH,index=False) \ No newline at end of file From 934ee6db8ed81318edc6a0d82f69ee7c85e797de Mon Sep 17 00:00:00 2001 From: Sakib Hossain Date: Thu, 9 Apr 2026 20:17:28 +0600 Subject: [PATCH 05/13] Add docstring documentation for `sample_generator.py` --- Datasets/sample_generator.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/Datasets/sample_generator.py b/Datasets/sample_generator.py index 79a2e9b..110f28c 100644 --- a/Datasets/sample_generator.py +++ b/Datasets/sample_generator.py @@ -1,3 +1,14 @@ +""" +sample_generator.py + +This script is used to generate samples directly from the main dataset. +These samples are used to test the `/predict/file` route. +To run it, enter this in your command line: +``` +python -m Datasets.sample_generator.py +``` +""" + import pandas as pd from pathlib import Path @@ -5,25 +16,20 @@ OUTPUT_PATH = Path("Datasets","samples.csv") def preprocess(df: pd.DataFrame) -> pd.DataFrame: - # Drop irrelevant columns (ignore if already absent) drop_cols = ["objid", "specobjid", "run", "rerun", "camcol", "field", "plate", "mjd", "fiberid"] df = df.drop(columns=[c for c in drop_cols if c in df.columns]) - # Map string classes to numeric labels df["class"] = df["class"].map({"GALAXY": 0, "STAR": 1, "QSO": 2}) - # Feature reduction df = df[["ra", "dec", "redshift", "u", "g", "r", "i", "z", "psfMag_r", "class"]].copy() - # Feature engineering — color contrast columns df["u_g_color"] = df["u"] - df["g"] df["g_r_color"] = df["g"] - df["r"] df["r_i_color"] = df["r"] - df["i"] df["i_z_color"] = df["i"] - df["z"] df = df.drop(columns=["u", "g", "r", "i", "z"]) - # Move `class` to the end popped_class = df.pop("class") df.insert(len(df.columns), "class", popped_class) @@ -34,7 +40,6 @@ def stratified_sample(df: pd.DataFrame, total_samples: int, class_col: str = "cl class_counts = df[class_col].value_counts(normalize=True) class_n = (class_counts * total_samples).round().astype(int) - # Fix rounding drift diff = total_samples - class_n.sum() class_n.iloc[0] += diff @@ -43,7 +48,6 @@ def stratified_sample(df: pd.DataFrame, total_samples: int, class_col: str = "cl ) -# --- Usage --- df_raw = pd.read_csv(DATASET_PATH) df_processed = preprocess(df_raw) From 22f74ab63d921f861830e16cb5b6eac529918f99 Mon Sep 17 00:00:00 2001 From: Sakib Hossain Date: Thu, 9 Apr 2026 20:55:52 +0600 Subject: [PATCH 06/13] Add new front end modification for the new batch prediction feature --- app/static/script.js | 177 +++++++++++++++++++++++++++++++++++++++ app/static/style.css | 137 ++++++++++++++++++++++++++++++ app/templates/index.html | 107 +++++++++++++++++++++++ 3 files changed, 421 insertions(+) diff --git a/app/static/script.js b/app/static/script.js index ccd2f7a..8485fcb 100644 --- a/app/static/script.js +++ b/app/static/script.js @@ -7,6 +7,7 @@ document.addEventListener("DOMContentLoaded", function() { // Initialize components initTabNavigation(); initPredictForm(); + initBatchPredictForm(); initKeyboardShortcuts(); }); @@ -286,3 +287,179 @@ window.CosmoClassifier = { formatNumber, debounce }; + +/** + * Batch Prediction Form Handler + */ +function initBatchPredictForm() { + const fileInput = document.getElementById("batchFile"); + const dropzone = document.getElementById("fileUploadDropzone"); + const fileInfo = document.getElementById("fileInfo"); + const selectedFileName = document.getElementById("selectedFileName"); + const selectedFileSize = document.getElementById("selectedFileSize"); + const removeFileBtn = document.getElementById("removeFileBtn"); + const submitBtn = document.getElementById("batchPredictBtn"); + const form = document.getElementById("batchPredictForm"); + + if(!fileInput) return; + + const awaitingBatch = document.getElementById("awaiting-batch"); + const batchResult = document.getElementById("batch-result"); + const tbody = document.getElementById("batchTableBody"); + + const MAX_SIZE = 5 * 1024 * 1024; // 5MB + + function handleFile(file) { + if (!file) return; + + if (!file.name.toLowerCase().endsWith('.csv')) { + showToast("Only .csv files are allowed", "error"); + fileInput.value = ""; + return; + } + + if (file.size > MAX_SIZE) { + showToast("File size exceeds 5MB limit", "error"); + fileInput.value = ""; + return; + } + + selectedFileName.textContent = file.name; + selectedFileSize.textContent = (file.size / 1024 / 1024).toFixed(2) + " MB"; + + dropzone.classList.add("hidden"); + fileInfo.classList.remove("hidden"); + submitBtn.disabled = false; + } + + fileInput.addEventListener("change", (e) => { + handleFile(e.target.files[0]); + }); + + ['dragenter', 'dragover', 'dragleave', 'drop'].forEach(eventName => { + dropzone.addEventListener(eventName, preventDefaults, false); + }); + + function preventDefaults(e) { + e.preventDefault(); + e.stopPropagation(); + } + + ['dragenter', 'dragover'].forEach(eventName => { + dropzone.addEventListener(eventName, () => { + dropzone.style.borderColor = "var(--accent-secondary)"; + }, false); + }); + + ['dragleave', 'drop'].forEach(eventName => { + dropzone.addEventListener(eventName, () => { + dropzone.style.borderColor = ""; + }, false); + }); + + dropzone.addEventListener('drop', (e) => { + let dt = e.dataTransfer; + let files = dt.files; + if (files.length) { + fileInput.files = files; + handleFile(files[0]); + } + }, false); + + removeFileBtn.addEventListener("click", () => { + fileInput.value = ""; + dropzone.classList.remove("hidden"); + fileInfo.classList.add("hidden"); + submitBtn.disabled = true; + }); + + form.addEventListener("submit", async (e) => { + e.preventDefault(); + + if (!fileInput.files[0]) return; + + setLoadingState(submitBtn, true); + awaitingBatch.style.display = 'none'; + batchResult.classList.add("hidden"); + + try { + const formData = new FormData(); + formData.append("payload", fileInput.files[0]); + + const response = await fetch("/predict/file", { + method: "POST", + body: formData + }); + + if (!response.ok) { + const errorData = await response.json(); + throw new Error(errorData.detail || errorData.message || "Batch prediction failed"); + } + + const data = await response.json(); + + renderBatchResults(data); + showToast('Batch classification complete!', 'success'); + + } catch (error) { + console.error(error); + showToast(`Error: ${error.message}`, 'error'); + awaitingBatch.style.display = 'flex'; + } finally { + setLoadingState(submitBtn, false); + } + }); + + let batchChart = null; + + function renderBatchResults(data) { + const preds = data.prediction; + const probs = data.probabilities; + + tbody.innerHTML = ""; + + let counts = { "GALAXY": 0, "STAR": 0, "QSO": 0 }; + + preds.forEach((pred, index) => { + counts[pred] = (counts[pred] || 0) + 1; + + const maxProb = Math.max(...probs[index]); + + const tr = document.createElement("tr"); + tr.innerHTML = ` + Row ${index + 1} + ${pred} + ${maxProb}% + `; + tbody.appendChild(tr); + }); + + batchResult.classList.remove("hidden"); + + // Render Chart + const ctx = document.getElementById('batchPieChart').getContext('2d'); + if (batchChart) batchChart.destroy(); + + batchChart = new Chart(ctx, { + type: 'doughnut', + data: { + labels: ['GALAXY', 'STAR', 'QSO'], + datasets: [{ + data: [counts['GALAXY'], counts['STAR'], counts['QSO']], + backgroundColor: ['#7000ff', '#00d4ff', '#ff6b6b'], + borderWidth: 0 + }] + }, + options: { + responsive: true, + maintainAspectRatio: false, + plugins: { + legend: { + position: 'bottom', + labels: { color: '#ffffff' } + } + } + } + }); + } +} diff --git a/app/static/style.css b/app/static/style.css index 58ff40c..6088a75 100644 --- a/app/static/style.css +++ b/app/static/style.css @@ -1034,6 +1034,143 @@ input:focus ~ .input-focus-line { text-shadow: 0 0 10px rgba(0, 212, 255, 0.3); } +/* ======================================== + Batch UI Enhancements + ======================================== */ + +.file-upload-container { + border: 2px dashed rgba(255, 255, 255, 0.2); + border-radius: 12px; + padding: 2rem 1rem; + text-align: center; + transition: all 0.3s ease; + background: rgba(0, 0, 0, 0.2); + position: relative; + cursor: pointer; + margin-bottom: 1.5rem; +} + +.file-upload-container:hover { + border-color: var(--accent-secondary); + background: rgba(0, 212, 255, 0.05); +} + +.file-input-hidden { + position: absolute; + width: 0; + height: 0; + opacity: 0; + visibility: hidden; +} + +.file-upload-label { + display: flex; + flex-direction: column; + align-items: center; + cursor: pointer; + gap: 1rem; +} + +.upload-icon { + width: 48px; + height: 48px; + color: var(--accent-secondary); + background: rgba(0, 212, 255, 0.1); + border-radius: 50%; + padding: 12px; +} + +.upload-title { + font-size: 1.1rem; + font-weight: 600; + color: var(--text-primary); + display: block; +} + +.upload-subtitle { + font-size: 0.85rem; + color: var(--text-muted); +} + +.file-info { + display: flex; + align-items: center; + justify-content: center; + gap: 1rem; + margin-top: 1rem; + padding: 0.5rem 1rem; + background: rgba(0, 212, 255, 0.1); + border-radius: 8px; + border: 1px solid rgba(0, 212, 255, 0.2); +} + +.file-name { + font-family: var(--font-mono); + color: var(--text-primary); + font-size: 0.9rem; + max-width: 200px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.file-size { + color: var(--text-secondary); + font-size: 0.8rem; +} + +.btn-remove-file { + background: none; + border: none; + color: var(--error); + cursor: pointer; + font-size: 1.2rem; + line-height: 1; +} + +.btn-remove-file:hover { + color: #ff8a80; +} + +.batch-table-container { + max-height: 300px; + overflow-y: auto; + border-radius: 8px; + border: 1px solid var(--border-color); + background: rgba(0, 0, 0, 0.2); +} + +.batch-table { + width: 100%; + border-collapse: collapse; + text-align: left; + font-size: 0.85rem; +} + +.batch-table th, .batch-table td { + padding: 0.5rem; + border-bottom: 1px solid rgba(255, 255, 255, 0.05); +} + +.batch-table th { + background: rgba(255, 255, 255, 0.05); + color: var(--text-secondary); + font-weight: 600; + position: sticky; + top: 0; + z-index: 10; +} + +.mt-4 { + margin-top: 1rem; +} + +.chart-container { + width: 100%; + max-width: 200px; + margin: 0 auto; +} + /* ======================================== Responsive Design ======================================== */ diff --git a/app/templates/index.html b/app/templates/index.html index b306408..407bccd 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -39,6 +39,12 @@ Predict + + + + +
+ +
+ + + + +
+
+
+

Batch Results

+
+
+ +
+
+ + + + + +
+

Awaiting data...

+ Upload a CSV file to see predictions +
+ + + +
+
+
+ + + +
@@ -293,6 +398,8 @@

~100K Training Samples

+ + From 8b60bc718926b878cff095ef7d63965c485e7f92 Mon Sep 17 00:00:00 2001 From: Sakib Hossain Date: Thu, 9 Apr 2026 20:56:01 +0600 Subject: [PATCH 07/13] Modify CI script --- .github/workflows/python-app.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index f7fbfbd..25f9d2f 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -59,5 +59,12 @@ jobs: nohup uvicorn app.app:app & sleep 10 curl -I http://127.0.0.1:8000 + + # Test batch prediction endpoint + echo "ra,dec,redshift,psfMag_r,u,g,r,i,z" > dummy_test.csv + echo "0.1,0.2,0.3,1.0,2.0,3.0,4.0,5.0,6.0" >> dummy_test.csv + curl -X POST http://127.0.0.1:8000/predict/file -H "accept: application/json" -F "payload=@dummy_test.csv;type=text/csv" + rm dummy_test.csv + pkill -f uvicorn From 574d63643f67d2dd5a6474bada73e86aa4756aa6 Mon Sep 17 00:00:00 2001 From: Sakib Hossain Date: Thu, 9 Apr 2026 20:56:22 +0600 Subject: [PATCH 08/13] Fix input handling bug in `app.py` --- app/app.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/app/app.py b/app/app.py index fecfe09..8dcf17a 100644 --- a/app/app.py +++ b/app/app.py @@ -157,8 +157,7 @@ def prediction_ops(value:UserInput, dep:Tuple[Pipeline,np.ndarray] = Depends(get @app.post("/predict/file") async def prediction_via_file_ops(payload:UploadFile, dep: Tuple[Pipeline,np.ndarray] = Depends(get_model)): pipe, column_names = dep - column_names:List[str] = column_names.tolist() - column_names = [i for i in column_names if i not in ["class"]] + expected_upload_cols = ['ra', 'dec', 'redshift', 'psfMag_r', 'u', 'g', 'r', 'i', 'z'] ## Save the incoming .csv file on disk accepted_exts = [".csv"] @@ -180,7 +179,20 @@ async def prediction_via_file_ops(payload:UploadFile, dep: Tuple[Pipeline,np.nda return f"Error during payload dump procedure: {e}" df = pd.read_csv(DESTINATION) - df = upload_validator(df,column_names) + df = upload_validator(df, expected_upload_cols) + + # Feature Engineering (Vectorized) + df['u_g_color'] = df['u'] - df['g'] + df['g_r_color'] = df['g'] - df['r'] + df['r_i_color'] = df['r'] - df['i'] + df['i_z_color'] = df['i'] - df['z'] + + # Drop original bands + df = df.drop(columns=['u', 'g', 'r', 'i', 'z']) + + # Reorder columns to match the pipeline's expected order (excluding 'class') + model_features = [col for col in column_names.tolist() if col != "class"] + df = df[model_features] pred_label:list[float] = pipe.predict(df).tolist() pred_proba:list[list[float]] = pipe.predict_proba(df).tolist() From 4919e0f526e3afabee7f7c2dc61b464b961cd88c Mon Sep 17 00:00:00 2001 From: Sakib Hossain Date: Thu, 9 Apr 2026 20:57:01 +0600 Subject: [PATCH 09/13] Modify sample_generator.py to return valid sample files --- Datasets/sample_generator.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/Datasets/sample_generator.py b/Datasets/sample_generator.py index 110f28c..bc943b3 100644 --- a/Datasets/sample_generator.py +++ b/Datasets/sample_generator.py @@ -22,16 +22,7 @@ def preprocess(df: pd.DataFrame) -> pd.DataFrame: df["class"] = df["class"].map({"GALAXY": 0, "STAR": 1, "QSO": 2}) - df = df[["ra", "dec", "redshift", "u", "g", "r", "i", "z", "psfMag_r", "class"]].copy() - - df["u_g_color"] = df["u"] - df["g"] - df["g_r_color"] = df["g"] - df["r"] - df["r_i_color"] = df["r"] - df["i"] - df["i_z_color"] = df["i"] - df["z"] - df = df.drop(columns=["u", "g", "r", "i", "z"]) - - popped_class = df.pop("class") - df.insert(len(df.columns), "class", popped_class) + df = df[["ra", "dec", "redshift","psfMag_r", "u", "g", "r", "i", "z", "class"]].copy() return df From d8e6305c22ef6b0fe3ac6cc6162d0018cb12ad03 Mon Sep 17 00:00:00 2001 From: Sakib Hossain Date: Thu, 9 Apr 2026 21:05:25 +0600 Subject: [PATCH 10/13] Add dataset format guidelines to UI and fix blank output bar bug --- app/static/script.js | 12 +++++- app/static/style.css | 81 ++++++++++++++++++++++++++++++++++++++++ app/templates/index.html | 17 +++++++++ 3 files changed, 108 insertions(+), 2 deletions(-) diff --git a/app/static/script.js b/app/static/script.js index 8485fcb..1f5e1f0 100644 --- a/app/static/script.js +++ b/app/static/script.js @@ -66,6 +66,14 @@ function initPredictForm() { const formData = new FormData(form); const dataObj = Object.fromEntries(formData.entries()); + // Manual validation check to ensure no empty values + const requiredFields = ["ra", "dec", "redshift", "psfMag_r", "u", "g", "r", "i", "z"]; + for (let field of requiredFields) { + if (dataObj[field] === undefined || dataObj[field] === "") { + throw new Error("Please fill in all inputs before analyzing."); + } + } + // Convert numeric strings to numbers for (let key in dataObj) { if (!isNaN(dataObj[key]) && dataObj[key] !== "") { @@ -423,12 +431,12 @@ function initBatchPredictForm() { preds.forEach((pred, index) => { counts[pred] = (counts[pred] || 0) + 1; - const maxProb = Math.max(...probs[index]); + const maxProb = (Math.max(...probs[index]) * 100).toFixed(1); const tr = document.createElement("tr"); tr.innerHTML = ` Row ${index + 1} - ${pred} + ${pred} ${maxProb}% `; tbody.appendChild(tr); diff --git a/app/static/style.css b/app/static/style.css index 6088a75..3447a6f 100644 --- a/app/static/style.css +++ b/app/static/style.css @@ -1152,6 +1152,18 @@ input:focus ~ .input-focus-line { border-bottom: 1px solid rgba(255, 255, 255, 0.05); } +.batch-badge { + font-size: 0.85rem; + padding: 2px 8px; + border-radius: 4px; + font-weight: 600; + color: #fff; +} + +.batch-galaxy { background: var(--galaxy); } +.batch-star { background: var(--star); } +.batch-qso { background: var(--qso); } + .batch-table th { background: rgba(255, 255, 255, 0.05); color: var(--text-secondary); @@ -1171,6 +1183,75 @@ input:focus ~ .input-focus-line { margin: 0 auto; } +.batch-guidelines { + margin-top: 1.5rem; + padding: 1rem; + background: rgba(68, 138, 255, 0.05); + border: 1px solid rgba(68, 138, 255, 0.2); + border-radius: 12px; +} + +.guidelines-header { + display: flex; + align-items: center; + gap: 0.5rem; + margin-bottom: 0.75rem; + color: var(--info); +} + +.info-icon { + width: 18px; + height: 18px; +} + +.guidelines-header h4 { + font-size: 0.9rem; + font-weight: 600; +} + +.guideline-list { + list-style: none; + display: flex; + flex-direction: column; + gap: 0.5rem; +} + +.guideline-list li { + font-size: 0.8rem; + color: var(--text-secondary); + line-height: 1.4; + padding-left: 1.2rem; + position: relative; +} + +.guideline-list li::before { + content: '•'; + position: absolute; + left: 0; + color: var(--info); +} + +.guideline-list code { + background: rgba(255, 255, 255, 0.1); + padding: 0.1rem 0.3rem; + border-radius: 4px; + font-family: var(--font-mono); + font-size: 0.75rem; + color: var(--accent-secondary); +} + +.guideline-warning { + color: #ff8a80 !important; + background: rgba(255, 82, 82, 0.05); + padding: 0.5rem 0.5rem 0.5rem 1.2rem !important; + border-radius: 6px; + margin-top: 0.25rem; +} + +.guideline-warning::before { + display: none; +} + /* ======================================== Responsive Design ======================================== */ diff --git a/app/templates/index.html b/app/templates/index.html index 407bccd..d76cbbf 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -273,6 +273,23 @@

Batch Classification

+ + +
+
+ + + + + +

Dataset Format Guidelines

+
+
    +
  • Target Columns: ra, dec, redshift, psfMag_r, u, g, r, i, z
  • +
  • Exact Headers: The CSV file must contain precisely these lowercase headers in the correct arrangement. All fields must be numeric.
  • +
  • ⚠️ Action Failure: Failing to format the columns correctly, including non-numeric artifacts, or submitting extraneous indices will immediately trigger a payload rejection and throw server-side errors, aborting the process.
  • +
+
From 5b02acff037bb843dbc19e3ea78e08aab2ebe34a Mon Sep 17 00:00:00 2001 From: Sakib Hossain Date: Thu, 9 Apr 2026 21:06:38 +0600 Subject: [PATCH 11/13] Fix "always 1% confidence" bug in `app.py` by modifying the `round` function --- app/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/app.py b/app/app.py index 8dcf17a..2520998 100644 --- a/app/app.py +++ b/app/app.py @@ -200,7 +200,7 @@ async def prediction_via_file_ops(payload:UploadFile, dep: Tuple[Pipeline,np.nda # Postprocessing label_map = {0: "GALAXY", 1: "STAR", 2: "QSO"} pred_label:list[str] = [label_map.get(pred) for pred in pred_label] - pred_proba = [[round(r) for r in pred] for pred in pred_proba] + pred_proba = [[round(r, 3) for r in pred] for pred in pred_proba] msg = { "message": "batch prediction successful", From 86298a67307f1a96fca5248d3dfbff6290d4c92c Mon Sep 17 00:00:00 2001 From: Sakib Hossain Date: Thu, 9 Apr 2026 21:11:02 +0600 Subject: [PATCH 12/13] Modify `app.py` to prevent saving the buffer on disk and processing the entire cycle in memory --- app/app.py | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/app/app.py b/app/app.py index 2520998..377f312 100644 --- a/app/app.py +++ b/app/app.py @@ -8,9 +8,7 @@ from models.fit import main from .schema.validation import UserInput from pathlib import Path -from uuid import uuid4 from contextlib import asynccontextmanager -import shutil import numpy as np import pandas as pd import joblib @@ -159,26 +157,18 @@ async def prediction_via_file_ops(payload:UploadFile, dep: Tuple[Pipeline,np.nda pipe, column_names = dep expected_upload_cols = ['ra', 'dec', 'redshift', 'psfMag_r', 'u', 'g', 'r', 'i', 'z'] - ## Save the incoming .csv file on disk accepted_exts = [".csv"] - UPLOAD_DIR = Path("app","uploads") - UPLOAD_DIR.mkdir(exist_ok=True) extension = Path(payload.filename).suffix - if extension not in accepted_exts: + if extension.lower() not in accepted_exts: raise HTTPException( status_code=422, detail=f"Uploaded data must be in '.csv' format, got {extension} instead" ) - filename = f"{uuid4()}{extension}" - DESTINATION = UPLOAD_DIR / filename try: - with open(DESTINATION,"wb") as f: - shutil.copyfileobj(payload.file,f) + df = pd.read_csv(payload.file) except Exception as e: - return f"Error during payload dump procedure: {e}" - - df = pd.read_csv(DESTINATION) + raise HTTPException(status_code=422, detail=f"Failed to parse CSV file tracking: {str(e)}") df = upload_validator(df, expected_upload_cols) # Feature Engineering (Vectorized) @@ -186,8 +176,6 @@ async def prediction_via_file_ops(payload:UploadFile, dep: Tuple[Pipeline,np.nda df['g_r_color'] = df['g'] - df['r'] df['r_i_color'] = df['r'] - df['i'] df['i_z_color'] = df['i'] - df['z'] - - # Drop original bands df = df.drop(columns=['u', 'g', 'r', 'i', 'z']) # Reorder columns to match the pipeline's expected order (excluding 'class') From 16d811f9283a42082ec418226e5399d8c1a0137d Mon Sep 17 00:00:00 2001 From: Sakib Hossain Date: Thu, 9 Apr 2026 21:15:35 +0600 Subject: [PATCH 13/13] Add `httpx` in `requirements.txt` --- requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index 2b41ee6..b53b9df 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,6 +15,8 @@ Flask==3.1.2 fonttools==4.61.1 gunicorn==23.0.0 h11==0.16.0 +httpcore==1.0.9 +httpx==0.28.1 idna==3.11 imbalanced-learn==0.14.1 itsdangerous==2.2.0