From caad8590a89a5f0a66a1ef3b0aeee4a1170647e3 Mon Sep 17 00:00:00 2001
From: Sakib Hossain <sakibhossainalif29@gmail.com>
Date: Thu, 9 Apr 2026 18:13:24 +0600
Subject: [PATCH 01/13] Add preprocess helper and file-based prediction
 endpoint

---
 app/app.py | 57 ++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 47 insertions(+), 10 deletions(-)

diff --git a/app/app.py b/app/app.py
index d9569a8..76d4844 100644
--- a/app/app.py
+++ b/app/app.py
@@ -1,4 +1,5 @@
-from fastapi import FastAPI, Depends, Request
+from pydantic import BaseModel
+from fastapi import FastAPI, Depends, Request, UploadFile
 from fastapi.responses import JSONResponse, FileResponse
 from fastapi.staticfiles import StaticFiles
 from fastapi.exceptions import RequestValidationError
@@ -12,6 +13,7 @@
 import numpy as np
 import os
 
+# Helper for loading and self-healing the artifacts
 def load_or_create_models() -> Tuple[Pipeline,np.ndarray]:
     model_path = Path("models","estimator.pkl")
     columns_path = Path("models","column_names.pkl")
@@ -34,6 +36,18 @@ def load_or_create_models() -> Tuple[Pipeline,np.ndarray]:
         return pipe,column_names
     except Exception as e:
         raise RuntimeError(f"Artifacts could not be loaded: {e}")
+    
+def preprocess_data(value:BaseModel) -> dict:
+    # Preprocessing
+    value:dict = value.model_dump(mode="json")
+    kick = ["u","g","r","i","z"]
+    final_value = {key:val for key,val in value.items() if key not in kick}
+    final_value["u_g_color"] = safe_sub("u","g",value)
+    final_value["g_r_color"] = safe_sub("g","r",value)
+    final_value["r_i_color"] = safe_sub("r","i",value)
+    final_value["i_z_color"] = safe_sub("i","z",value)
+
+    return final_value
 
 @asynccontextmanager
 async def lifespan(app:FastAPI):
@@ -91,15 +105,7 @@ def home():
 def prediction_ops(value:UserInput, dep:Tuple[Pipeline,np.ndarray] = Depends(get_model)):
     pipe, column_names = dep
     column_names:List[str] = column_names.tolist()
-
-    # Preprocessing
-    value:dict = value.model_dump(mode="json")
-    kick = ["u","g","r","i","z"]
-    final_value = {key:val for key,val in value.items() if key not in kick}
-    final_value["u_g_color"] = safe_sub("u","g",value)
-    final_value["g_r_color"] = safe_sub("g","r",value)
-    final_value["r_i_color"] = safe_sub("r","i",value)
-    final_value["i_z_color"] = safe_sub("i","z",value)
+    final_value:dict = preprocess_data(value)
 
     # Order Check and running prediction
     final_res = []
@@ -127,3 +133,34 @@ def prediction_ops(value:UserInput, dep:Tuple[Pipeline,np.ndarray] = Depends(get
         status_code=201, content=msg
     )
 
+@app.post("/predict/file")
+def prediction_via_file_ops(value:UploadFile, dep: Tuple[Pipeline,np.ndarray] = Depends(get_model)):
+    pipe, column_names = dep
+    column_names:List[str] = column_names.tolist()
+    final_value:dict = preprocess_data(value)
+
+    # Order Check and running prediction
+    final_res = []
+    for col in column_names:
+        if col == "class":
+            continue
+        else:
+            final_res.append(final_value.get(col,None))
+    final_res = np.array(final_res).reshape(1,-1)
+
+    pred_label = int(pipe.predict(final_res))
+    pred_proba = pipe.predict_proba(final_res).tolist()
+
+    # Postprocessing
+    label_map = {0: "GALAXY", 1: "STAR", 2: "QSO"}
+    pred_label = label_map.get(pred_label)
+    pred_proba = {lmv:round(proba,3) for lmv,proba in zip(label_map.values(), pred_proba)}
+
+    msg = {
+        "message": "prediction successful",
+        "prediction": pred_label, 
+        "probabilities": pred_proba
+    }
+    return JSONResponse(
+        status_code=201, content=msg
+    )
\ No newline at end of file

From 3fbbc8e598d2c4620827b1578b13d412fff3480a Mon Sep 17 00:00:00 2001
From: Sakib Hossain <sakibhossainalif29@gmail.com>
Date: Thu, 9 Apr 2026 20:12:57 +0600
Subject: [PATCH 02/13] Add `python-multipart` to `requirements.txt`

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index 609c7d4..2b41ee6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -34,6 +34,7 @@ pydantic_core==2.41.5
 pyparsing==3.3.1
 python-dateutil==2.9.0.post0
 python-dotenv==1.2.1
+python-multipart==0.0.24
 pytz==2025.2
 requests==2.32.5
 scikit-learn==1.8.0

From 0786e647531b95cedb4723c630c1cac9924a5575 Mon Sep 17 00:00:00 2001
From: Sakib Hossain <sakibhossainalif29@gmail.com>
Date: Thu, 9 Apr 2026 20:13:20 +0600
Subject: [PATCH 03/13] Add batch file prediction endpoint in `app.py`

---
 app/app.py | 68 ++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 51 insertions(+), 17 deletions(-)

diff --git a/app/app.py b/app/app.py
index 76d4844..fecfe09 100644
--- a/app/app.py
+++ b/app/app.py
@@ -2,15 +2,18 @@
 from fastapi import FastAPI, Depends, Request, UploadFile
 from fastapi.responses import JSONResponse, FileResponse
 from fastapi.staticfiles import StaticFiles
-from fastapi.exceptions import RequestValidationError
+from fastapi.exceptions import RequestValidationError, HTTPException
 from typing import Tuple,List
 from sklearn.pipeline import Pipeline
 from models.fit import main
 from .schema.validation import UserInput
 from pathlib import Path
+from uuid import uuid4
 from contextlib import asynccontextmanager
-import joblib
+import shutil
 import numpy as np
+import pandas as pd
+import joblib
 import os
 
 # Helper for loading and self-healing the artifacts
@@ -37,6 +40,7 @@ def load_or_create_models() -> Tuple[Pipeline,np.ndarray]:
     except Exception as e:
         raise RuntimeError(f"Artifacts could not be loaded: {e}")
     
+# Helper for performing feature engineering
 def preprocess_data(value:BaseModel) -> dict:
     # Preprocessing
     value:dict = value.model_dump(mode="json")
@@ -49,6 +53,23 @@ def preprocess_data(value:BaseModel) -> dict:
 
     return final_value
 
+# Helper for validating user-provided csv files
+def upload_validator(df:pd.DataFrame,col_names:List[str]) -> pd.DataFrame:
+    if df.columns.tolist() != col_names:
+        raise HTTPException(
+            status_code=422, detail="Uploaded csv file does not match the expected " \
+            "columns or their order"
+        )
+    
+    try:
+        df = df.astype(float)
+    except Exception as e:
+        raise HTTPException(
+            status_code=422,
+            detail="All values must be numeric (float-compatible)"
+        )
+    return df
+
 @asynccontextmanager
 async def lifespan(app:FastAPI):
     # Load pipeline at start
@@ -134,30 +155,43 @@ def prediction_ops(value:UserInput, dep:Tuple[Pipeline,np.ndarray] = Depends(get
     )
 
 @app.post("/predict/file")
-def prediction_via_file_ops(value:UploadFile, dep: Tuple[Pipeline,np.ndarray] = Depends(get_model)):
+async def prediction_via_file_ops(payload:UploadFile, dep: Tuple[Pipeline,np.ndarray] = Depends(get_model)):
     pipe, column_names = dep
     column_names:List[str] = column_names.tolist()
-    final_value:dict = preprocess_data(value)
+    column_names = [i for i in column_names if i not in ["class"]]
+
+    ## Save the incoming .csv file on disk
+    accepted_exts = [".csv"]
+    UPLOAD_DIR = Path("app","uploads")
+    UPLOAD_DIR.mkdir(exist_ok=True)
+    extension = Path(payload.filename).suffix
+    if extension not in accepted_exts:
+        raise HTTPException(
+            status_code=422, 
+            detail=f"Uploaded data must be in '.csv' format, got {extension} instead"
+        )
+    filename = f"{uuid4()}{extension}"
+    DESTINATION = UPLOAD_DIR / filename
 
-    # Order Check and running prediction
-    final_res = []
-    for col in column_names:
-        if col == "class":
-            continue
-        else:
-            final_res.append(final_value.get(col,None))
-    final_res = np.array(final_res).reshape(1,-1)
+    try:
+        with open(DESTINATION,"wb") as f:
+            shutil.copyfileobj(payload.file,f)
+    except Exception as e:
+        return f"Error during payload dump procedure: {e}"
+
+    df = pd.read_csv(DESTINATION)
+    df = upload_validator(df,column_names)
 
-    pred_label = int(pipe.predict(final_res))
-    pred_proba = pipe.predict_proba(final_res).tolist()
+    pred_label:list[float] = pipe.predict(df).tolist()
+    pred_proba:list[list[float]] = pipe.predict_proba(df).tolist()
 
     # Postprocessing
     label_map = {0: "GALAXY", 1: "STAR", 2: "QSO"}
-    pred_label = label_map.get(pred_label)
-    pred_proba = {lmv:round(proba,3) for lmv,proba in zip(label_map.values(), pred_proba)}
+    pred_label:list[str] = [label_map.get(pred) for pred in pred_label]
+    pred_proba = [[round(r) for r in pred] for pred in pred_proba]
 
     msg = {
-        "message": "prediction successful",
+        "message": "batch prediction successful",
         "prediction": pred_label, 
         "probabilities": pred_proba
     }

From 1a961f9adfc2c4a850f57111d376f8ea262ab700 Mon Sep 17 00:00:00 2001
From: Sakib Hossain <sakibhossainalif29@gmail.com>
Date: Thu, 9 Apr 2026 20:14:00 +0600
Subject: [PATCH 04/13] Add `sample_generator.py` for basic testing

---
 Datasets/sample_generator.py | 55 ++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 Datasets/sample_generator.py

diff --git a/Datasets/sample_generator.py b/Datasets/sample_generator.py
new file mode 100644
index 0000000..79a2e9b
--- /dev/null
+++ b/Datasets/sample_generator.py
@@ -0,0 +1,55 @@
+import pandas as pd
+from pathlib import Path
+
+DATASET_PATH = Path("Datasets","SDSS_DR18.csv")
+OUTPUT_PATH = Path("Datasets","samples.csv")
+
+def preprocess(df: pd.DataFrame) -> pd.DataFrame:
+    # Drop irrelevant columns (ignore if already absent)
+    drop_cols = ["objid", "specobjid", "run", "rerun", "camcol",
+                 "field", "plate", "mjd", "fiberid"]
+    df = df.drop(columns=[c for c in drop_cols if c in df.columns])
+
+    # Map string classes to numeric labels
+    df["class"] = df["class"].map({"GALAXY": 0, "STAR": 1, "QSO": 2})
+
+    # Feature reduction
+    df = df[["ra", "dec", "redshift", "u", "g", "r", "i", "z", "psfMag_r", "class"]].copy()
+
+    # Feature engineering — color contrast columns
+    df["u_g_color"] = df["u"] - df["g"]
+    df["g_r_color"] = df["g"] - df["r"]
+    df["r_i_color"] = df["r"] - df["i"]
+    df["i_z_color"] = df["i"] - df["z"]
+    df = df.drop(columns=["u", "g", "r", "i", "z"])
+
+    # Move `class` to the end
+    popped_class = df.pop("class")
+    df.insert(len(df.columns), "class", popped_class)
+
+    return df
+
+
+def stratified_sample(df: pd.DataFrame, total_samples: int, class_col: str = "class", random_state: int = 42) -> pd.DataFrame:
+    class_counts = df[class_col].value_counts(normalize=True)
+    class_n = (class_counts * total_samples).round().astype(int)
+
+    # Fix rounding drift
+    diff = total_samples - class_n.sum()
+    class_n.iloc[0] += diff
+
+    return df.groupby(class_col, group_keys=False).apply(
+        lambda x: x.sample(n=min(class_n[x.name], len(x)), random_state=random_state)
+    )
+
+
+# --- Usage ---
+df_raw = pd.read_csv(DATASET_PATH)
+
+df_processed = preprocess(df_raw)
+
+sample = stratified_sample(df_processed, total_samples=100)
+
+sample = sample.drop(columns=["class"])
+
+sample.to_csv(OUTPUT_PATH,index=False)
\ No newline at end of file

From 934ee6db8ed81318edc6a0d82f69ee7c85e797de Mon Sep 17 00:00:00 2001
From: Sakib Hossain <sakibhossainalif29@gmail.com>
Date: Thu, 9 Apr 2026 20:17:28 +0600
Subject: [PATCH 05/13] Add docstring documentation for `sample_generator.py`

---
 Datasets/sample_generator.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/Datasets/sample_generator.py b/Datasets/sample_generator.py
index 79a2e9b..110f28c 100644
--- a/Datasets/sample_generator.py
+++ b/Datasets/sample_generator.py
@@ -1,3 +1,14 @@
+"""
+sample_generator.py
+
+This script is used to generate samples directly from the main dataset. 
+These samples are used to test the `/predict/file` route. 
+To run it, enter this in your command line:
+```
+python -m Datasets.sample_generator
+```
+"""
+
 import pandas as pd
 from pathlib import Path
 
@@ -5,25 +16,20 @@
 OUTPUT_PATH = Path("Datasets","samples.csv")
 
 def preprocess(df: pd.DataFrame) -> pd.DataFrame:
-    # Drop irrelevant columns (ignore if already absent)
     drop_cols = ["objid", "specobjid", "run", "rerun", "camcol",
                  "field", "plate", "mjd", "fiberid"]
     df = df.drop(columns=[c for c in drop_cols if c in df.columns])
 
-    # Map string classes to numeric labels
     df["class"] = df["class"].map({"GALAXY": 0, "STAR": 1, "QSO": 2})
 
-    # Feature reduction
     df = df[["ra", "dec", "redshift", "u", "g", "r", "i", "z", "psfMag_r", "class"]].copy()
 
-    # Feature engineering — color contrast columns
     df["u_g_color"] = df["u"] - df["g"]
     df["g_r_color"] = df["g"] - df["r"]
     df["r_i_color"] = df["r"] - df["i"]
     df["i_z_color"] = df["i"] - df["z"]
     df = df.drop(columns=["u", "g", "r", "i", "z"])
 
-    # Move `class` to the end
     popped_class = df.pop("class")
     df.insert(len(df.columns), "class", popped_class)
 
@@ -34,7 +40,6 @@ def stratified_sample(df: pd.DataFrame, total_samples: int, class_col: str = "cl
     class_counts = df[class_col].value_counts(normalize=True)
     class_n = (class_counts * total_samples).round().astype(int)
 
-    # Fix rounding drift
     diff = total_samples - class_n.sum()
     class_n.iloc[0] += diff
 
@@ -43,7 +48,6 @@ def stratified_sample(df: pd.DataFrame, total_samples: int, class_col: str = "cl
     )
 
 
-# --- Usage ---
 df_raw = pd.read_csv(DATASET_PATH)
 
 df_processed = preprocess(df_raw)

From 22f74ab63d921f861830e16cb5b6eac529918f99 Mon Sep 17 00:00:00 2001
From: Sakib Hossain <sakibhossainalif29@gmail.com>
Date: Thu, 9 Apr 2026 20:55:52 +0600
Subject: [PATCH 06/13] Add new front end modification for the new batch
 prediction feature

---
 app/static/script.js     | 177 +++++++++++++++++++++++++++++++++++++++
 app/static/style.css     | 137 ++++++++++++++++++++++++++++++
 app/templates/index.html | 107 +++++++++++++++++++++++
 3 files changed, 421 insertions(+)

diff --git a/app/static/script.js b/app/static/script.js
index ccd2f7a..8485fcb 100644
--- a/app/static/script.js
+++ b/app/static/script.js
@@ -7,6 +7,7 @@ document.addEventListener("DOMContentLoaded", function() {
     // Initialize components
     initTabNavigation();
     initPredictForm();
+    initBatchPredictForm();
     initKeyboardShortcuts();
 });
 
@@ -286,3 +287,179 @@ window.CosmoClassifier = {
     formatNumber,
     debounce
 };
+
+/**
+ * Batch Prediction Form Handler
+ */
+function initBatchPredictForm() {
+    const fileInput = document.getElementById("batchFile");
+    const dropzone = document.getElementById("fileUploadDropzone");
+    const fileInfo = document.getElementById("fileInfo");
+    const selectedFileName = document.getElementById("selectedFileName");
+    const selectedFileSize = document.getElementById("selectedFileSize");
+    const removeFileBtn = document.getElementById("removeFileBtn");
+    const submitBtn = document.getElementById("batchPredictBtn");
+    const form = document.getElementById("batchPredictForm");
+    
+    if(!fileInput) return;
+
+    const awaitingBatch = document.getElementById("awaiting-batch");
+    const batchResult = document.getElementById("batch-result");
+    const tbody = document.getElementById("batchTableBody");
+
+    const MAX_SIZE = 5 * 1024 * 1024; // 5MB
+
+    function handleFile(file) {
+        if (!file) return;
+        
+        if (!file.name.toLowerCase().endsWith('.csv')) {
+            showToast("Only .csv files are allowed", "error");
+            fileInput.value = "";
+            return;
+        }
+        
+        if (file.size > MAX_SIZE) {
+            showToast("File size exceeds 5MB limit", "error");
+            fileInput.value = "";
+            return;
+        }
+
+        selectedFileName.textContent = file.name;
+        selectedFileSize.textContent = (file.size / 1024 / 1024).toFixed(2) + " MB";
+        
+        dropzone.classList.add("hidden");
+        fileInfo.classList.remove("hidden");
+        submitBtn.disabled = false;
+    }
+
+    fileInput.addEventListener("change", (e) => {
+        handleFile(e.target.files[0]);
+    });
+
+    ['dragenter', 'dragover', 'dragleave', 'drop'].forEach(eventName => {
+        dropzone.addEventListener(eventName, preventDefaults, false);
+    });
+
+    function preventDefaults(e) {
+        e.preventDefault();
+        e.stopPropagation();
+    }
+
+    ['dragenter', 'dragover'].forEach(eventName => {
+        dropzone.addEventListener(eventName, () => {
+            dropzone.style.borderColor = "var(--accent-secondary)";
+        }, false);
+    });
+
+    ['dragleave', 'drop'].forEach(eventName => {
+        dropzone.addEventListener(eventName, () => {
+            dropzone.style.borderColor = "";
+        }, false);
+    });
+
+    dropzone.addEventListener('drop', (e) => {
+        let dt = e.dataTransfer;
+        let files = dt.files;
+        if (files.length) {
+            fileInput.files = files;
+            handleFile(files[0]);
+        }
+    }, false);
+
+    removeFileBtn.addEventListener("click", () => {
+        fileInput.value = "";
+        dropzone.classList.remove("hidden");
+        fileInfo.classList.add("hidden");
+        submitBtn.disabled = true;
+    });
+
+    form.addEventListener("submit", async (e) => {
+        e.preventDefault();
+        
+        if (!fileInput.files[0]) return;
+
+        setLoadingState(submitBtn, true);
+        awaitingBatch.style.display = 'none';
+        batchResult.classList.add("hidden");
+
+        try {
+            const formData = new FormData();
+            formData.append("payload", fileInput.files[0]);
+
+            const response = await fetch("/predict/file", {
+                method: "POST",
+                body: formData
+            });
+
+            if (!response.ok) {
+                const errorData = await response.json();
+                throw new Error(errorData.detail || errorData.message || "Batch prediction failed");
+            }
+
+            const data = await response.json();
+            
+            renderBatchResults(data);
+            showToast('Batch classification complete!', 'success');
+            
+        } catch (error) {
+            console.error(error);
+            showToast(`Error: ${error.message}`, 'error');
+            awaitingBatch.style.display = 'flex';
+        } finally {
+            setLoadingState(submitBtn, false);
+        }
+    });
+
+    let batchChart = null;
+
+    function renderBatchResults(data) {
+        const preds = data.prediction;
+        const probs = data.probabilities;
+        
+        tbody.innerHTML = "";
+        
+        let counts = { "GALAXY": 0, "STAR": 0, "QSO": 0 };
+
+        preds.forEach((pred, index) => {
+            counts[pred] = (counts[pred] || 0) + 1;
+            
+            const maxProb = Math.max(...probs[index]);
+            
+            const tr = document.createElement("tr");
+            tr.innerHTML = `
+                <td>Row ${index + 1}</td>
+                <td><span class="pred-class ${pred.toLowerCase()}" style="font-size: 0.85rem; padding: 2px 6px; border-radius: 4px; background: rgba(255,255,255,0.1);">${pred}</span></td>
+                <td>${maxProb}%</td>
+            `;
+            tbody.appendChild(tr);
+        });
+
+        batchResult.classList.remove("hidden");
+
+        // Render Chart
+        const ctx = document.getElementById('batchPieChart').getContext('2d');
+        if (batchChart) batchChart.destroy();
+
+        batchChart = new Chart(ctx, {
+            type: 'doughnut',
+            data: {
+                labels: ['GALAXY', 'STAR', 'QSO'],
+                datasets: [{
+                    data: [counts['GALAXY'], counts['STAR'], counts['QSO']],
+                    backgroundColor: ['#7000ff', '#00d4ff', '#ff6b6b'],
+                    borderWidth: 0
+                }]
+            },
+            options: {
+                responsive: true,
+                maintainAspectRatio: false,
+                plugins: {
+                    legend: {
+                        position: 'bottom',
+                        labels: { color: '#ffffff' }
+                    }
+                }
+            }
+        });
+    }
+}
diff --git a/app/static/style.css b/app/static/style.css
index 58ff40c..6088a75 100644
--- a/app/static/style.css
+++ b/app/static/style.css
@@ -1034,6 +1034,143 @@ input:focus ~ .input-focus-line {
     text-shadow: 0 0 10px rgba(0, 212, 255, 0.3);
 }
 
+/* ========================================
+   Batch UI Enhancements
+   ======================================== */
+
+.file-upload-container {
+    border: 2px dashed rgba(255, 255, 255, 0.2);
+    border-radius: 12px;
+    padding: 2rem 1rem;
+    text-align: center;
+    transition: all 0.3s ease;
+    background: rgba(0, 0, 0, 0.2);
+    position: relative;
+    cursor: pointer;
+    margin-bottom: 1.5rem;
+}
+
+.file-upload-container:hover {
+    border-color: var(--accent-secondary);
+    background: rgba(0, 212, 255, 0.05);
+}
+
+.file-input-hidden {
+    position: absolute;
+    width: 0;
+    height: 0;
+    opacity: 0;
+    visibility: hidden;
+}
+
+.file-upload-label {
+    display: flex;
+    flex-direction: column;
+    align-items: center;
+    cursor: pointer;
+    gap: 1rem;
+}
+
+.upload-icon {
+    width: 48px;
+    height: 48px;
+    color: var(--accent-secondary);
+    background: rgba(0, 212, 255, 0.1);
+    border-radius: 50%;
+    padding: 12px;
+}
+
+.upload-title {
+    font-size: 1.1rem;
+    font-weight: 600;
+    color: var(--text-primary);
+    display: block;
+}
+
+.upload-subtitle {
+    font-size: 0.85rem;
+    color: var(--text-muted);
+}
+
+.file-info {
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    gap: 1rem;
+    margin-top: 1rem;
+    padding: 0.5rem 1rem;
+    background: rgba(0, 212, 255, 0.1);
+    border-radius: 8px;
+    border: 1px solid rgba(0, 212, 255, 0.2);
+}
+
+.file-name {
+    font-family: var(--font-mono);
+    color: var(--text-primary);
+    font-size: 0.9rem;
+    max-width: 200px;
+    overflow: hidden;
+    text-overflow: ellipsis;
+    white-space: nowrap;
+}
+
+.file-size {
+    color: var(--text-secondary);
+    font-size: 0.8rem;
+}
+
+.btn-remove-file {
+    background: none;
+    border: none;
+    color: var(--error);
+    cursor: pointer;
+    font-size: 1.2rem;
+    line-height: 1;
+}
+
+.btn-remove-file:hover {
+    color: #ff8a80;
+}
+
+.batch-table-container {
+    max-height: 300px;
+    overflow-y: auto;
+    border-radius: 8px;
+    border: 1px solid var(--border-color);
+    background: rgba(0, 0, 0, 0.2);
+}
+
+.batch-table {
+    width: 100%;
+    border-collapse: collapse;
+    text-align: left;
+    font-size: 0.85rem;
+}
+
+.batch-table th, .batch-table td {
+    padding: 0.5rem;
+    border-bottom: 1px solid rgba(255, 255, 255, 0.05);
+}
+
+.batch-table th {
+    background: rgba(255, 255, 255, 0.05);
+    color: var(--text-secondary);
+    font-weight: 600;
+    position: sticky;
+    top: 0;
+    z-index: 10;
+}
+
+.mt-4 {
+    margin-top: 1rem;
+}
+
+.chart-container {
+    width: 100%;
+    max-width: 200px;
+    margin: 0 auto;
+}
+
 /* ========================================
    Responsive Design
    ======================================== */
diff --git a/app/templates/index.html b/app/templates/index.html
index b306408..407bccd 100644
--- a/app/templates/index.html
+++ b/app/templates/index.html
@@ -39,6 +39,12 @@
                 </svg>
                 Predict
             </button>
+            <button class="nav-btn" data-tab="batch">
+                <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
+                    <path d="M21.2 15c.7-1.2 1-2.5.7-3.9-.6-2-2.4-3.5-4.4-3.5h-1.2c-.7-3-3.2-5.2-6.2-5.6-3-.3-5.9 1.3-7.3 4-1.2 2.5-1 5.4.5 7.6M12 19v-9m0 0l-3 3m3-3l3 3"/>
+                </svg>
+                Batch Predict
+            </button>
             <button class="nav-btn" data-tab="model">
                 <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
                     <path d="M12 2L2 7l10 5 10-5-10-5z"/>
@@ -219,6 +225,105 @@ <h4 class="confusion-title">Confusion Matrix</h4>
         </div>
     </section>
 
+    <!-- Batch Prediction Tab -->
+    <section class="tab-content" id="batch-tab">
+        <div class="container container-split">
+            <div class="section-header centered">
+                <h1 class="section-title">Batch Classification</h1>
+                <p class="section-subtitle">Upload a CSV file containing multiple objects, with 11 features corresponding to SDSS DR18</p>
+            </div>
+
+            <!-- 2-Panel Horizontal Layout for Batch -->
+            <div class="three-panel-layout">
+                <!-- Left Panel: File Upload -->
+                <div class="panel panel-left panel-batch">
+                    <form id="batchPredictForm" class="predict-form batch-form">
+                        <div class="file-upload-container">
+                            <input type="file" id="batchFile" name="payload" accept=".csv" class="file-input-hidden" required>
+                            <label for="batchFile" class="file-upload-label" id="fileUploadDropzone">
+                                <div class="upload-icon">
+                                    <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
+                                        <path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z"></path>
+                                        <polyline points="14 2 14 8 20 8"></polyline>
+                                        <line x1="12" y1="18" x2="12" y2="12"></line>
+                                        <line x1="9" y1="15" x2="15" y2="15"></line>
+                                    </svg>
+                                </div>
+                                <div class="upload-text">
+                                    <span class="upload-title">Click or drag CSV file here</span>
+                                    <span class="upload-subtitle">Max size: 5MB</span>
+                                </div>
+                            </label>
+                            <div class="file-info hidden" id="fileInfo">
+                                <span class="file-name" id="selectedFileName">filename.csv</span>
+                                <span class="file-size" id="selectedFileSize">1.2MB</span>
+                                <button type="button" class="btn-remove-file" id="removeFileBtn" title="Remove File">✕</button>
+                            </div>
+                        </div>
+                        
+                        <div class="form-actions">
+                            <button type="submit" id="batchPredictBtn" class="btn-primary" disabled>
+                                <span class="btn-text">Start Batch Prediction</span>
+                                <span class="btn-icon">
+                                    <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
+                                        <polygon points="5 3 19 12 5 21 5 3"></polygon>
+                                    </svg>
+                                </span>
+                                <div class="btn-glow"></div>
+                            </button>
+                        </div>
+                    </form>
+                </div>
+                
+                <!-- Right Panel: Batch Results -->
+                <div class="panel panel-right">
+                    <div class="result-box result-batch">
+                        <div class="result-box-header">
+                            <h3>Batch Results</h3>
+                        </div>
+                        <div class="result-box-content">
+                            <!-- Awaiting File -->
+                            <div id="awaiting-batch" class="awaiting-state">
+                                <div class="awaiting-icon">
+                                    <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
+                                        <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"></path>
+                                        <polyline points="17 8 12 3 7 8"></polyline>
+                                        <line x1="12" y1="3" x2="12" y2="15"></line>
+                                    </svg>
+                                </div>
+                                <p>Awaiting data...</p>
+                                <span>Upload a CSV file to see predictions</span>
+                            </div>
+
+                            <!-- Batch Results Section -->
+                            <div id="batch-result" class="result-section hidden">
+                                <div class="batch-summary">
+                                    <div class="chart-container">
+                                        <canvas id="batchPieChart"></canvas>
+                                    </div>
+                                </div>
+                                <div class="batch-table-container mt-4">
+                                    <table class="batch-table">
+                                        <thead>
+                                            <tr>
+                                                <th>Row ID</th>
+                                                <th>Prediction</th>
+                                                <th>Confidence</th>
+                                            </tr>
+                                        </thead>
+                                        <tbody id="batchTableBody">
+                                            <!-- Rows populated by JS -->
+                                        </tbody>
+                                    </table>
+                                </div>
+                            </div>
+                        </div>
+                    </div>
+                </div>
+            </div>
+        </div>
+    </section>
+
     <!-- Model Info Tab -->
     <section class="tab-content" id="model-tab">
         <div class="container container-wide">
@@ -293,6 +398,8 @@ <h3>~100K Training Samples</h3>
 <!-- Toast Notification Container -->
 <div id="toast-container" class="toast-container"></div>
 
+<!-- Chart.js for Pie Chart -->
+<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
 <script src="/static/script.js"></script>
 </body>
 </html>

From 8b60bc718926b878cff095ef7d63965c485e7f92 Mon Sep 17 00:00:00 2001
From: Sakib Hossain <sakibhossainalif29@gmail.com>
Date: Thu, 9 Apr 2026 20:56:01 +0600
Subject: [PATCH 07/13] Modify CI script

---
 .github/workflows/python-app.yml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index f7fbfbd..25f9d2f 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -59,5 +59,12 @@ jobs:
         nohup uvicorn app.app:app &
         sleep 10
         curl -I http://127.0.0.1:8000
+        
+        # Test batch prediction endpoint
+        echo "ra,dec,redshift,psfMag_r,u,g,r,i,z" > dummy_test.csv
+        echo "0.1,0.2,0.3,1.0,2.0,3.0,4.0,5.0,6.0" >> dummy_test.csv
+        curl -X POST http://127.0.0.1:8000/predict/file -H "accept: application/json" -F "payload=@dummy_test.csv;type=text/csv"
+        rm dummy_test.csv
+        
         pkill -f uvicorn
 

From 574d63643f67d2dd5a6474bada73e86aa4756aa6 Mon Sep 17 00:00:00 2001
From: Sakib Hossain <sakibhossainalif29@gmail.com>
Date: Thu, 9 Apr 2026 20:56:22 +0600
Subject: [PATCH 08/13] Fix input handling bug in `app.py`

---
 app/app.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/app/app.py b/app/app.py
index fecfe09..8dcf17a 100644
--- a/app/app.py
+++ b/app/app.py
@@ -157,8 +157,7 @@ def prediction_ops(value:UserInput, dep:Tuple[Pipeline,np.ndarray] = Depends(get
 @app.post("/predict/file")
 async def prediction_via_file_ops(payload:UploadFile, dep: Tuple[Pipeline,np.ndarray] = Depends(get_model)):
     pipe, column_names = dep
-    column_names:List[str] = column_names.tolist()
-    column_names = [i for i in column_names if i not in ["class"]]
+    expected_upload_cols = ['ra', 'dec', 'redshift', 'psfMag_r', 'u', 'g', 'r', 'i', 'z']
 
     ## Save the incoming .csv file on disk
     accepted_exts = [".csv"]
@@ -180,7 +179,20 @@ async def prediction_via_file_ops(payload:UploadFile, dep: Tuple[Pipeline,np.nda
         return f"Error during payload dump procedure: {e}"
 
     df = pd.read_csv(DESTINATION)
-    df = upload_validator(df,column_names)
+    df = upload_validator(df, expected_upload_cols)
+    
+    # Feature Engineering (Vectorized)
+    df['u_g_color'] = df['u'] - df['g']
+    df['g_r_color'] = df['g'] - df['r']
+    df['r_i_color'] = df['r'] - df['i']
+    df['i_z_color'] = df['i'] - df['z']
+    
+    # Drop original bands
+    df = df.drop(columns=['u', 'g', 'r', 'i', 'z'])
+    
+    # Reorder columns to match the pipeline's expected order (excluding 'class')
+    model_features = [col for col in column_names.tolist() if col != "class"]
+    df = df[model_features]
 
     pred_label:list[float] = pipe.predict(df).tolist()
     pred_proba:list[list[float]] = pipe.predict_proba(df).tolist()

From 4919e0f526e3afabee7f7c2dc61b464b961cd88c Mon Sep 17 00:00:00 2001
From: Sakib Hossain <sakibhossainalif29@gmail.com>
Date: Thu, 9 Apr 2026 20:57:01 +0600
Subject: [PATCH 09/13] Modify sample_generator.py to return valid sample
 files

---
 Datasets/sample_generator.py | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/Datasets/sample_generator.py b/Datasets/sample_generator.py
index 110f28c..bc943b3 100644
--- a/Datasets/sample_generator.py
+++ b/Datasets/sample_generator.py
@@ -22,16 +22,7 @@ def preprocess(df: pd.DataFrame) -> pd.DataFrame:
 
     df["class"] = df["class"].map({"GALAXY": 0, "STAR": 1, "QSO": 2})
 
-    df = df[["ra", "dec", "redshift", "u", "g", "r", "i", "z", "psfMag_r", "class"]].copy()
-
-    df["u_g_color"] = df["u"] - df["g"]
-    df["g_r_color"] = df["g"] - df["r"]
-    df["r_i_color"] = df["r"] - df["i"]
-    df["i_z_color"] = df["i"] - df["z"]
-    df = df.drop(columns=["u", "g", "r", "i", "z"])
-
-    popped_class = df.pop("class")
-    df.insert(len(df.columns), "class", popped_class)
+    df = df[["ra", "dec", "redshift","psfMag_r",  "u", "g", "r", "i", "z", "class"]].copy()
 
     return df
 

From d8e6305c22ef6b0fe3ac6cc6162d0018cb12ad03 Mon Sep 17 00:00:00 2001
From: Sakib Hossain <sakibhossainalif29@gmail.com>
Date: Thu, 9 Apr 2026 21:05:25 +0600
Subject: [PATCH 10/13] Add dataset format guidelines to UI and fix blank
 output bar bug

---
 app/static/script.js     | 12 +++++-
 app/static/style.css     | 81 ++++++++++++++++++++++++++++++++++++++++
 app/templates/index.html | 17 +++++++++
 3 files changed, 108 insertions(+), 2 deletions(-)

diff --git a/app/static/script.js b/app/static/script.js
index 8485fcb..1f5e1f0 100644
--- a/app/static/script.js
+++ b/app/static/script.js
@@ -66,6 +66,14 @@ function initPredictForm() {
             const formData = new FormData(form);
             const dataObj = Object.fromEntries(formData.entries());
             
+            // Manual validation check to ensure no empty values
+            const requiredFields = ["ra", "dec", "redshift", "psfMag_r", "u", "g", "r", "i", "z"];
+            for (let field of requiredFields) {
+                if (dataObj[field] === undefined || dataObj[field] === "") {
+                    throw new Error("Please fill in all inputs before analyzing.");
+                }
+            }
+            
             // Convert numeric strings to numbers
             for (let key in dataObj) {
                 if (!isNaN(dataObj[key]) && dataObj[key] !== "") {
@@ -423,12 +431,12 @@ function initBatchPredictForm() {
         preds.forEach((pred, index) => {
             counts[pred] = (counts[pred] || 0) + 1;
             
-            const maxProb = Math.max(...probs[index]);
+            const maxProb = (Math.max(...probs[index]) * 100).toFixed(1);
             
             const tr = document.createElement("tr");
             tr.innerHTML = `
                 <td>Row ${index + 1}</td>
-                <td><span class="pred-class ${pred.toLowerCase()}" style="font-size: 0.85rem; padding: 2px 6px; border-radius: 4px; background: rgba(255,255,255,0.1);">${pred}</span></td>
+                <td><span class="batch-badge batch-${pred.toLowerCase()}">${pred}</span></td>
                 <td>${maxProb}%</td>
             `;
             tbody.appendChild(tr);
diff --git a/app/static/style.css b/app/static/style.css
index 6088a75..3447a6f 100644
--- a/app/static/style.css
+++ b/app/static/style.css
@@ -1152,6 +1152,18 @@ input:focus ~ .input-focus-line {
     border-bottom: 1px solid rgba(255, 255, 255, 0.05);
 }
 
+.batch-badge {
+    font-size: 0.85rem;
+    padding: 2px 8px;
+    border-radius: 4px;
+    font-weight: 600;
+    color: #fff;
+}
+
+.batch-galaxy { background: var(--galaxy); }
+.batch-star { background: var(--star); }
+.batch-qso { background: var(--qso); }
+
 .batch-table th {
     background: rgba(255, 255, 255, 0.05);
     color: var(--text-secondary);
@@ -1171,6 +1183,75 @@ input:focus ~ .input-focus-line {
     margin: 0 auto;
 }
 
+.batch-guidelines {
+    margin-top: 1.5rem;
+    padding: 1rem;
+    background: rgba(68, 138, 255, 0.05);
+    border: 1px solid rgba(68, 138, 255, 0.2);
+    border-radius: 12px;
+}
+
+.guidelines-header {
+    display: flex;
+    align-items: center;
+    gap: 0.5rem;
+    margin-bottom: 0.75rem;
+    color: var(--info);
+}
+
+.info-icon {
+    width: 18px;
+    height: 18px;
+}
+
+.guidelines-header h4 {
+    font-size: 0.9rem;
+    font-weight: 600;
+}
+
+.guideline-list {
+    list-style: none;
+    display: flex;
+    flex-direction: column;
+    gap: 0.5rem;
+}
+
+.guideline-list li {
+    font-size: 0.8rem;
+    color: var(--text-secondary);
+    line-height: 1.4;
+    padding-left: 1.2rem;
+    position: relative;
+}
+
+.guideline-list li::before {
+    content: '•';
+    position: absolute;
+    left: 0;
+    color: var(--info);
+}
+
+.guideline-list code {
+    background: rgba(255, 255, 255, 0.1);
+    padding: 0.1rem 0.3rem;
+    border-radius: 4px;
+    font-family: var(--font-mono);
+    font-size: 0.75rem;
+    color: var(--accent-secondary);
+}
+
+.guideline-warning {
+    color: #ff8a80 !important;
+    background: rgba(255, 82, 82, 0.05);
+    padding: 0.5rem 0.5rem 0.5rem 1.2rem !important;
+    border-radius: 6px;
+    margin-top: 0.25rem;
+}
+
+.guideline-warning::before {
+    display: none;
+}
+
 /* ========================================
    Responsive Design
    ======================================== */
diff --git a/app/templates/index.html b/app/templates/index.html
index 407bccd..d76cbbf 100644
--- a/app/templates/index.html
+++ b/app/templates/index.html
@@ -273,6 +273,23 @@ <h1 class="section-title">Batch Classification</h1>
                             </button>
                         </div>
                     </form>
+
+                    <!-- Batch Guidelines -->
+                    <div class="batch-guidelines">
+                        <div class="guidelines-header">
+                            <svg viewBox="0 0 24 24" fill="none" class="info-icon" stroke="currentColor" stroke-width="2">
+                                <circle cx="12" cy="12" r="10"></circle>
+                                <line x1="12" y1="16" x2="12" y2="12"></line>
+                                <line x1="12" y1="8" x2="12.01" y2="8"></line>
+                            </svg>
+                            <h4>Dataset Format Guidelines</h4>
+                        </div>
+                        <ul class="guideline-list">
+                            <li><strong>Target Columns:</strong> <code>ra</code>, <code>dec</code>, <code>redshift</code>, <code>psfMag_r</code>, <code>u</code>, <code>g</code>, <code>r</code>, <code>i</code>, <code>z</code></li>
+                            <li><strong>Exact Headers:</strong> The CSV file must contain precisely these lowercase headers in the correct arrangement. All fields must be numeric.</li>
+                            <li class="guideline-warning"><strong>⚠️ Action Failure:</strong> Failing to format the columns correctly, including non-numeric artifacts, or submitting extraneous indices will immediately trigger a payload rejection and throw server-side errors, aborting the process.</li>
+                        </ul>
+                    </div>
                 </div>
                 
                 <!-- Right Panel: Batch Results -->

From 5b02acff037bb843dbc19e3ea78e08aab2ebe34a Mon Sep 17 00:00:00 2001
From: Sakib Hossain <sakibhossainalif29@gmail.com>
Date: Thu, 9 Apr 2026 21:06:38 +0600
Subject: [PATCH 11/13] Fix "always 1% confidence" bug in `app.py` by passing
 a precision to `round`

---
 app/app.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/app.py b/app/app.py
index 8dcf17a..2520998 100644
--- a/app/app.py
+++ b/app/app.py
@@ -200,7 +200,7 @@ async def prediction_via_file_ops(payload:UploadFile, dep: Tuple[Pipeline,np.nda
     # Postprocessing
     label_map = {0: "GALAXY", 1: "STAR", 2: "QSO"}
     pred_label:list[str] = [label_map.get(pred) for pred in pred_label]
-    pred_proba = [[round(r) for r in pred] for pred in pred_proba]
+    pred_proba = [[round(r, 3) for r in pred] for pred in pred_proba]
 
     msg = {
         "message": "batch prediction successful",

From 86298a67307f1a96fca5248d3dfbff6290d4c92c Mon Sep 17 00:00:00 2001
From: Sakib Hossain <sakibhossainalif29@gmail.com>
Date: Thu, 9 Apr 2026 21:11:02 +0600
Subject: [PATCH 12/13] Modify `app.py` to process uploads entirely in memory
 instead of saving the buffer to disk

---
 app/app.py | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/app/app.py b/app/app.py
index 2520998..377f312 100644
--- a/app/app.py
+++ b/app/app.py
@@ -8,9 +8,7 @@
 from models.fit import main
 from .schema.validation import UserInput
 from pathlib import Path
-from uuid import uuid4
 from contextlib import asynccontextmanager
-import shutil
 import numpy as np
 import pandas as pd
 import joblib
@@ -159,26 +157,18 @@ async def prediction_via_file_ops(payload:UploadFile, dep: Tuple[Pipeline,np.nda
     pipe, column_names = dep
     expected_upload_cols = ['ra', 'dec', 'redshift', 'psfMag_r', 'u', 'g', 'r', 'i', 'z']
 
-    ## Save the incoming .csv file on disk
     accepted_exts = [".csv"]
-    UPLOAD_DIR = Path("app","uploads")
-    UPLOAD_DIR.mkdir(exist_ok=True)
     extension = Path(payload.filename).suffix
-    if extension not in accepted_exts:
+    if extension.lower() not in accepted_exts:
         raise HTTPException(
             status_code=422, 
             detail=f"Uploaded data must be in '.csv' format, got {extension} instead"
         )
-    filename = f"{uuid4()}{extension}"
-    DESTINATION = UPLOAD_DIR / filename
 
     try:
-        with open(DESTINATION,"wb") as f:
-            shutil.copyfileobj(payload.file,f)
+        df = pd.read_csv(payload.file)
     except Exception as e:
-        return f"Error during payload dump procedure: {e}"
-
-    df = pd.read_csv(DESTINATION)
+        raise HTTPException(status_code=422, detail=f"Failed to parse CSV file tracking: {str(e)}")
     df = upload_validator(df, expected_upload_cols)
     
     # Feature Engineering (Vectorized)
@@ -186,8 +176,6 @@ async def prediction_via_file_ops(payload:UploadFile, dep: Tuple[Pipeline,np.nda
     df['g_r_color'] = df['g'] - df['r']
     df['r_i_color'] = df['r'] - df['i']
     df['i_z_color'] = df['i'] - df['z']
-    
-    # Drop original bands
     df = df.drop(columns=['u', 'g', 'r', 'i', 'z'])
     
     # Reorder columns to match the pipeline's expected order (excluding 'class')

From 16d811f9283a42082ec418226e5399d8c1a0137d Mon Sep 17 00:00:00 2001
From: Sakib Hossain <sakibhossainalif29@gmail.com>
Date: Thu, 9 Apr 2026 21:15:35 +0600
Subject: [PATCH 13/13] Add `httpx` and `httpcore` to `requirements.txt`

---
 requirements.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/requirements.txt b/requirements.txt
index 2b41ee6..b53b9df 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,6 +15,8 @@ Flask==3.1.2
 fonttools==4.61.1
 gunicorn==23.0.0
 h11==0.16.0
+httpcore==1.0.9
+httpx==0.28.1
 idna==3.11
 imbalanced-learn==0.14.1
 itsdangerous==2.2.0