MIT-Emerging-Talent · Nov 23, 2025 · Nov 20, 2025 · Nov 20, 2025 · Nov 20, 2025
diff --git a/smart_pantry_manager/data/clean_data b/smart_pantry_manager/data/clean_data
@@ -0,0 +1,128 @@
+# spell-checker: disable
+"""
+Clean recipe CSV and create SQLite DB with diet type.
+"""
+
+import re
+import sqlite3
+
+import pandas as pd
+
+# Read the raw recipe dataset; the path is relative to the current working directory
+RAW_CSV_PATH = "smart_pantry_manager/data/Recipe_Dataset.csv"
+df = pd.read_csv(RAW_CSV_PATH)
+
+# Forbidden (haram) ingredients: pork products and alcoholic drinks.
+# contains_haram() looks for these in the lower-cased ingredient text, and
+# recipes that mention any of them are dropped before the database is written.
+haram_keywords = [
+    "pork",
+    "ham",
+    "bacon",
+    "prosciutto",
+    "pancetta",
+    "sausage",
+    "wine",
+    "beer",
+    "bourbon",
+    "rum",
+    "whisky",
+    "whiskey",  # alternative spelling of "whisky"
+    "vodka",
+    "tequila",
+    "cognac",
+    "brandy",
+    "liqueur",
+    "alcohol",
+    "champagne",
+    "sake",
+    "sherry",
+    "gin",
+]
+
+# Meat, poultry and seafood: any match makes classify_diet_type() return
+# "Non-Vegetarian".
+meat_keywords = [
+    "chicken",
+    "beef",
+    "lamb",
+    "turkey",
+    "fish",
+    "shrimp",
+    "salmon",
+    "tuna",
+    "meat",
+    "steak",
+    "duck",
+    "anchovy",
+    "crab",
+    "lobster",
+    "clam",
+    "oyster",
+    "scallop",
+    "mussel",
+    "squid",
+    "sausage",  # also listed in haram_keywords
+]
+
+# Animal-derived ingredients that are not meat: when no meat keyword matches,
+# any of these makes classify_diet_type() return "Vegetarian" instead of "Vegan".
+animal_product_keywords = [
+    "egg",
+    "milk",
+    "cheese",
+    "butter",
+    "cream",
+    "yogurt",
+    "ghee",
+    "honey",
+    "mayonnaise",
+    "whey",
+    "casein",
+    "gelatin",
+]
+
+
+# Keyword matching
+def build_keyword_pattern(keywords):
+    """Compile one regex that matches any of *keywords* as a whole word.
+
+    Simple plural forms are accepted too ("egg" also matches "eggs",
+    "anchovy" also matches "anchovies"), because ingredient lines usually
+    name items in the plural. Longer words that merely start with a keyword
+    ("ginger" for "gin", "eggplant" for "egg") are still rejected.
+
+    Args:
+        keywords: Iterable of lower-case keyword strings.
+
+    Returns:
+        A compiled pattern; it never matches when *keywords* is empty.
+    """
+    alternatives = []
+    for kw in keywords:
+        if len(kw) > 1 and kw.endswith("y") and kw[-2] not in "aeiou":
+            # consonant + "y" pluralises as "ies": anchovy -> anchovies
+            alternatives.append(rf"{re.escape(kw[:-1])}(?:y|ies)")
+        else:
+            # optional "s" / "es" suffix: egg -> eggs, fish -> fishes
+            alternatives.append(rf"{re.escape(kw)}(?:e?s)?")
+    if not alternatives:
+        # An empty alternation would match everywhere; use a never-matching pattern
+        return re.compile(r"(?!)")
+    return re.compile(rf"\b(?:{'|'.join(alternatives)})\b")
+
+
+# Check haram
+def contains_haram(ingredient_text):
+    """Return True when the ingredient text mentions a forbidden ingredient.
+
+    Args:
+        ingredient_text: Raw ingredients cell of one recipe; may be NaN/None.
+
+    Returns:
+        False for missing values, otherwise whether any entry of
+        ``haram_keywords`` (singular or simple plural) occurs as a whole word.
+    """
+    if pd.isna(ingredient_text):
+        return False
+    # str() guards against non-text cells (e.g. numbers) that have no .lower()
+    text = str(ingredient_text).lower()
+    # The re module caches compiled patterns, so rebuilding per call stays cheap
+    return build_keyword_pattern(haram_keywords).search(text) is not None
+
+
+# Classify diet type
+def classify_diet_type(ingredient_text):
+    """Label a recipe by the most restrictive diet its ingredients allow.
+
+    Args:
+        ingredient_text: Raw ingredients cell of one recipe; may be NaN/None.
+
+    Returns:
+        "Unknown" for missing values, "Non-Vegetarian" if a meat keyword
+        matches, "Vegetarian" if only an animal-product keyword matches,
+        otherwise "Vegan".
+    """
+    if pd.isna(ingredient_text):
+        return "Unknown"
+    text = str(ingredient_text).lower()
+    # Meat is checked first: a recipe with both meat and dairy is Non-Vegetarian
+    if build_keyword_pattern(meat_keywords).search(text):
+        return "Non-Vegetarian"
+    if build_keyword_pattern(animal_product_keywords).search(text):
+        return "Vegetarian"
+    return "Vegan"
+
+
+# Filter haram: flag each recipe, keep only the permitted ones, then label them
+df["contains_haram"] = df["Ingredients"].apply(contains_haram)
+df_clean = df[~df["contains_haram"]].copy()
+df_clean["Diet_Type"] = df_clean["Ingredients"].apply(classify_diet_type)
+# The flag was only needed for filtering; keep it out of the exported tables
+df_clean = df_clean.drop(columns="contains_haram")
+
+# SQLite DB
+conn = sqlite3.connect("smart_pantry_manager/data/cleaned_data.sqlite")
+try:
+    # One table per diet type. Rows labelled "Unknown" (missing ingredients)
+    # get no table of their own and appear only in all_recipes.
+    for diet in ["Vegan", "Vegetarian", "Non-Vegetarian"]:
+        diet_df = df_clean[df_clean["Diet_Type"] == diet].copy()
+        diet_df.to_sql(
+            diet.lower().replace("-", "_") + "_recipes",
+            conn,
+            if_exists="replace",
+            index=False,
+        )
+
+    # Combined table
+    df_clean.to_sql("all_recipes", conn, if_exists="replace", index=False)
+finally:
+    # Close even when a write fails so the connection is never leaked
+    conn.close()
+
+print("✅ SQLite DB created with all_recipes and diet tables.")
diff --git a/smart_pantry_manager/data/cleaned_data.sqlite b/smart_pantry_manager/data/cleaned_data.sqlite
diff --git a/smart_pantry_manager/data/pantry_omnia.xlsx b/smart_pantry_manager/data/pantry_omnia.xlsx
diff --git a/smart_pantry_manager/pages/all_recipes.py b/smart_pantry_manager/pages/all_recipes.py
@@ -1,70 +1,84 @@
 # spell-checker: disable
 """
-All Recipes Page for Smart Pantry Application (SQLite version)
+All Recipes Page
+Shows all recipes and diet type from cleaned_data.sqlite
 """
 
+import ast
+import os
 import sqlite3
+
 import pandas as pd
 import streamlit as st
 
 st.set_page_config(page_title="All Recipes", page_icon="📜", layout="wide")
-
 st.title("📜 All Recipes")
-st.caption("Browse all available recipes in the Smart Pantry system.")
+st.caption("Browse all recipes with diet type from Smart Pantry DB.")
+
+# SQLite file queried by load_recipes(). The path is relative, so it is
+# resolved against the working directory the app is started from.
+DB_PATH = os.path.join("smart_pantry_manager", "data", "cleaned_data.sqlite")
 
 
-# ---------- Load recipes from SQLite ----------
+# ---------- Load Recipes ----------
 @st.cache_data
 def load_recipes():
-    """
-    Load recipes from the SQLite database and normalize column names.
-    Returns a DataFrame with columns: Recipe, Ingredients, Instructions
-    """
-    db_path = "the_app/data/Recipe_Dataset.sqlite"
-
-    # Connect to SQLite database
-    conn = sqlite3.connect(db_path)
-
-    # Load the table "recipes"
-    df = pd.read_sql_query("SELECT * FROM recipes", conn)
-
+    """
+    Load every row of the ``all_recipes`` table from the SQLite file at DB_PATH.
+
+    Returns a DataFrame that always has the columns Title, Ingredients,
+    Instructions and Diet_Type; any of them missing from the table is added
+    and filled with empty strings.
+
+    If the database file does not exist or the query raises, an error message
+    is shown on the page and an empty DataFrame with those four columns is
+    returned instead.
+
+    NOTE(review): the function is wrapped in st.cache_data, so the empty
+    fallback is presumably cached as well - confirm whether the cache needs
+    clearing once the database file has been created.
+    """
+    # Check for the file first and bail out if it is missing
+    if not os.path.exists(DB_PATH):
+        st.error("❌ Recipes database not found.")
+        return pd.DataFrame(
+            columns=["Title", "Ingredients", "Instructions", "Diet_Type"]
+        )
+    conn = sqlite3.connect(DB_PATH)
+    try:
+        df = pd.read_sql("SELECT * FROM all_recipes", conn)
+    except Exception as e:
+        st.error(f"Error loading recipes: {e}")
+        # Close here because this early return skips the close below
+        conn.close()
+        return pd.DataFrame(
+            columns=["Title", "Ingredients", "Instructions", "Diet_Type"]
+        )
     conn.close()
-
-    # Normalize column names
-    df.columns = [c.strip().lower() for c in df.columns]
-
-    # Rename columns if they exist
-    rename_map = {
-        "title": "Recipe",
-        "cleaned_ingredients": "Ingredients",
-        "instruction": "Instructions",
-        "instructions": "Instructions",
-    }
-    df.rename(
-        columns={k: v for k, v in rename_map.items() if k in df.columns}, inplace=True
-    )
-
-    # Keep only required columns
-    required_cols = ["Recipe", "Ingredients", "Instructions"]
-    df = df[[col for col in required_cols if col in df.columns]]
-
+    # Add any expected column the table lacks (filled with "") so the page
+    # code can read all four columns from every row
+    for col in ["Title", "Ingredients", "Instructions", "Diet_Type"]:
+        if col not in df.columns:
+            df[col] = ""
     return df
 
 
 recipes = load_recipes()
-
-# ---------- Display recipes ----------
 if recipes.empty:
     st.info("No recipes found.")
-else:
-    search = st.text_input("🔍 Search for a recipe:")
-    filtered = (
-        recipes[recipes["Recipe"].str.contains(search, case=False, na=False)]
-        if search
-        else recipes
-    )
-
-    for _, row in filtered.iterrows():
-        with st.expander(row["Recipe"]):
-            st.markdown(f"**🧂 Ingredients:** {row['Ingredients']}")
-            st.markdown(f"**👩‍🍳 Instructions:** {row['Instructions']}")
+    # Nothing to list: end this script run so the display code below is skipped
+    st.stop()
+
+
+# ---------- Parse Ingredients ----------
+def parse_ingredients(ingredients_str):
+    """Turn a stored ingredients value into a list of non-empty strings.
+
+    Accepted forms, tried in this order:
+      * an actual list/tuple - its items are stringified and stripped;
+      * a missing value (None/NaN) or blank text - gives an empty list;
+      * text holding a Python list literal such as "['a', 'b']";
+      * comma-separated text such as "a, b";
+      * anything else - returned as a single-item list.
+
+    Args:
+        ingredients_str: Value of the Ingredients column for one recipe.
+
+    Returns:
+        List of stripped ingredient strings with empty entries removed.
+    """
+    if isinstance(ingredients_str, (list, tuple)):
+        items = [str(item).strip() for item in ingredients_str]
+        return [item for item in items if item]
+    if pd.isna(ingredients_str):
+        return []
+    # Strip first so padding cannot hide the brackets of a list literal
+    text = str(ingredients_str).strip()
+    if not text:
+        # A blank cell has no ingredients; avoid rendering an empty bullet
+        return []
+    if text.startswith("[") and text.endswith("]"):
+        try:
+            parsed = ast.literal_eval(text)
+        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
+            # Not a valid literal after all: show the raw text as one entry
+            return [text]
+        items = [str(item).strip() for item in parsed]
+        return [item for item in items if item]
+    if "," in text:
+        return [part.strip() for part in text.split(",") if part.strip()]
+    return [text]
+
+
+# ---------- Display Helpers ----------
+def text_or_default(value, default):
+    """Return *value* as stripped text, or *default* if it is missing or blank.
+
+    The recipe columns always exist, so a missing cell arrives as None/NaN or
+    an empty string rather than as an absent key; without this check it would
+    be rendered as "nan" or as empty text.
+    """
+    if not isinstance(value, str):
+        if value is None or pd.isna(value):
+            return default
+        value = str(value)
+    return value.strip() or default
+
+
+# ---------- Display Recipes ----------
+for _, row in recipes.iterrows():
+    title = text_or_default(row.get("Title"), "Unnamed Recipe")
+    diet = text_or_default(row.get("Diet_Type"), "Unknown")
+    with st.expander(f"📖 {title} — {diet}"):
+        col1, col2 = st.columns([1, 2])
+        with col1:
+            st.markdown(f"**Diet Type:** {diet}")
+        with col2:
+            st.markdown("**🧂 Ingredients:**")
+            ing_list = parse_ingredients(row.get("Ingredients", ""))
+            # Show at most ten ingredients and summarise the rest
+            for ing in ing_list[:10]:
+                st.write(f"• {ing}")
+            if len(ing_list) > 10:
+                st.write(f"*...and {len(ing_list) - 10} more*")
+            st.markdown("**👩‍🍳 Instructions:**")
+            st.write(
+                text_or_default(row.get("Instructions"), "No instructions available.")
+            )