Imageomics · NetZissou · Apr 1, 2026 · Mar 30, 2026 · Mar 31, 2026
diff --git a/apps/precalculated/components/sidebar.py b/apps/precalculated/components/sidebar.py
@@ -200,8 +200,8 @@ def render_file_section() -> Tuple[bool, Optional[str]]:
                 st.session_state.parquet_file_path = file_path
                 st.session_state.column_info = column_info
 
-                # Reset downstream state
-                st.session_state.filtered_df = None
+                # Initialize filtered_df to full dataset (filtering is optional)
+                st.session_state.filtered_df = df
                 st.session_state.embeddings = None
                 st.session_state.data = None
                 st.session_state.labels = None
@@ -739,9 +739,9 @@ def create_cluster_dataframe(df: pd.DataFrame, embeddings_2d: np.ndarray, labels
         "idx": range(len(df))
     })
 
-    # Add available metadata columns for tooltips
+    # Add available metadata columns for tooltips (fill NaN for clean Altair display)
     for col in df.columns:
         if col not in ['uuid', 'emb', 'embedding', 'embeddings'] and col not in df_plot.columns:
-            df_plot[col] = df[col].values
+            df_plot[col] = df[col].fillna("N/A").values
 
     return df_plot
diff --git a/shared/components/visualization.py b/shared/components/visualization.py
@@ -82,9 +82,12 @@ def _render_chart_fragment(df_plot):
         tooltip_fields.append('cluster:N')
         cluster_legend_title = "Cluster"
 
-    # Add other metadata columns dynamically (limit to prevent tooltip overflow)
-    skip_cols = {'x', 'y', 'cluster', 'cluster_name', 'idx', 'emb', 'embedding', 'embeddings', 'vector'}
-    metadata_cols = [c for c in df_plot.columns if c not in skip_cols][:8]
+    # Add other metadata columns dynamically
+    # Skip technical, ID, and image-URL columns (details available in Data Preview panel)
+    skip_cols = {'x', 'y', 'cluster', 'cluster_name', 'idx',
+                 'emb', 'embedding', 'embeddings', 'vector',
+                 'uuid', 'identifier', 'image_url', 'url', 'img_url', 'image'}
+    metadata_cols = [c for c in df_plot.columns if c not in skip_cols][:15]
     tooltip_fields.extend(metadata_cols)
 
     # Determine title based on data type
@@ -101,14 +104,26 @@ def _render_chart_fragment(df_plot):
     else:
         point_opacity = 0.7  # Normal opacity
 
+    # Sort legend labels: numeric sort for cluster IDs, alphabetical for strings
+    unique_vals = df_plot['cluster'].unique()
+    try:
+        sorted_vals = sorted(unique_vals, key=int)
+    except (ValueError, TypeError):
+        sorted_vals = sorted(unique_vals, key=str)
+
     # Create scatter plot
     scatter = (
         alt.Chart(df_plot)
         .mark_circle(size=60, opacity=point_opacity)
         .encode(
             x=alt.X('x:Q', scale=alt.Scale(zero=False)),
             y=alt.Y('y:Q', scale=alt.Scale(zero=False)),
-            color=alt.Color('cluster:N', legend=alt.Legend(title=cluster_legend_title)),
+            color=alt.Color(
+                'cluster:N',
+                legend=alt.Legend(title=cluster_legend_title),
+                sort=sorted_vals,
+                scale=alt.Scale(scheme='tableau20')
+            ),
             tooltip=tooltip_fields,
             fillOpacity=alt.condition(point_selector, alt.value(1), alt.value(0.3))
         )