Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions apps/precalculated/components/sidebar.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -200,8 +200,8 @@ def render_file_section() -> Tuple[bool, Optional[str]]:
st.session_state.parquet_file_path = file_path
st.session_state.column_info = column_info

# Reset downstream state
st.session_state.filtered_df = None
# Initialize filtered_df to full dataset (filtering is optional)
st.session_state.filtered_df = df
st.session_state.embeddings = None
st.session_state.data = None
st.session_state.labels = None
Expand Down Expand Up @@ -739,9 +739,9 @@ def create_cluster_dataframe(df: pd.DataFrame, embeddings_2d: np.ndarray, labels
"idx": range(len(df))
})

# Add available metadata columns for tooltips
# Add available metadata columns for tooltips (fill NaN for clean Altair display)
for col in df.columns:
if col not in ['uuid', 'emb', 'embedding', 'embeddings'] and col not in df_plot.columns:
df_plot[col] = df[col].values
df_plot[col] = df[col].fillna("N/A").values

return df_plot
23 changes: 19 additions & 4 deletions shared/components/visualization.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -82,9 +82,12 @@ def _render_chart_fragment(df_plot):
tooltip_fields.append('cluster:N')
cluster_legend_title = "Cluster"

# Add other metadata columns dynamically (limit to prevent tooltip overflow)
skip_cols = {'x', 'y', 'cluster', 'cluster_name', 'idx', 'emb', 'embedding', 'embeddings', 'vector'}
metadata_cols = [c for c in df_plot.columns if c not in skip_cols][:8]
# Add other metadata columns dynamically
# Skip technical, ID, and image-URL columns (details available in Data Preview panel)
skip_cols = {'x', 'y', 'cluster', 'cluster_name', 'idx',
'emb', 'embedding', 'embeddings', 'vector',
'uuid', 'identifier', 'image_url', 'url', 'img_url', 'image'}
metadata_cols = [c for c in df_plot.columns if c not in skip_cols][:15]
tooltip_fields.extend(metadata_cols)

# Determine title based on data type
Expand All @@ -101,14 +104,26 @@ def _render_chart_fragment(df_plot):
else:
point_opacity = 0.7 # Normal opacity

# Sort legend labels: numeric sort for cluster IDs, alphabetical for strings
unique_vals = df_plot['cluster'].unique()
try:
sorted_vals = sorted(unique_vals, key=int)
except (ValueError, TypeError):
sorted_vals = sorted(unique_vals, key=str)

# Create scatter plot
scatter = (
alt.Chart(df_plot)
.mark_circle(size=60, opacity=point_opacity)
.encode(
x=alt.X('x:Q', scale=alt.Scale(zero=False)),
y=alt.Y('y:Q', scale=alt.Scale(zero=False)),
color=alt.Color('cluster:N', legend=alt.Legend(title=cluster_legend_title)),
color=alt.Color(
'cluster:N',
legend=alt.Legend(title=cluster_legend_title),
sort=sorted_vals,
scale=alt.Scale(scheme='tableau20')
),
tooltip=tooltip_fields,
fillOpacity=alt.condition(point_selector, alt.value(1), alt.value(0.3))
)
Expand Down