PRAISELab-PicusLab · rimmelasghar · May 25, 2026 · May 25, 2026 · May 25, 2026 · May 25, 2026
diff --git a/.gitignore b/.gitignore
@@ -1,4 +1,13 @@
 __pycache__/
 bibliovenv/
 Bibenv/
-.idea/
+.idea/
+
+env/
+out/dashboard_compat_errors.log
+out/dashboard_compat.log
+compat_run.log
+installed.txt
+requirements.utf8.txt
+**/__pycache__/
+**/.ipynb_checkpoints/
diff --git a/REPORT.md b/REPORT.md
diff --git a/app.py b/app.py
@@ -853,9 +853,265 @@ def indicator_types_ui_all():
                     """
                 ),
 
+        # Helper: render a compact preview of a normalised DataFrame so the
+        # user can immediately see how the ETL standardised the live/CSV
+        # payload (column names, list-shaped fields, integer casts, etc.).
+        #
+        #   frame        -- DataFrame to preview; None/empty gives an empty TagList.
+        #   source_label -- origin shown in the caption.
+        #   query_label  -- query string or file name shown in the caption.
+        #   n_rows       -- maximum number of rows rendered.
+        #
+        # Returns ui.HTML holding a caption, one badge per mandatory schema
+        # column present in `frame`, and a scrollable table. Both labels are
+        # HTML-escaped because they carry user-supplied text.
+        def _normalised_preview(frame, source_label="", query_label="", n_rows=20):
+            import html as _html
+            import pandas as _pd
+            from www.services.etl import SCHEMA as _SCHEMA
+
+            if frame is None or len(frame) == 0:
+                return ui.TagList()
+
+            mandatory = [c for c, s in _SCHEMA.items() if s.get("mandatory")]
+            preferred = [
+                "DB", "UT", "DI", "PY", "TI", "AU", "SO", "TC", "C1", "DE", "SR",
+            ]
+            # Preferred columns first, then pad with whatever else exists up
+            # to 15 columns in total.
+            cols = [c for c in preferred if c in frame.columns]
+            cols += [c for c in frame.columns if c not in cols][: max(0, 15 - len(cols))]
+
+            preview = frame[cols].head(n_rows).copy()
+
+            def _fmt(v):
+                # Lists are shown as their first six items joined by "; ".
+                if isinstance(v, list):
+                    s = "; ".join(str(x) for x in v[:6])
+                    if len(v) > 6:
+                        s += f"  …(+{len(v) - 6})"
+                    return s
+                if v is None:
+                    return ""
+                if isinstance(v, float) and _pd.isna(v):
+                    return ""
+                return str(v)
+
+            # DataFrame.map only exists on pandas >= 2.1; older releases
+            # expose the same element-wise operation as applymap. Checked on
+            # the class so a column literally named "map" cannot interfere.
+            if hasattr(_pd.DataFrame, "map"):
+                preview = preview.map(_fmt)
+            else:
+                preview = preview.applymap(_fmt)
+
+            # escape=True makes to_html escape the cell contents itself.
+            html_table = preview.to_html(
+                index=False,
+                border=0,
+                classes="table table-sm table-striped table-hover",
+                escape=True,
+            )
+
+            schema_badges = "".join(
+                f'<span style="display:inline-block;background:#e6e9ff;color:#5567BB;'
+                f'padding:2px 8px;border-radius:10px;font-size:11px;margin:2px;">'
+                f'{_html.escape(str(c))}</span>'
+                for c in mandatory if c in frame.columns
+            )
+
+            # The labels go into raw HTML below, so escape them here: the
+            # query text and the uploaded file name are user-controlled.
+            label = ""
+            if source_label or query_label:
+                safe_source = _html.escape(str(source_label))
+                label = f' from <b>{safe_source}</b>'
+                if query_label:
+                    safe_query = _html.escape(str(query_label))
+                    label += f' (<code>{safe_query}</code>)'
+
+            markup = f"""
+            <div style="margin-top:18px;">
+              <h4 style="color:#5567BB; margin-bottom:4px;">🧪 Normalised preview</h4>
+              <p style="color:gray; font-size:12px; margin-top:0;">
+                First {len(preview)} of {len(frame)} record(s){label}, projected onto the
+                standard ETL schema. List-shaped columns (AU, C1, DE, CR, …) are
+                shown joined with "<code>;</code>" for readability only — the
+                underlying DataFrame keeps real Python lists.
+              </p>
+              <div style="margin:6px 0 10px 0;">
+                <span style="color:gray;font-size:12px;">Mandatory columns present:</span>
+                {schema_badges}
+              </div>
+              <div style="max-height:360px; overflow:auto; border:1px solid #eef;
+                          border-radius:6px; font-size:12px;">
+                {html_table}
+              </div>
+            </div>
+            """
+            return ui.HTML(markup)
+
         with ui.nav_panel("None", value="API"):
-            ui.h3("🚧 Warning: API is under construction 🚧")
-
+            ui.h3("🛰️ Live API query", style="color: #5567BB;")
+            ui.p(
+                "Run a live query against OpenAlex or PubMed. Results are normalised "
+                "by the ETL pipeline into the standard 35-column schema and loaded "
+                "as the current dataset (available to every analytical panel)."
+            )
+            with ui.layout_columns(col_widths=(3, 5, 2, 2)):
+                ui.input_select(
+                    "api_source",
+                    "Source",
+                    choices={"openalex": "OpenAlex", "pubmed": "PubMed"},
+                    selected="openalex",
+                )
+                ui.input_text(
+                    "api_query",
+                    "Query",
+                    value="bibliometrics",
+                    placeholder="e.g. bibliometrics OR scientometrics",
+                )
+                ui.input_numeric("api_max", "Max records", value=50, min=1, max=10000, step=10)
+                ui.input_action_button("api_run", "Fetch", icon=ICONS["play"])
+            ui.input_text(
+                "api_mailto",
+                "Polite-pool e-mail (optional, recommended for OpenAlex)",
+                value="",
+                placeholder="you@example.org",
+            )
+
+            @render.ui
+            @reactive.event(input.api_run)
+            def api_run_handler():
+                # Runs when the "Fetch" button is pressed: pulls records from
+                # the selected source, installs them as the active dataset
+                # (df.set + reset_all_analyses) and returns a status message,
+                # a normalised preview and a script that reveals the sidebars.
+                # Every failure path returns a markdown notice instead of raising.
+                from www.services.etl.api_retriever import fetch_dataframe
+                from www.services.etl import validate
+                src = input.api_source()
+                q = (input.api_query() or "").strip()
+                if not q:
+                    return ui.markdown("⚠️ Please enter a query.")
+                try:
+                    n = int(input.api_max() or 50)
+                except (TypeError, ValueError, OverflowError):
+                    # Non-numeric, NaN or infinite input: use the default size.
+                    n = 50
+                # Re-apply the widget's 1..10000 bounds server-side; nothing
+                # else in this handler stops a zero, negative or huge value
+                # from reaching the retriever.
+                n = max(1, min(n, 10000))
+                mailto = (input.api_mailto() or "").strip() or None
+                try:
+                    # mailto is only forwarded for OpenAlex.
+                    kwargs = {"mailto": mailto} if (src == "openalex" and mailto) else {}
+                    fetched = fetch_dataframe(src, q, max_results=n, **kwargs)
+                except Exception as exc:
+                    return ui.markdown(f"❌ Live fetch failed: `{exc!r}`")
+                if fetched is None or len(fetched) == 0:
+                    return ui.markdown("⚠️ No records returned.")
+                df.set(fetched)
+                reset_all_analyses()
+                report = validate(fetched)
+                status = "✅" if report.get("ok") else "⚠️"
+                # Inline JS: reveal both sidebars after the server pushes the
+                # newly-rendered sidebar_2 into the DOM (handled by the
+                # MutationObserver registered at app start-up).
+                reveal_js = ui.tags.script(
+                    "setTimeout(function(){"
+                    "  if (typeof setSidebarState === 'function') setSidebarState(true);"
+                    "  var s1=document.getElementById('sidebar');"
+                    "  var s2=document.getElementById('sidebar_2');"
+                    "  if (s1) s1.classList.remove('sidebar-hidden');"
+                    "  if (s2) s2.classList.remove('sidebar-hidden');"
+                    "  var c=document.getElementById('mainContent');"
+                    "  if (c) c.classList.remove('full-width');"
+                    "}, 300);"
+                )
+                return ui.TagList(
+                    ui.markdown(
+                        f"{status} Loaded **{len(fetched)} records** from "
+                        f"**{src}** (`{q}`). The dataset is now active — open any "
+                        "analytical panel from the sidebar.\n\n"
+                        f"Validation: `{report}`"
+                    ),
+                    _normalised_preview(fetched, src, q),
+                    reveal_js,
+                )
+
+            # --- Load a standardised (ETL-produced) CSV: file picker plus an explicit "Load CSV" trigger --- #
+            ui.hr()
+            ui.h3("📥 Load standardised CSV", style="color: #5567BB;")
+            ui.p(
+                "Upload a CSV produced by the ETL pipeline (e.g. one of the "
+                "files written by ``tests/run_etl.py`` under ``out/etl/``). "
+                "It is loaded directly as the current dataset — every "
+                "analytical panel becomes available with no re-parsing."
+            )
+            with ui.layout_columns(col_widths=(8, 4)):
+                ui.input_file(
+                    "csv_unified_file",  # read by csv_unified_handler via input.csv_unified_file()
+                    "Unified CSV file",
+                    accept=[".csv"],
+                    multiple=False,  # the handler only ever looks at files[0]
+                )
+                ui.input_action_button(
+                    "csv_unified_run", "Load CSV", icon=ICONS["play"]  # event source for csv_unified_handler
+                )
+
+            @render.ui
+            @reactive.event(input.csv_unified_run)
+            def csv_unified_handler():
+                # Runs when "Load CSV" is pressed: reads the uploaded file,
+                # checks it carries every mandatory schema column, restores
+                # list/int column types, installs the frame as the active
+                # dataset and returns a status message, a normalised preview
+                # and a script that reveals the sidebars. Every failure path
+                # returns a markdown notice instead of raising.
+                import ast
+                import pandas as pd
+                from www.services.etl import validate, SCHEMA
+                from www.services.etl.mappings import LIST_COLUMNS, INT_COLUMNS
+
+                files = input.csv_unified_file()
+                if not files:
+                    return ui.markdown("⚠️ Please choose a CSV first.")
+                path = files[0]["datapath"]
+
+                def _parse_list(v):
+                    # Turn one CSV cell back into a list of strings.
+                    if isinstance(v, list):
+                        return v
+                    if v is None or (isinstance(v, float) and pd.isna(v)):
+                        return []
+                    s = str(v).strip()
+                    if not s:
+                        return []
+                    if s.startswith("[") and s.endswith("]"):
+                        try:
+                            parsed = ast.literal_eval(s)
+                        except (ValueError, TypeError, SyntaxError,
+                                MemoryError, RecursionError):
+                            # Not a Python list literal after all; fall
+                            # through to the separator split below.
+                            parsed = None
+                        if isinstance(parsed, list):
+                            return [str(x) for x in parsed]
+                    # Fallback: split on common bibliometrix separators.
+                    for sep in [";", "|", ","]:
+                        if sep in s:
+                            return [t.strip() for t in s.split(sep) if t.strip()]
+                    return [s]
+
+                try:
+                    loaded = pd.read_csv(path, dtype=str, keep_default_na=False)
+                except Exception as exc:
+                    return ui.markdown(f"❌ Could not read CSV: `{exc!r}`")
+
+                # Reject files that are not ETL output before doing any
+                # per-cell work on them.
+                missing = [
+                    c for c, spec in SCHEMA.items()
+                    if spec.get("mandatory") and c not in loaded.columns
+                ]
+                if missing:
+                    return ui.markdown(
+                        "❌ This does not look like a standardised ETL CSV — "
+                        f"missing mandatory columns: `{missing}`."
+                    )
+
+                # Coerce list columns back to actual Python lists and
+                # int columns back to integers so downstream functions
+                # see the same shapes they would after convert2df().
+                try:
+                    for col in loaded.columns:
+                        if col in LIST_COLUMNS:
+                            loaded[col] = loaded[col].map(_parse_list)
+                        elif col in INT_COLUMNS:
+                            loaded[col] = pd.to_numeric(
+                                loaded[col], errors="coerce"
+                            ).fillna(0).astype(int)
+                except (TypeError, ValueError, OverflowError) as exc:
+                    # e.g. a non-finite value such as "inf" in an integer
+                    # column makes astype(int) raise.
+                    return ui.markdown(
+                        f"❌ Could not coerce column `{col}`: `{exc!r}`"
+                    )
+
+                df.set(loaded)
+                reset_all_analyses()
+                report = validate(loaded)
+                status = "✅" if report.get("ok") else "⚠️"
+                reveal_js = ui.tags.script(
+                    "setTimeout(function(){"
+                    "  if (typeof setSidebarState === 'function') setSidebarState(true);"
+                    "  var s1=document.getElementById('sidebar');"
+                    "  var s2=document.getElementById('sidebar_2');"
+                    "  if (s1) s1.classList.remove('sidebar-hidden');"
+                    "  if (s2) s2.classList.remove('sidebar-hidden');"
+                    "  var c=document.getElementById('mainContent');"
+                    "  if (c) c.classList.remove('full-width');"
+                    "}, 300);"
+                )
+                return ui.TagList(
+                    ui.markdown(
+                        f"{status} Loaded **{len(loaded)} records** from "
+                        f"`{files[0]['name']}`. The dataset is now active — open "
+                        "any analytical panel from the sidebar.\n\n"
+                        f"Validation: `{report}`"
+                    ),
+                    _normalised_preview(loaded, "CSV", files[0]["name"]),
+                    reveal_js,
+                )
+
         with ui.nav_panel("None", value="collections"):
             ui.h3("🚧 Warning: Merge Collection is under construction 🚧")
 
@@ -8185,7 +8441,7 @@ def update_plot_settings():
 
 # --- Sidebar Management ---
 @render.express()
-@reactive.event(input.start_button)
+@reactive.event(input.start_button, input.api_run, input.csv_unified_run)
 def toggle_sidebar():
     with ui.tags.div(id="sidebar_2", class_="custom-sidebar"):
         with ui.accordion(id="sidebar_accordion_data", multiple=False, open=False):
@@ -8344,10 +8600,17 @@ def toggle_sidebar():
     });
     observer.observe(document.body, { childList: true, subtree: true });
 
-    // Show both sidebars when 'start_button' is clicked
+    // Show both sidebars when 'start_button', 'api_run' or 'csv_unified_run' is clicked
     document.addEventListener("click", function(e) {
-        if (e.target && e.target.id === "start_button") {
-            setSidebarState(true);
+        // The clickable area is sometimes a child <i>/<span>; walk up the
+        // DOM to find the nearest ancestor button id we care about.
+        let el = e.target;
+        while (el && el !== document) {
+            if (el.id === "start_button" || el.id === "api_run" || el.id === "csv_unified_run") {
+                setSidebarState(true);
+                break;
+            }
+            el = el.parentNode;
         }
     });
 """)

diff --git a/functions/get_citedcountries.py b/functions/get_citedcountries.py
@@ -68,7 +68,7 @@ def get_cited_countries(df, num_of_cited_countries, cited_countries_measure):
             y=list(range(n)),
             mode="markers+text",
             marker=dict(
-                size=18 + 6 * (x_values / x_values.max()),
+                size=18 + 6 * (x_values / (x_values.max() or 1)),
                 color=x_values,
                 colorscale=[[0, "#B3D1F2"], [1, "#5567BB"]],
                 line=dict(width=1, color="#E0E0E0"),
@@ -100,6 +100,8 @@ def get_cited_countries(df, num_of_cited_countries, cited_countries_measure):
 
     # Set x-axis ticks
     max_x = x_values.max()
+    if pd.isna(max_x) or max_x <= 0:
+        max_x = 0
     tick_step = 5 if max_x <= 50 else int(max_x // 10) or 1
     x_ticks = list(range(0, int(max_x) + tick_step, tick_step))
     if x_ticks[-1] < max_x:

diff --git a/functions/get_citeddocuments.py b/functions/get_citeddocuments.py
@@ -74,7 +74,7 @@ def get_cited_documents(df, num_of_cited_docs, cited_docs_measure):
             y=y_vals,
             mode="markers+text",
             marker=dict(
-                size=18 + 6 * (tab[tab.columns[1]] / tab[tab.columns[1]].max()),
+                size=18 + 6 * (tab[tab.columns[1]] / (tab[tab.columns[1]].max() or 1)),
                 color=tab[tab.columns[1]],
                 colorscale=[[0, "#B3D1F2"], [1, "#5567BB"]],
                 line=dict(width=1, color="#E0E0E0"),
@@ -106,6 +106,8 @@ def get_cited_documents(df, num_of_cited_docs, cited_docs_measure):
 
     # Set x-axis ticks
     max_x = tab[tab.columns[1]].max()
+    if pd.isna(max_x) or max_x <= 0:
+        max_x = 0
     tick_step = max(1, int(max_x // 6))
     x_ticks = list(range(0, int(max_x) + tick_step, tick_step))
     if x_ticks[-1] < max_x:

diff --git a/functions/get_localcitedauthors.py b/functions/get_localcitedauthors.py
@@ -74,7 +74,7 @@ def get_local_cited_authors(df, num_of_cited_authors, fast_search=False):
             y=list(range(len(author_counts))),
             mode="markers+text",
             marker=dict(
-                size=18 + 6 * (author_counts[frequency] / author_counts[frequency].max()),
+                size=18 + 6 * (author_counts[frequency] / (author_counts[frequency].max() or 1)),
                 color=author_counts[frequency],
                 colorscale=[[0, "#B3D1F2"], [1, "#5567BB"]],
                 line=dict(width=1, color="#E0E0E0"),
@@ -106,6 +106,8 @@ def get_local_cited_authors(df, num_of_cited_authors, fast_search=False):
 
     # Set x-axis ticks to 0, 5, 10, etc.
     max_x = author_counts[frequency].max()
+    if pd.isna(max_x) or max_x <= 0:
+        max_x = 0
     tick_step = 5
     x_ticks = list(range(0, int(max_x) + tick_step, tick_step))
     if x_ticks[-1] < max_x:

diff --git a/functions/get_localciteddocuments.py b/functions/get_localciteddocuments.py
@@ -79,7 +79,7 @@ def get_local_cited_documents(df, num_of_local_cited_docs, field_separator, fast
             y=list(range(len(df_documents))),
             mode="markers+text",
             marker=dict(
-                size=18 + 6 * (df_documents["Local Citations"] / df_documents["Local Citations"].max()),
+                size=18 + 6 * (df_documents["Local Citations"] / (df_documents["Local Citations"].max() or 1)),
                 color=df_documents["Local Citations"],
                 colorscale=[[0, "#B3D1F2"], [1, "#5567BB"]],
                 line=dict(width=1, color="#E0E0E0"),
@@ -113,6 +113,8 @@ def get_local_cited_documents(df, num_of_local_cited_docs, field_separator, fast
 
     # Set x-axis ticks to 0, 5, 10, etc.
     max_x = df_documents["Local Citations"].max()
+    if pd.isna(max_x) or max_x <= 0:
+        max_x = 0
     tick_step = 5
     x_ticks = list(range(0, int(max_x) + tick_step, tick_step))
     if x_ticks[-1] < max_x: