PRAISELab-PicusLab · antonio-cln · May 24, 2026 · May 25, 2026 · May 26, 2026 · May 26, 2026
diff --git a/.gitignore b/.gitignore
@@ -1,4 +1,5 @@
 __pycache__/
 bibliovenv/
 Bibenv/
-.idea/
+.idea/
+test/
diff --git a/app.py b/app.py
diff --git a/data/cochrane_export.csv b/data/cochrane_export.csv
diff --git a/data/cochrane_export.txt b/data/cochrane_export.txt
diff --git a/data/dimension_export.csv b/data/dimension_export.csv
diff --git a/data/dimension_export.xlsx b/data/dimension_export.xlsx
diff --git a/data/lens_export.csv b/data/lens_export.csv
diff --git a/data/scopus_export.bib b/data/scopus_export.bib
diff --git a/data/scopus_export.csv b/data/scopus_export.csv
diff --git a/data/wos_export.txt b/data/wos_export.txt
diff --git a/etl_showcase.ipynb b/etl_showcase.ipynb
diff --git a/functions/get_authorlocalimpact.py b/functions/get_authorlocalimpact.py
@@ -81,6 +81,25 @@ def g_calc(x):
         impact_column = 'TC'
 
     source_counts_visualization = source_counts_visualization.head(num_of_authors_local_impact)
+    n = len(source_counts_visualization)
+
+    if n == 0 or source_counts_visualization[impact_column].max() == 0:
+        metric_label = author_local_impact.replace('_', ' ').title()
+        fig = go.Figure()
+        fig.add_annotation(
+            text=f"⚠️ Cannot Generate Plot<br><br>The calculated <b>'{metric_label}'</b> for all identified sources evaluates to <b>0</b>.<br>"
+            "There are no non-zero citation metrics available to plot.",
+            xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False,
+            font=dict(size=16, color="#D9534F", family="Segoe UI, Arial"), align="center"
+        )
+        fig.update_layout(
+            xaxis={"visible": False}, yaxis={"visible": False},
+            plot_bgcolor="rgba(245,245,245,0.5)", paper_bgcolor="white", height=500
+        )
+        fig = go.FigureWidget(fig)
+        fig._config = fig._config | {'displaylogo': False}
+        return fig, source_counts
+
 
     # Create the plot
     fig = px.scatter(

diff --git a/functions/get_bradfordlaw.py b/functions/get_bradfordlaw.py
@@ -1,7 +1,7 @@
 from www.services import *
 
 
-def get_bradford_law(df):
+def get_bradford_law(df:pd.DataFrame):
     """
     Generate a plot and table based on Bradford's Law.
 
@@ -11,8 +11,10 @@ def get_bradford_law(df):
     Returns:
         A Plotly figure object and a DataFrame of the Bradford's Law zones.
     """
-    # Sort data by frequency of occurrence (equivalent to R's sort(table(M$SO), decreasing = TRUE))
-    data = df.get()
+    data = df.copy()
+    # Convert empty strings (or whitespace strings) to None/NaN
+    data["SO"] = data["SO"].replace(r'^\s*$', None, regex=True)
+
     source_counts = data["SO"].value_counts()
 
     # Total number of sources

diff --git a/functions/get_citedcountries.py b/functions/get_citedcountries.py
@@ -17,6 +17,18 @@ def get_cited_countries(df, num_of_cited_countries, cited_countries_measure):
     df = metaTagExtraction(df, "AU1_CO")
     df = df.get()
 
+    if "AU1_CO" not in df.columns or df["AU1_CO"].dropna().empty:
+        fig = go.Figure()
+        fig.add_annotation(
+            text="⚠️ Cannot Calculate Country Citations<br><br>The field <b>'AU1_CO'</b> (First Author Country) is blank or missing from your dataset.",
+            xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False,
+            font=dict(size=14, color="#D9534F", family="Segoe UI, Arial"), align="center"
+        )
+        fig.update_layout(xaxis={"visible": False}, yaxis={"visible": False}, plot_bgcolor="rgba(245,245,245,0.5)", height=400)
+        fig = go.FigureWidget(fig)
+        fig._config = fig._config | {'displaylogo': False}
+        return fig, pd.DataFrame(columns=["Country", "TotalCitation", "AverageArticleCitations"])
+
     # Prepare the table for ranking countries
     tab = (
         df.dropna(subset=["AU1_CO"])
@@ -47,8 +59,43 @@ def get_cited_countries(df, num_of_cited_countries, cited_countries_measure):
     x_values = tab.iloc[:, 1]
     n = len(tab)
 
+    if n == 0 or x_values.max() == 0:
+        fig = go.Figure()
+
+        # Inject the explicit text warning into the middle of the empty graph
+        fig.add_annotation(
+            text="⚠️ Cannot Generate Plot<br><br>The selected metrics contain no citation data (all records show <b>0 citations</b>).",
+            xref="paper", yref="paper",
+            x=0.5, y=0.5,
+            showarrow=False,
+            font=dict(size=16, color="#D9534F", family="Segoe UI, Arial"),
+            align="center"
+        )
+
+        # Hide both axes and use a neutral background so the figure reads as a message card
+        fig.update_layout(
+            xaxis={"visible": False},
+            yaxis={"visible": False},
+            plot_bgcolor="rgba(245,245,245,0.5)",
+            paper_bgcolor="white",
+            height=500
+        )
+
+        # Wrap in a FigureWidget, the same type the normal code path returns
+        fig = go.FigureWidget(fig)
+        fig._config = fig._config | {'displaylogo': False}
+        return fig, table
+
     fig = go.Figure()
 
+    has_no_citations = (x_values.max() == 0)
+    if has_no_citations:
+        fig.add_annotation(
+            text="ℹ️ Note: All identified countries have 0 citations recorded in this dataset.",
+            xref="paper", yref="paper", x=0.5, y=0.95, showarrow=False,
+            font=dict(size=12, color="#555555", family="Segoe UI, Arial"), align="center"
+        )
+
     # Add thick lines from y-label to marker
     for i, (country, value) in enumerate(zip(y_labels, x_values)):
         fig.add_shape(
@@ -61,14 +108,17 @@ def get_cited_countries(df, num_of_cited_countries, cited_countries_measure):
             layer="below",
         )
 
+    max_val = x_values.max()
+    size_denominator = max_val if (max_val and max_val != 0 and not pd.isna(max_val)) else 1
+
     # Add scatter markers with text
     fig.add_trace(
         go.Scatter(
             x=x_values,
             y=list(range(n)),
             mode="markers+text",
             marker=dict(
-                size=18 + 6 * (x_values / x_values.max()),
+                size=18 + 6 * (x_values / size_denominator),
                 color=x_values,
                 colorscale=[[0, "#B3D1F2"], [1, "#5567BB"]],
                 line=dict(width=1, color="#E0E0E0"),
@@ -100,10 +150,14 @@ def get_cited_countries(df, num_of_cited_countries, cited_countries_measure):
 
     # Set x-axis ticks
     max_x = x_values.max()
-    tick_step = 5 if max_x <= 50 else int(max_x // 10) or 1
-    x_ticks = list(range(0, int(max_x) + tick_step, tick_step))
-    if x_ticks[-1] < max_x:
-        x_ticks.append(int(max_x))
+
+    if has_no_citations:
+        x_ticks = [0, 1, 2]
+    else:
+        tick_step = 5 if max_x <= 50 else int(max_x // 10) or 1
+        x_ticks = list(range(0, int(max_x) + tick_step, tick_step))
+        if x_ticks[-1] < max_x:
+            x_ticks.append(int(max_x))
 
     fig.update_yaxes(
         tickvals=list(range(n)),
@@ -124,7 +178,7 @@ def get_cited_countries(df, num_of_cited_countries, cited_countries_measure):
     fig.update_layout(
         plot_bgcolor='white',
         font=dict(color="#222222", size=14, family="Segoe UI, Arial"),
-        margin=dict(l=0, r=0, t=0, b=0),
+        margin=dict(l=180, r=40, t=40, b=40),
         height=50 + 90 * n,
         showlegend=False,
         hoverlabel=dict(
@@ -138,5 +192,4 @@ def get_cited_countries(df, num_of_cited_countries, cited_countries_measure):
     fig = go.FigureWidget(fig)
     fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'],
                                  'displaylogo': False}
-
     return fig, table
diff --git a/functions/get_co_occurence_network.py b/functions/get_co_occurence_network.py
@@ -295,7 +295,11 @@ def get_co_occurence_network(df, field_cn, ngram, network_layout, clustering_alg
     font_sizes = nodes_df_orig['font'].apply(lambda x: x.get('size', 75))
     min_font = font_sizes.min()
     max_font = font_sizes.max()
-    nodes_df_orig['font_size'] = ((font_sizes - min_font) / (max_font - min_font) * 20) + 10
+    if pd.isna(min_font) or pd.isna(max_font) or max_font == min_font:
+        print("Error: the density plot cannot be created because the label font sizes are invalid or all identical.")
+        nodes_df_orig['font_size'] = 10
+    else:
+        nodes_df_orig['font_size'] = ((font_sizes - min_font) / (max_font - min_font) * 20) + 10
 
     # Crea il dataframe replicato per il density plot:
     nodes_df = nodes_df_orig.copy()

diff --git a/functions/get_data.py b/functions/get_data.py
@@ -79,4 +79,4 @@ def get_data(input, database, df, reset_callback=None):
     else:
         text = ""
 
-    return text
+    return text
diff --git a/functions/get_database.py b/functions/get_database.py
@@ -33,5 +33,8 @@ def get_database(input):
 
     elif input.select() == "1C":  # Sample database
         database = "Sample"
+
+    elif input.select() == "1D":
+        database = "API"
 
     return database
diff --git a/functions/get_factorialanalysis.py b/functions/get_factorialanalysis.py
@@ -37,7 +37,7 @@ def assign_consistent_colors(clusters):
 
 
 def get_factorial_analysis(
-    df: pd.DataFrame,
+    df,
     ngram: Union[int, str] = 1,  
     field: str = "ID",
     terms_data_wm: Optional[Sequence[str]] = None,
@@ -75,7 +75,7 @@ def get_factorial_analysis(
     ngrams = int(ngram) if field in ['TI', 'AB'] else 1
 
     M = df.get()
-    tab = table_tag(M, field, ngrams)
+    tab = table_tag(df = M, tag = field, ngrams = ngrams)
 
     if len(tab) >= 2:
         # Get minimum degree threshold from the nth term

diff --git a/functions/get_frequentwords.py b/functions/get_frequentwords.py
@@ -38,7 +38,7 @@ def get_frequent_words(df, ngram, num_of_words, word_type, file_upload_terms, fi
     print(ngrams)
 
     # Get word counts
-    words = table_tag(df, word_type, ngrams, remove_terms, synonyms)
+    words = table_tag(df, tag = word_type, ngrams = ngrams, remove_terms= remove_terms, synonyms=synonyms)
 
     # Create DataFrame of most frequent words
     word_counts = pd.DataFrame(words.items(), columns=['Words', 'Occurrences'])

diff --git a/functions/get_historiograph.py b/functions/get_historiograph.py
@@ -29,7 +29,8 @@ def get_historiograph(df, node_label="AU1", histNodes=20, hist_isolates=True, hi
     # Pre-elaborazione
     df = metaTagExtraction(df, "SR")
     hist_results = histNetwork(df, min_citations=0, sep=sep, network=True)
-
+    if not hist_results:
+        raise Exception('Database not compatible with direct citation analysis.')
     # 1. Costruzione iniziale del grafo
     hist_plot = histPlot(
         hist_results,

diff --git a/functions/get_localcitedauthors.py b/functions/get_localcitedauthors.py
@@ -25,20 +25,60 @@ def get_local_cited_authors(df, num_of_cited_authors, fast_search=False):
     # Fill missing values
     M['TC'] = M['TC'].fillna(0)
 
-    # Create a histogram network
-    H = histNetwork(df, min_citations=loccit, sep=";", network=False)
-    LCS = H['histData']
-    M = H['M']
-
-    # Split authors and repeat local citations
-    AU = M['AU'].explode()
-    n = AU.groupby(level=0).size()
-
-    # Create DataFrame for authors and local citations
-    df_authors = pd.DataFrame({'AU': AU, 'LCS': M['LCS'].repeat(n).values})
-    author_counts = df_authors.groupby('AU')['LCS'].sum().reset_index()
-    author_counts.columns = ["Authors", "N. of Local Citations"]
-    author_counts = author_counts.sort_values(by="N. of Local Citations", ascending=False)
+    try:
+        # Create a histogram network
+        H = histNetwork(df, min_citations=loccit, sep=";", network=False)
+        LCS = H['histData']
+        M = H['M']
+
+        # Split authors and repeat local citations
+        AU = M['AU'].explode()
+        n = AU.groupby(level=0).size()
+
+        # Create DataFrame for authors and local citations
+        df_authors = pd.DataFrame({'AU': AU, 'LCS': M['LCS'].repeat(n).values})
+        author_counts = df_authors.groupby('AU')['LCS'].sum().reset_index()
+        author_counts.columns = ["Authors", "N. of Local Citations"]
+        author_counts = author_counts.sort_values(by="N. of Local Citations", ascending=False)
+
+    except Exception as e:
+        # If histNetwork or the pandas aggregation raises, return a placeholder figure carrying an error message
+        fig = go.Figure()
+        fig.add_annotation(
+            text="⚠️ Analysis Interrupted<br><br>The local cited authors network could not be calculated.<br>"
+                 "The source reference column (CR/ref) may be empty or contain malformed datatypes.",
+            xref="paper", yref="paper",
+            x=0.5, y=0.5,
+            showarrow=False,
+            font=dict(size=14, color="#D9534F", family="Segoe UI, Arial"),
+            align="center"
+        )
+        fig.update_layout(
+            xaxis={"visible": False},
+            yaxis={"visible": False},
+            plot_bgcolor="rgba(245,245,245,0.5)",
+            paper_bgcolor="white",
+            height=400,
+            margin=dict(l=20, r=20, t=20, b=20)
+        )
+        fig = go.FigureWidget(fig)
+        fig._config = fig._config | {'displaylogo': False}
+
+        # Empty table with the same column names as the normal result
+        empty_table = pd.DataFrame(columns=["Authors", "N. of Local Citations"])
+        return fig, empty_table
+
+    # The computation succeeded but produced no author rows (empty result)
+    if author_counts.empty:
+        fig = go.Figure()
+        fig.add_annotation(
+            text="⚠️ No Data Found<br><br>No local citation networks match your search parameters.",
+            xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False,
+            font=dict(size=14, family="Segoe UI, Arial"), align="center"
+        )
+        fig.update_layout(xaxis={"visible": False}, yaxis={"visible": False}, height=400)
+        fig = go.FigureWidget(fig)
+        return fig, pd.DataFrame(columns=["Authors", "N. of Local Citations"])
 
     # Limit the number of authors to display
     if num_of_cited_authors > len(author_counts):

diff --git a/functions/get_localciteddocuments.py b/functions/get_localciteddocuments.py
@@ -27,6 +27,8 @@ def get_local_cited_documents(df, num_of_local_cited_docs, field_separator, fast
 
     # Create a histogram network
     H = histNetwork(df, min_citations=loccit, sep=";", network=False)
+    if not H:
+        raise Exception('Database not compatible with direct citation analysis.')
     LCS = H['histData']
     M = H['M']
 

diff --git a/functions/get_localcitedsources.py b/functions/get_localcitedsources.py
@@ -31,6 +31,33 @@ def get_local_cited_sources(df, num_of_cited_sources):
         source_counts = data["CR_SO"].str.split(";").explode().value_counts().reset_index()
         source_counts.columns = ["Sources", "N. of Local Citations"]
 
+    if source_counts.empty:
+        # Create a clean placeholder canvas message
+        fig = go.Figure()
+        fig.add_annotation(
+            text="⚠️ No Data Available<br><br>The <b>'CR_SO'</b> (Cited Periodicals) attribute is completely empty.<br>Local citations cannot be calculated for this dataset.",
+            xref="paper", yref="paper",
+            x=0.5, y=0.5,
+            showarrow=False,
+            font=dict(size=15, color="#D9534F", family="Segoe UI, Arial"),
+            align="center"
+        )
+        fig.update_layout(
+            xaxis={"visible": False},
+            yaxis={"visible": False},
+            plot_bgcolor="rgba(245,245,245,0.5)",
+            paper_bgcolor="white",
+            height=400
+        )
+        fig = go.FigureWidget(fig)
+        fig._config = fig._config | {'displaylogo': False}
+
+        # Empty table with the same column names as the normal result
+        empty_table = pd.DataFrame(columns=["Sources", "N. of Local Citations"])
+
+        # Return both a figure and a table so callers that unpack two values still work
+        return fig, empty_table
+
     # Limit the number of sources to display
     if num_of_cited_sources > len(source_counts):
         num_of_cited_sources = len(source_counts)