Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
374d646
2026-05-24
antonio-cln May 24, 2026
3ea0aa7
2026-05-25
antonio-cln May 25, 2026
015378f
2026-05-26
antonio-cln May 26, 2026
f609c4c
2026-05-26
antonio-cln May 26, 2026
02570f1
2026-05-26
antonio-cln May 26, 2026
0247699
2026-05-26
antonio-cln May 26, 2026
6d1d926
2026-05-26
antonio-cln May 26, 2026
ff13901
2026-05-27
antonio-cln May 27, 2026
4d991fa
2026-05-27
antonio-cln May 27, 2026
34dec8c
2026-05-28
antonio-cln May 28, 2026
e528b48
2026-05-28
antonio-cln May 28, 2026
c59987c
2026-05-29
antonio-cln May 29, 2026
23482ab
.
viictor-it May 29, 2026
d895091
.
viictor-it May 29, 2026
acf3a48
Adesso la funzione get_referencesspectroscopy non crasha con dati di …
viictor-it May 29, 2026
eb97aec
.
viictor-it May 30, 2026
86054ce
bug fix in Bradford law
viictor-it May 30, 2026
4b9b023
2026-05-31
antonio-cln May 31, 2026
447e0bc
Merge branch 'main' of https://github.com/antonio-cln/bibliometrix-py…
antonio-cln May 31, 2026
8ca39f9
Update app.py
antonio-cln May 31, 2026
3da370e
2026-05-31
antonio-cln May 31, 2026
62f360b
Update app.py
antonio-cln May 31, 2026
ca32d40
Update app.py
antonio-cln May 31, 2026
41dc13b
2026-06-01
antonio-cln Jun 1, 2026
7e4a111
.
viictor-it Jun 1, 2026
4b9d09f
Revert "."
viictor-it Jun 1, 2026
4b5a3b2
.
viictor-it Jun 1, 2026
e56084a
.
viictor-it Jun 1, 2026
64b808b
.
viictor-it Jun 1, 2026
b1e343c
.
viictor-it Jun 1, 2026
abe5dcc
.
viictor-it Jun 1, 2026
8ce0918
.
viictor-it Jun 1, 2026
f9752d0
.
viictor-it Jun 1, 2026
c8ca5bf
modified error message for Most Global Cited Documents function in ap…
dnrpspc6n7-spec Jun 1, 2026
275cf93
2026-06-01
antonio-cln Jun 1, 2026
62d692d
Merge branch 'main' of https://github.com/antonio-cln/bibliometrix-py…
antonio-cln Jun 1, 2026
9047e4c
Enhance error handling for API data fetching and keyword searches
viictor-it Jun 1, 2026
6eaa9e7
MERGE.
viictor-it Jun 1, 2026
8984435
Merge branch 'main' of https://github.com/antonio-cln/bibliometrix-py…
dnrpspc6n7-spec Jun 2, 2026
2c64302
Fixed errors in get_co_occurence_network and modified app.py to handl…
dnrpspc6n7-spec Jun 2, 2026
a6b6663
Imported nltk library and added stopwords download for word frequency…
dnrpspc6n7-spec Jun 2, 2026
a364de7
.
viictor-it Jun 2, 2026
bffdf59
Merge branch 'main' of https://github.com/antonio-cln/bibliometrix-py…
viictor-it Jun 2, 2026
33d03d8
PPPPPPPPPPPPPPPP
viictor-it Jun 2, 2026
ce80122
Final integration of API querying support for OpenAlex, PubMed, and S…
viictor-it Jun 2, 2026
6fc4f07
Enhance API integration by adding a unified fetch function for OpenAl…
viictor-it Jun 2, 2026
196c46f
Enhance documentation and type hints in API ETL and data validation m…
viictor-it Jun 2, 2026
2a47927
Added docstrings to api_etl.py
antonio-cln Jun 2, 2026
3a6649b
Added docstrings to data_validation.py
antonio-cln Jun 2, 2026
d2578c4
fixed
viictor-it Jun 2, 2026
dd186cf
2026-06-02
antonio-cln Jun 2, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
__pycache__/
bibliovenv/
Bibenv/
.idea/
.idea/
test/
1,132 changes: 1,072 additions & 60 deletions app.py

Large diffs are not rendered by default.

4,121 changes: 4,121 additions & 0 deletions data/cochrane_export.csv

Large diffs are not rendered by default.

88,315 changes: 88,315 additions & 0 deletions data/cochrane_export.txt

Large diffs are not rendered by default.

1,013 changes: 1,013 additions & 0 deletions data/dimension_export.csv

Large diffs are not rendered by default.

Binary file added data/dimension_export.xlsx
Binary file not shown.
1,001 changes: 1,001 additions & 0 deletions data/lens_export.csv

Large diffs are not rendered by default.

25,472 changes: 25,472 additions & 0 deletions data/scopus_export.bib

Large diffs are not rendered by default.

1,001 changes: 1,001 additions & 0 deletions data/scopus_export.csv

Large diffs are not rendered by default.

148,882 changes: 148,882 additions & 0 deletions data/wos_export.txt

Large diffs are not rendered by default.

1,262 changes: 1,262 additions & 0 deletions etl_showcase.ipynb

Large diffs are not rendered by default.

19 changes: 19 additions & 0 deletions functions/get_authorlocalimpact.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,25 @@ def g_calc(x):
impact_column = 'TC'

source_counts_visualization = source_counts_visualization.head(num_of_authors_local_impact)
n = len(source_counts_visualization)

if n == 0 or source_counts_visualization[impact_column].max() == 0:
metric_label = author_local_impact.replace('_', ' ').title()
fig = go.Figure()
fig.add_annotation(
text=f"⚠️ Cannot Generate Plot<br><br>The calculated <b>'{metric_label}'</b> for all identified sources evaluates to <b>0</b>.<br>"
"There are no non-zero citation metrics available to plot.",
xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False,
font=dict(size=16, color="#D9534F", family="Segoe UI, Arial"), align="center"
)
fig.update_layout(
xaxis={"visible": False}, yaxis={"visible": False},
plot_bgcolor="rgba(245,245,245,0.5)", paper_bgcolor="white", height=500
)
fig = go.FigureWidget(fig)
fig._config = fig._config | {'displaylogo': False}
return fig, source_counts


# Create the plot
fig = px.scatter(
Expand Down
8 changes: 5 additions & 3 deletions functions/get_bradfordlaw.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from www.services import *


def get_bradford_law(df):
def get_bradford_law(df:pd.DataFrame):
"""
Generate a plot and table based on Bradford's Law.

Expand All @@ -11,8 +11,10 @@ def get_bradford_law(df):
Returns:
A Plotly figure object and a DataFrame of the Bradford's Law zones.
"""
# Sort data by frequency of occurrence (equivalent to R's sort(table(M$SO), decreasing = TRUE))
data = df.get()
data = df.copy()
# Convert empty strings (or whitespace strings) to None/NaN
data["SO"] = data["SO"].replace(r'^\s*$', None, regex=True)

source_counts = data["SO"].value_counts()

# Total number of sources
Expand Down
67 changes: 60 additions & 7 deletions functions/get_citedcountries.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,18 @@ def get_cited_countries(df, num_of_cited_countries, cited_countries_measure):
df = metaTagExtraction(df, "AU1_CO")
df = df.get()

if "AU1_CO" not in df.columns or df["AU1_CO"].dropna().empty:
fig = go.Figure()
fig.add_annotation(
text="⚠️ Cannot Calculate Country Citations<br><br>The field <b>'AU1_CO'</b> (First Author Country) is blank or missing from your dataset.",
xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False,
font=dict(size=14, color="#D9534F", family="Segoe UI, Arial"), align="center"
)
fig.update_layout(xaxis={"visible": False}, yaxis={"visible": False}, plot_bgcolor="rgba(245,245,245,0.5)", height=400)
fig = go.FigureWidget(fig)
fig._config = fig._config | {'displaylogo': False}
return fig, pd.DataFrame(columns=["Country", "TotalCitation", "AverageArticleCitations"])

# Prepare the table for ranking countries
tab = (
df.dropna(subset=["AU1_CO"])
Expand Down Expand Up @@ -47,8 +59,43 @@ def get_cited_countries(df, num_of_cited_countries, cited_countries_measure):
x_values = tab.iloc[:, 1]
n = len(tab)

if n == 0 or x_values.max() == 0:
fig = go.Figure()

# Inject the explicit text warning into the middle of the empty graph
fig.add_annotation(
text="⚠️ Cannot Generate Plot<br><br>The selected metrics contain no citation data (all records show <b>0 citations</b>).",
xref="paper", yref="paper",
x=0.5, y=0.5,
showarrow=False,
font=dict(size=16, color="#D9534F", family="Segoe UI, Arial"),
align="center"
)

# Clean up the background layout so it looks like a clean message card
fig.update_layout(
xaxis={"visible": False},
yaxis={"visible": False},
plot_bgcolor="rgba(245,245,245,0.5)",
paper_bgcolor="white",
height=500
)

# Wrap in a FigureWidget, the same type the normal plotting path returns
fig = go.FigureWidget(fig)
fig._config = fig._config | {'displaylogo': False}
return fig, table

fig = go.Figure()

has_no_citations = (x_values.max() == 0)
if has_no_citations:
fig.add_annotation(
text="ℹ️ Note: All identified countries have 0 citations recorded in this dataset.",
xref="paper", yref="paper", x=0.5, y=0.95, showarrow=False,
font=dict(size=12, color="#555555", family="Segoe UI, Arial"), align="center"
)

# Add thick lines from y-label to marker
for i, (country, value) in enumerate(zip(y_labels, x_values)):
fig.add_shape(
Expand All @@ -61,14 +108,17 @@ def get_cited_countries(df, num_of_cited_countries, cited_countries_measure):
layer="below",
)

max_val = x_values.max()
size_denominator = max_val if (max_val and max_val != 0 and not pd.isna(max_val)) else 1

# Add scatter markers with text
fig.add_trace(
go.Scatter(
x=x_values,
y=list(range(n)),
mode="markers+text",
marker=dict(
size=18 + 6 * (x_values / x_values.max()),
size=18 + 6 * (x_values / size_denominator),
color=x_values,
colorscale=[[0, "#B3D1F2"], [1, "#5567BB"]],
line=dict(width=1, color="#E0E0E0"),
Expand Down Expand Up @@ -100,10 +150,14 @@ def get_cited_countries(df, num_of_cited_countries, cited_countries_measure):

# Set x-axis ticks
max_x = x_values.max()
tick_step = 5 if max_x <= 50 else int(max_x // 10) or 1
x_ticks = list(range(0, int(max_x) + tick_step, tick_step))
if x_ticks[-1] < max_x:
x_ticks.append(int(max_x))

if has_no_citations:
x_ticks = [0, 1, 2]
else:
tick_step = 5 if max_x <= 50 else int(max_x // 10) or 1
x_ticks = list(range(0, int(max_x) + tick_step, tick_step))
if x_ticks[-1] < max_x:
x_ticks.append(int(max_x))

fig.update_yaxes(
tickvals=list(range(n)),
Expand All @@ -124,7 +178,7 @@ def get_cited_countries(df, num_of_cited_countries, cited_countries_measure):
fig.update_layout(
plot_bgcolor='white',
font=dict(color="#222222", size=14, family="Segoe UI, Arial"),
margin=dict(l=0, r=0, t=0, b=0),
margin=dict(l=180, r=40, t=40, b=40),
height=50 + 90 * n,
showlegend=False,
hoverlabel=dict(
Expand All @@ -138,5 +192,4 @@ def get_cited_countries(df, num_of_cited_countries, cited_countries_measure):
fig = go.FigureWidget(fig)
fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'],
'displaylogo': False}

return fig, table
6 changes: 5 additions & 1 deletion functions/get_co_occurence_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,11 @@ def get_co_occurence_network(df, field_cn, ngram, network_layout, clustering_alg
font_sizes = nodes_df_orig['font'].apply(lambda x: x.get('size', 75))
min_font = font_sizes.min()
max_font = font_sizes.max()
nodes_df_orig['font_size'] = ((font_sizes - min_font) / (max_font - min_font) * 20) + 10
if pd.isna(min_font) or pd.isna(max_font) or max_font == min_font:
print("Error: the density plot cannot be created because the label font sizes are invalid or all identical.")
nodes_df_orig['font_size'] = 10
else:
nodes_df_orig['font_size'] = ((font_sizes - min_font) / (max_font - min_font) * 20) + 10

# Create the replicated dataframe for the density plot:
nodes_df = nodes_df_orig.copy()
Expand Down
2 changes: 1 addition & 1 deletion functions/get_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,4 @@ def get_data(input, database, df, reset_callback=None):
else:
text = ""

return text
return text
3 changes: 3 additions & 0 deletions functions/get_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,8 @@ def get_database(input):

elif input.select() == "1C": # Sample database
database = "Sample"

elif input.select() == "1D":
database = "API"

return database
4 changes: 2 additions & 2 deletions functions/get_factorialanalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def assign_consistent_colors(clusters):


def get_factorial_analysis(
df: pd.DataFrame,
df,
ngram: Union[int, str] = 1,
field: str = "ID",
terms_data_wm: Optional[Sequence[str]] = None,
Expand Down Expand Up @@ -75,7 +75,7 @@ def get_factorial_analysis(
ngrams = int(ngram) if field in ['TI', 'AB'] else 1

M = df.get()
tab = table_tag(M, field, ngrams)
tab = table_tag(df = M, tag = field, ngrams = ngrams)

if len(tab) >= 2:
# Get minimum degree threshold from the nth term
Expand Down
2 changes: 1 addition & 1 deletion functions/get_frequentwords.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def get_frequent_words(df, ngram, num_of_words, word_type, file_upload_terms, fi
print(ngrams)

# Get word counts
words = table_tag(df, word_type, ngrams, remove_terms, synonyms)
words = table_tag(df, tag = word_type, ngrams = ngrams, remove_terms= remove_terms, synonyms=synonyms)

# Create DataFrame of most frequent words
word_counts = pd.DataFrame(words.items(), columns=['Words', 'Occurrences'])
Expand Down
3 changes: 2 additions & 1 deletion functions/get_historiograph.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ def get_historiograph(df, node_label="AU1", histNodes=20, hist_isolates=True, hi
# Preprocessing
df = metaTagExtraction(df, "SR")
hist_results = histNetwork(df, min_citations=0, sep=sep, network=True)

if not hist_results:
raise Exception('Database not compatible with direct citation analysis.')
# 1. Initial construction of the graph
hist_plot = histPlot(
hist_results,
Expand Down
68 changes: 54 additions & 14 deletions functions/get_localcitedauthors.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,60 @@ def get_local_cited_authors(df, num_of_cited_authors, fast_search=False):
# Fill missing values
M['TC'] = M['TC'].fillna(0)

# Create a histogram network
H = histNetwork(df, min_citations=loccit, sep=";", network=False)
LCS = H['histData']
M = H['M']

# Split authors and repeat local citations
AU = M['AU'].explode()
n = AU.groupby(level=0).size()

# Create DataFrame for authors and local citations
df_authors = pd.DataFrame({'AU': AU, 'LCS': M['LCS'].repeat(n).values})
author_counts = df_authors.groupby('AU')['LCS'].sum().reset_index()
author_counts.columns = ["Authors", "N. of Local Citations"]
author_counts = author_counts.sort_values(by="N. of Local Citations", ascending=False)
try:
# Create a histogram network
H = histNetwork(df, min_citations=loccit, sep=";", network=False)
LCS = H['histData']
M = H['M']

# Split authors and repeat local citations
AU = M['AU'].explode()
n = AU.groupby(level=0).size()

# Create DataFrame for authors and local citations
df_authors = pd.DataFrame({'AU': AU, 'LCS': M['LCS'].repeat(n).values})
author_counts = df_authors.groupby('AU')['LCS'].sum().reset_index()
author_counts.columns = ["Authors", "N. of Local Citations"]
author_counts = author_counts.sort_values(by="N. of Local Citations", ascending=False)

except Exception as e:
# If histNetwork or pandas formatting crashes, return a clean visual placeholder error
fig = go.Figure()
fig.add_annotation(
text="⚠️ Analysis Interrupted<br><br>The local cited authors network could not be calculated.<br>"
"The source reference column (CR/ref) may be empty or contain malformed datatypes.",
xref="paper", yref="paper",
x=0.5, y=0.5,
showarrow=False,
font=dict(size=14, color="#D9534F", family="Segoe UI, Arial"),
align="center"
)
fig.update_layout(
xaxis={"visible": False},
yaxis={"visible": False},
plot_bgcolor="rgba(245,245,245,0.5)",
paper_bgcolor="white",
height=400,
margin=dict(l=20, r=20, t=20, b=20)
)
fig = go.FigureWidget(fig)
fig._config = fig._config | {'displaylogo': False}

# Empty table with the same column names as the populated result
empty_table = pd.DataFrame(columns=["Authors", "N. of Local Citations"])
return fig, empty_table

# The computation succeeded but produced no author rows (empty result)
if author_counts.empty:
fig = go.Figure()
fig.add_annotation(
text="⚠️ No Data Found<br><br>No local citation networks match your search parameters.",
xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False,
font=dict(size=14, family="Segoe UI, Arial"), align="center"
)
fig.update_layout(xaxis={"visible": False}, yaxis={"visible": False}, height=400)
fig = go.FigureWidget(fig)
return fig, pd.DataFrame(columns=["Authors", "N. of Local Citations"])

# Limit the number of authors to display
if num_of_cited_authors > len(author_counts):
Expand Down
2 changes: 2 additions & 0 deletions functions/get_localciteddocuments.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ def get_local_cited_documents(df, num_of_local_cited_docs, field_separator, fast

# Create a histogram network
H = histNetwork(df, min_citations=loccit, sep=";", network=False)
if not H:
raise Exception('Database not compatible with direct citation analysis.')
LCS = H['histData']
M = H['M']

Expand Down
27 changes: 27 additions & 0 deletions functions/get_localcitedsources.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,33 @@ def get_local_cited_sources(df, num_of_cited_sources):
source_counts = data["CR_SO"].str.split(";").explode().value_counts().reset_index()
source_counts.columns = ["Sources", "N. of Local Citations"]

if source_counts.empty:
# Create a clean placeholder canvas message
fig = go.Figure()
fig.add_annotation(
text="⚠️ No Data Available<br><br>The <b>'CR_SO'</b> (Cited Periodicals) attribute is completely empty.<br>Local citations cannot be calculated for this dataset.",
xref="paper", yref="paper",
x=0.5, y=0.5,
showarrow=False,
font=dict(size=15, color="#D9534F", family="Segoe UI, Arial"),
align="center"
)
fig.update_layout(
xaxis={"visible": False},
yaxis={"visible": False},
plot_bgcolor="rgba(245,245,245,0.5)",
paper_bgcolor="white",
height=400
)
fig = go.FigureWidget(fig)
fig._config = fig._config | {'displaylogo': False}

# Empty table with the same column names as the populated result
empty_table = pd.DataFrame(columns=["Sources", "N. of Local Citations"])

# Return a (figure, table) pair so callers that unpack two values do not fail
return fig, empty_table

# Limit the number of sources to display
if num_of_cited_sources > len(source_counts):
num_of_cited_sources = len(source_counts)
Expand Down
Loading