From fe29ac75fb8fa65b7cd01b661931368c82849d39 Mon Sep 17 00:00:00 2001
From: Oreoluwa Oluwasina <oreoluwaoluwasina@gmail.com>
Date: Thu, 19 Feb 2026 11:39:20 +0100
Subject: [PATCH 1/2] Arxiv process and report

---
 scripts/2-process/arxiv_process.py | 531 +++++++++++++++++++
 scripts/3-report/arxiv_report.py   | 808 +++++++++++++++++++++++++++++
 scripts/plot.py                    | 112 +++-
 3 files changed, 1450 insertions(+), 1 deletion(-)
 create mode 100644 scripts/2-process/arxiv_process.py
 create mode 100644 scripts/3-report/arxiv_report.py

diff --git a/scripts/2-process/arxiv_process.py b/scripts/2-process/arxiv_process.py
new file mode 100644
index 00000000..58fc4569
--- /dev/null
+++ b/scripts/2-process/arxiv_process.py
@@ -0,0 +1,531 @@
+#!/usr/bin/env python
+"""
+This file is dedicated to processing Arxiv data
+for analysis and comparison between quarters.
+"""
+
+# Standard library
+import argparse
+import os
+import sys
+import traceback
+
+# Third-party
+import pandas as pd
+
+# Add parent directory so shared can be imported
+sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
+
+# First-party/Local
+import shared  # noqa: E402
+
+# Setup
+LOGGER, PATHS = shared.setup(__file__)
+
+# Constants (FILE_PATHS: Arxiv outputs passed to the completion check)
+QUARTER = os.path.basename(PATHS["data_quarter"])
+FILE_PATHS = [
+    shared.path_join(PATHS["data_phase"], "arxiv_totals_by_license.csv"),
+    shared.path_join(PATHS["data_phase"], "arxiv_totals_by_author_bucket.csv"),
+]
+
+
+def parse_arguments():
+    """
+    Build the CLI, validate flag combinations and return the namespace.
+    """
+    global FILE_PATHS, PATHS, QUARTER
+    LOGGER.info("Parsing command-line options")
+    old_quarter = QUARTER
+    cli = argparse.ArgumentParser(description=__doc__)
+    cli.add_argument(
+        "--quarter",
+        default=old_quarter,
+        help=f"Data quarter in format YYYYQx (default: {old_quarter})",
+    )
+    cli.add_argument(
+        "--enable-save",
+        action="store_true",
+        help="Enable saving results (default: False)",
+    )
+    cli.add_argument(
+        "--enable-git",
+        action="store_true",
+        help="Enable git actions such as fetch, merge, add, commit, and push"
+        " (default: False)",
+    )
+    cli.add_argument(
+        "--force",
+        action="store_true",
+        help="Regenerate data even if processed files already exist",
+    )
+
+    args = cli.parse_args()
+    if args.enable_git and not args.enable_save:
+        cli.error("--enable-git requires --enable-save")
+    if args.quarter != old_quarter:
+        # Non-default quarter requested: have shared rewrite the paths
+        FILE_PATHS = shared.paths_list_update(
+            LOGGER, FILE_PATHS, old_quarter, args.quarter
+        )
+        PATHS = shared.paths_update(LOGGER, PATHS, old_quarter, args.quarter)
+        QUARTER = args.quarter
+    args.logger, args.paths = LOGGER, PATHS
+    return args
+
+
+def process_totals_by_license(args, count_data):
+    """
+    Processing count data: totals by license
+    """
+    LOGGER.info(process_totals_by_license.__doc__.strip())
+    # One entry per tool identifier; a repeated identifier keeps the
+    # count from its last row
+    totals = {
+        str(record.TOOL_IDENTIFIER): int(record.COUNT)
+        for record in count_data.itertuples(index=False)
+    }
+
+    # Rows are written in ascending license-name order
+    out = pd.DataFrame(totals.items(), columns=["License", "Count"])
+    out = out.sort_values("License", ascending=True).reset_index(drop=True)
+
+    file_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_totals_by_license.csv"
+    )
+    shared.data_to_csv(args, out, file_path)
+
+
+def process_totals_by_author_bucket(args, count_data):
+    """
+    Processing count data: totals by author_bucket
+    """
+    LOGGER.info(process_totals_by_author_bucket.__doc__.strip())
+    data = count_data.pivot_table(
+        index="AUTHOR_BUCKET",
+        columns="TOOL_IDENTIFIER",
+        values="COUNT",
+        aggfunc="sum",  # default is "mean", which averages duplicate rows
+        fill_value=0,
+    ).reset_index()
+    file_name = "arxiv_totals_by_author_bucket.csv"
+    file_path = shared.path_join(PATHS["data_phase"], file_name)
+    shared.data_to_csv(args, data, file_path)
+
+
+def process_cc_by_3_by_year(args, count_data):
+    """
+    Processing count data: CC BY 3.0 by year
+    """
+    LOGGER.info(process_cc_by_3_by_year.__doc__.strip())
+    wanted_tool = "CC BY 3.0"
+    totals = {}
+    for record in count_data.itertuples(index=False):
+        # All fields are converted before the tool filter is applied
+        tool = str(record.TOOL_IDENTIFIER)
+        year = str(record.YEAR)
+        count = int(record.COUNT)
+        if tool == wanted_tool:
+            # A repeated year keeps the count from its last row
+            totals[year] = count
+
+    # Years are strings here, so the sort below is lexicographic
+    out = pd.DataFrame(totals.items(), columns=["Year", "Count"])
+    out = out.sort_values("Year", ascending=True).reset_index(drop=True)
+    file_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_cc_by_3_by_year.csv"
+    )
+    shared.data_to_csv(args, out, file_path)
+
+
+def process_cc_by_4_by_year(args, count_data):
+    """
+    Processing count data: CC BY 4.0 by year
+    """
+    LOGGER.info(process_cc_by_4_by_year.__doc__.strip())
+    wanted_tool = "CC BY 4.0"
+    totals = {}
+    for record in count_data.itertuples(index=False):
+        # All fields are converted before the tool filter is applied
+        tool = str(record.TOOL_IDENTIFIER)
+        year = str(record.YEAR)
+        count = int(record.COUNT)
+        if tool == wanted_tool:
+            # A repeated year keeps the count from its last row
+            totals[year] = count
+
+    # Years are strings here, so the sort below is lexicographic
+    out = pd.DataFrame(totals.items(), columns=["Year", "Count"])
+    out = out.sort_values("Year", ascending=True).reset_index(drop=True)
+    file_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_cc_by_4_by_year.csv"
+    )
+    shared.data_to_csv(args, out, file_path)
+
+
+def process_cc_by_nc_nd_4_by_year(args, count_data):
+    """
+    Processing count data: CC BY-NC-ND 4.0 by year
+    """
+    LOGGER.info(process_cc_by_nc_nd_4_by_year.__doc__.strip())
+    wanted_tool = "CC BY-NC-ND 4.0"
+    totals = {}
+    for record in count_data.itertuples(index=False):
+        # All fields are converted before the tool filter is applied
+        tool = str(record.TOOL_IDENTIFIER)
+        year = str(record.YEAR)
+        count = int(record.COUNT)
+        if tool == wanted_tool:
+            # A repeated year keeps the count from its last row
+            totals[year] = count
+
+    # Years are strings here, so the sort below is lexicographic
+    out = pd.DataFrame(totals.items(), columns=["Year", "Count"])
+    out = out.sort_values("Year", ascending=True).reset_index(drop=True)
+    file_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_cc_by_nc_nd_4_by_year.csv"
+    )
+    shared.data_to_csv(args, out, file_path)
+
+
+def process_cc_by_nc_sa_3_by_year(args, count_data):
+    """
+    Processing count data: CC BY-NC-SA 3.0 by year
+    """
+    LOGGER.info(process_cc_by_nc_sa_3_by_year.__doc__.strip())
+    wanted_tool = "CC BY-NC-SA 3.0"
+    totals = {}
+    for record in count_data.itertuples(index=False):
+        tool = str(record.TOOL_IDENTIFIER)
+        year = str(record.YEAR)
+        count = int(record.COUNT)
+        if tool == wanted_tool:
+            # A repeated year keeps the count from its last row
+            totals[year] = count
+
+    # Years are strings here, so the sort below is lexicographic
+    out = pd.DataFrame(totals.items(), columns=["Year", "Count"])
+    out = out.sort_values("Year", ascending=True).reset_index(drop=True)
+    file_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_cc_by_nc_sa_3_by_year.csv"
+    )
+    shared.data_to_csv(args, out, file_path)
+
+
+def process_cc_by_nc_sa_4_by_year(args, count_data):
+    """
+    Processing count data: CC BY-NC-SA 4.0 by year
+    """
+    LOGGER.info(process_cc_by_nc_sa_4_by_year.__doc__.strip())
+    wanted_tool = "CC BY-NC-SA 4.0"
+    totals = {}
+    for record in count_data.itertuples(index=False):
+        tool = str(record.TOOL_IDENTIFIER)
+        year = str(record.YEAR)
+        count = int(record.COUNT)
+        if tool == wanted_tool:
+            # A repeated year keeps the count from its last row
+            totals[year] = count
+
+    # Years are strings here, so the sort below is lexicographic
+    out = pd.DataFrame(totals.items(), columns=["Year", "Count"])
+    out = out.sort_values("Year", ascending=True).reset_index(drop=True)
+    file_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_cc_by_nc_sa_4_by_year.csv"
+    )
+    shared.data_to_csv(args, out, file_path)
+
+
+def process_cc_by_sa_4_by_year(args, count_data):
+    """
+    Processing count data: CC BY-SA 4.0 by year
+    """
+    LOGGER.info(process_cc_by_sa_4_by_year.__doc__.strip())
+    wanted_tool = "CC BY-SA 4.0"
+    totals = {}
+    for record in count_data.itertuples(index=False):
+        tool = str(record.TOOL_IDENTIFIER)
+        year = str(record.YEAR)
+        count = int(record.COUNT)
+        if tool == wanted_tool:
+            # A repeated year keeps the count from its last row
+            totals[year] = count
+
+    # Years are strings here, so the sort below is lexicographic
+    out = pd.DataFrame(totals.items(), columns=["Year", "Count"])
+    out = out.sort_values("Year", ascending=True).reset_index(drop=True)
+    file_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_cc_by_sa_4_by_year.csv"
+    )
+    shared.data_to_csv(args, out, file_path)
+
+
+def process_cc0_1_by_year(args, count_data):
+    """
+    Processing count data: CC0 1.0 by year
+    """
+    LOGGER.info(process_cc0_1_by_year.__doc__.strip())
+    wanted_tool = "CC0 1.0"
+    totals = {}
+    for record in count_data.itertuples(index=False):
+        tool = str(record.TOOL_IDENTIFIER)
+        year = str(record.YEAR)
+        count = int(record.COUNT)
+        if tool == wanted_tool:
+            # A repeated year keeps the count from its last row
+            totals[year] = count
+
+    # Years are strings here, so the sort below is lexicographic
+    out = pd.DataFrame(totals.items(), columns=["Year", "Count"])
+    out = out.sort_values("Year", ascending=True).reset_index(drop=True)
+    file_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_cc0_1_by_year.csv"
+    )
+    shared.data_to_csv(args, out, file_path)
+
+
+def process_cc_by_3_by_category(args, count_data):
+    """
+    Processing count data: CC BY 3.0 by category
+    """
+    LOGGER.info(process_cc_by_3_by_category.__doc__.strip())
+    wanted_tool = "CC BY 3.0"
+    totals = {}
+    for record in count_data.itertuples(index=False):
+        tool = str(record.TOOL_IDENTIFIER)
+        category = str(record.CATEGORY_NAME)
+        count = int(record.COUNT)
+        if tool == wanted_tool:
+            # A repeated category keeps the count from its last row
+            totals[category] = count
+
+    # Rows are written in ascending category-name order
+    out = pd.DataFrame(totals.items(), columns=["Category", "Count"])
+    out = out.sort_values("Category", ascending=True).reset_index(drop=True)
+    file_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_cc_by_3_by_category.csv"
+    )
+    shared.data_to_csv(args, out, file_path)
+
+
+def process_cc_by_4_by_category(args, count_data):
+    """
+    Processing count data: CC BY 4.0 by category
+    """
+    LOGGER.info(process_cc_by_4_by_category.__doc__.strip())
+    wanted_tool = "CC BY 4.0"
+    totals = {}
+    for record in count_data.itertuples(index=False):
+        tool = str(record.TOOL_IDENTIFIER)
+        category = str(record.CATEGORY_NAME)
+        count = int(record.COUNT)
+        if tool == wanted_tool:
+            # A repeated category keeps the count from its last row
+            totals[category] = count
+
+    # Rows are written in ascending category-name order
+    out = pd.DataFrame(totals.items(), columns=["Category", "Count"])
+    out = out.sort_values("Category", ascending=True).reset_index(drop=True)
+    file_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_cc_by_4_by_category.csv"
+    )
+    shared.data_to_csv(args, out, file_path)
+
+
+def process_cc_by_nc_nd_4_by_category(args, count_data):
+    """
+    Processing count data: CC BY-NC-ND 4.0 by category
+    """
+    LOGGER.info(process_cc_by_nc_nd_4_by_category.__doc__.strip())
+    wanted_tool = "CC BY-NC-ND 4.0"
+    totals = {}
+    for record in count_data.itertuples(index=False):
+        tool = str(record.TOOL_IDENTIFIER)
+        category = str(record.CATEGORY_NAME)
+        count = int(record.COUNT)
+        if tool == wanted_tool:
+            # A repeated category keeps the count from its last row
+            totals[category] = count
+
+    # Rows are written in ascending category-name order
+    out = pd.DataFrame(totals.items(), columns=["Category", "Count"])
+    out = out.sort_values("Category", ascending=True).reset_index(drop=True)
+    file_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_cc_by_nc_nd_4_by_category.csv"
+    )
+    shared.data_to_csv(args, out, file_path)
+
+
+def process_cc_by_nc_sa_3_by_category(args, count_data):
+    """
+    Processing count data: CC BY-NC-SA 3.0 by category
+    """
+    LOGGER.info(process_cc_by_nc_sa_3_by_category.__doc__.strip())
+    wanted_tool = "CC BY-NC-SA 3.0"
+    totals = {}
+    for record in count_data.itertuples(index=False):
+        tool = str(record.TOOL_IDENTIFIER)
+        category = str(record.CATEGORY_NAME)
+        count = int(record.COUNT)
+        if tool == wanted_tool:
+            # A repeated category keeps the count from its last row
+            totals[category] = count
+
+    # Rows are written in ascending category-name order
+    out = pd.DataFrame(totals.items(), columns=["Category", "Count"])
+    out = out.sort_values("Category", ascending=True).reset_index(drop=True)
+    file_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_cc_by_nc_sa_3_by_category.csv"
+    )
+    shared.data_to_csv(args, out, file_path)
+
+
+def process_cc_by_nc_sa_4_by_category(args, count_data):
+    """
+    Processing count data: CC BY-NC-SA 4.0 by category
+    """
+    LOGGER.info(process_cc_by_nc_sa_4_by_category.__doc__.strip())
+    wanted_tool = "CC BY-NC-SA 4.0"
+    totals = {}
+    for record in count_data.itertuples(index=False):
+        tool = str(record.TOOL_IDENTIFIER)
+        category = str(record.CATEGORY_NAME)
+        count = int(record.COUNT)
+        if tool == wanted_tool:
+            # A repeated category keeps the count from its last row
+            totals[category] = count
+
+    # Rows are written in ascending category-name order
+    out = pd.DataFrame(totals.items(), columns=["Category", "Count"])
+    out = out.sort_values("Category", ascending=True).reset_index(drop=True)
+    file_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_cc_by_nc_sa_4_by_category.csv"
+    )
+    shared.data_to_csv(args, out, file_path)
+
+
+def process_cc_by_sa_4_by_category(args, count_data):
+    """
+    Processing count data: CC BY-SA 4.0 by category
+    """
+    LOGGER.info(process_cc_by_sa_4_by_category.__doc__.strip())
+    wanted_tool = "CC BY-SA 4.0"
+    totals = {}
+    for record in count_data.itertuples(index=False):
+        tool = str(record.TOOL_IDENTIFIER)
+        category = str(record.CATEGORY_NAME)
+        count = int(record.COUNT)
+        if tool == wanted_tool:
+            # A repeated category keeps the count from its last row
+            totals[category] = count
+
+    # Rows are written in ascending category-name order
+    out = pd.DataFrame(totals.items(), columns=["Category", "Count"])
+    out = out.sort_values("Category", ascending=True).reset_index(drop=True)
+    file_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_cc_by_sa_4_by_category.csv"
+    )
+    shared.data_to_csv(args, out, file_path)
+
+
+def process_cc0_1_by_category(args, count_data):
+    """
+    Processing count data: CC0 1.0 by category
+    """
+    LOGGER.info(process_cc0_1_by_category.__doc__.strip())
+    wanted_tool = "CC0 1.0"
+    totals = {}
+    for record in count_data.itertuples(index=False):
+        tool = str(record.TOOL_IDENTIFIER)
+        category = str(record.CATEGORY_NAME)
+        count = int(record.COUNT)
+        if tool == wanted_tool:
+            # A repeated category keeps the count from its last row
+            totals[category] = count
+
+    # Rows are written in ascending category-name order
+    out = pd.DataFrame(totals.items(), columns=["Category", "Count"])
+    out = out.sort_values("Category", ascending=True).reset_index(drop=True)
+    file_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_cc0_1_by_category.csv"
+    )
+    shared.data_to_csv(args, out, file_path)
+
+
+def main():
+    """Load the fetched Arxiv CSV files and run every process_* step."""
+    args = parse_arguments()
+    shared.paths_log(LOGGER, PATHS)
+    shared.git_fetch_and_merge(args, PATHS["repo"])
+    shared.check_completion_file_exists(args, FILE_PATHS)
+    file_count = shared.path_join(PATHS["data_1-fetch"], "arxiv_1_count.csv")
+    file_category = shared.path_join(
+        PATHS["data_1-fetch"], "arxiv_2_count_by_category_report.csv"
+    )
+    file_year = shared.path_join(
+        PATHS["data_1-fetch"], "arxiv_3_count_by_year.csv"
+    )
+    file_author_bucket = shared.path_join(
+        PATHS["data_1-fetch"], "arxiv_4_count_by_author_bucket.csv"
+    )
+    count_data = shared.open_data_file(
+        LOGGER, file_count, usecols=["TOOL_IDENTIFIER", "COUNT"]
+    )
+    category_data = shared.open_data_file(
+        LOGGER,
+        file_category,
+        usecols=["TOOL_IDENTIFIER", "CATEGORY_NAME", "COUNT"],
+    )
+    year_data = shared.open_data_file(
+        LOGGER, file_year, usecols=["TOOL_IDENTIFIER", "YEAR", "COUNT"]
+    )
+    author_bucket_data = shared.open_data_file(
+        LOGGER,
+        file_author_bucket,
+        usecols=["TOOL_IDENTIFIER", "AUTHOR_BUCKET", "COUNT"],
+    )
+    # Each process_* call hands one output table to shared.data_to_csv
+    process_totals_by_license(args, count_data)
+    process_totals_by_author_bucket(args, author_bucket_data)
+    process_cc_by_3_by_year(args, year_data)
+    process_cc_by_4_by_year(args, year_data)
+    process_cc_by_nc_nd_4_by_year(args, year_data)
+    process_cc_by_nc_sa_3_by_year(args, year_data)
+    process_cc_by_nc_sa_4_by_year(args, year_data)
+    process_cc_by_sa_4_by_year(args, year_data)
+    process_cc0_1_by_year(args, year_data)
+    process_cc_by_3_by_category(args, category_data)
+    process_cc_by_4_by_category(args, category_data)
+    process_cc_by_nc_nd_4_by_category(args, category_data)
+    process_cc_by_nc_sa_3_by_category(args, category_data)
+    process_cc_by_nc_sa_4_by_category(args, category_data)
+    process_cc_by_sa_4_by_category(args, category_data)
+    process_cc0_1_by_category(args, category_data)
+    # The commit message names this script's data source (Arxiv)
+    # Push changes
+    args = shared.git_add_and_commit(
+        args,
+        PATHS["repo"],
+        PATHS["data_quarter"],
+        f"Add and commit new Arxiv data for {QUARTER}",
+    )
+    shared.git_push_changes(args, PATHS["repo"])
+
+
+if __name__ == "__main__":
+    try:
+        main()
+    except shared.QuantifyingException as e:
+        log = LOGGER.info if e.exit_code == 0 else LOGGER.error
+        log(e.message)
+        sys.exit(e.exit_code)
+    except SystemExit as e:
+        # argparse exits with 0 for --help; that is not an error
+        if e.code != 0:
+            LOGGER.error(f"System exit with code: {e.code}")
+        sys.exit(e.code)
+    except KeyboardInterrupt:
+        LOGGER.info("(130) Halted via KeyboardInterrupt.")
+        sys.exit(130)
+    except Exception:
+        LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}")
+        sys.exit(1)
diff --git a/scripts/3-report/arxiv_report.py b/scripts/3-report/arxiv_report.py
new file mode 100644
index 00000000..98c954f9
--- /dev/null
+++ b/scripts/3-report/arxiv_report.py
@@ -0,0 +1,808 @@
+#!/usr/bin/env python
+"""
+This file is dedicated to visualizing and analyzing the data collected
+from Arxiv.
+"""
+
+# Standard library
+import argparse
+import os
+import sys
+import textwrap
+import traceback
+from pathlib import Path
+
+# Third-party
+from pygments import highlight
+from pygments.formatters import TerminalFormatter
+from pygments.lexers import PythonTracebackLexer
+
+# Add parent directory so shared can be imported
+sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
+
+# First-party/Local
+import plot  # noqa: E402
+import shared  # noqa: E402
+
+# Setup (SECTION_* values are passed to every shared.update_readme call)
+LOGGER, PATHS = shared.setup(__file__)
+QUARTER = os.path.basename(PATHS["data_quarter"])
+SECTION_FILE = Path(__file__).name
+SECTION_TITLE = "Arxiv"
+
+
+def parse_arguments():
+    """
+    Build the CLI, validate flag combinations and return the namespace.
+    """
+    global PATHS, QUARTER
+    LOGGER.info("Parsing command-line arguments")
+    old_quarter = QUARTER
+    cli = argparse.ArgumentParser(description=__doc__)
+    cli.add_argument(
+        "--quarter",
+        default=old_quarter,
+        help=f"Data quarter in format YYYYQx (default: {old_quarter})",
+    )
+    cli.add_argument(
+        "--show-plots",
+        action="store_true",
+        help="Show generated plots (default: False)",
+    )
+    cli.add_argument(
+        "--enable-save",
+        action="store_true",
+        help="Enable saving results (default: False)",
+    )
+    cli.add_argument(
+        "--enable-git",
+        action="store_true",
+        help="Enable git actions such as fetch, merge, add, commit, and push"
+        " (default: False)",
+    )
+    cli.add_argument(
+        "--force",
+        action="store_true",
+        help="Regenerate data even if report files exist",
+    )
+    args = cli.parse_args()
+    if args.enable_git and not args.enable_save:
+        cli.error("--enable-git requires --enable-save")
+    if args.quarter != old_quarter:
+        # Non-default quarter requested: have shared rewrite the paths
+        PATHS = shared.paths_update(LOGGER, PATHS, old_quarter, args.quarter)
+        QUARTER = args.quarter
+    args.logger, args.paths = LOGGER, PATHS
+    return args
+
+
+def arxiv_intro(args):
+    """
+    Write Arxiv introduction.
+    """
+    LOGGER.info(arxiv_intro.__doc__.strip())
+    csv_path = shared.path_join(PATHS["data_1-fetch"], "arxiv_1_count.csv")
+    LOGGER.info(f"data file: {csv_path.replace(PATHS['repo'], '.')}")
+
+    # TODO: read the fetched counts (index column "TOOL_IDENTIFIER") and
+    # build a real overview; for now the file path is only logged and
+    # placeholder text is passed to the README updater
+    placeholder = "Coming soon"
+    shared.update_readme(
+        args,
+        SECTION_FILE,
+        SECTION_TITLE,
+        "Overview",
+        None,
+        None,
+        placeholder,
+    )
+
+
+def plot_totals_by_license_type(args):
+    """
+    Create plots showing totals by license type
+    """
+    LOGGER.info(plot_totals_by_license_type.__doc__.strip())
+    heading = "Totals by license type"
+    caption = "Plots showing totals by license type."
+    name_label = "License"
+    data_label = "Count"
+    repo_root = PATHS["repo"]
+
+    # Input CSV written by arxiv_process.py (2-process phase)
+    csv_path = shared.path_join(
+        PATHS["data_2-process"], "arxiv_totals_by_license.csv"
+    )
+    LOGGER.info(f"data file: {csv_path.replace(repo_root, '.')}")
+    totals = shared.open_data_file(LOGGER, csv_path, index_col=name_label)
+    # The plot receives the rows in ascending order of count
+    totals = totals.sort_values(data_label, ascending=True)
+    figure = plot.combined_plot(
+        args=args,
+        data=totals,
+        title=heading,
+        name_label=name_label,
+        data_label=data_label,
+    )
+
+    image_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_totals_by_license_type.png"
+    )
+    LOGGER.info(f"image file: {image_path.replace(repo_root, '.')}")
+
+    if args.enable_save:
+        # Create the directory if it does not exist
+        os.makedirs(PATHS["data_phase"], exist_ok=True)
+        figure.savefig(image_path)
+
+    # Pass the section, title, image path and caption to the README updater
+    shared.update_readme(
+        args, SECTION_FILE, SECTION_TITLE, heading, image_path, caption
+    )
+
+
+def plot_cc_by_3_by_year(args):
+    """
+    Create line plot showing CC BY 3.0 by year
+    """
+    LOGGER.info(plot_cc_by_3_by_year.__doc__.strip())
+    heading = "CC BY 3.0 by year"
+    caption = "Line plot showing CC BY 3.0 works by year."
+    repo_root = PATHS["repo"]
+
+    # Input CSV written by arxiv_process.py (2-process phase)
+    csv_path = shared.path_join(
+        PATHS["data_2-process"], "arxiv_cc_by_3_by_year.csv"
+    )
+    LOGGER.info(f"data file: {csv_path.replace(repo_root, '.')}")
+    yearly = shared.open_data_file(LOGGER, csv_path, index_col="Year")
+    figure = plot.line_plot(
+        args=args,
+        data=yearly,
+        title=heading,
+        xlabel="Year",
+        ylabel="Number of works",
+    )
+
+    image_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_cc_by_3_by_year.png"
+    )
+    LOGGER.info(f"image file: {image_path.replace(repo_root, '.')}")
+    if args.enable_save:
+        # Create the report directory if it does not exist yet
+        os.makedirs(PATHS["data_phase"], exist_ok=True)
+        figure.savefig(image_path)
+
+    # Pass the section, title, image path and caption to the README updater
+    shared.update_readme(
+        args, SECTION_FILE, SECTION_TITLE, heading, image_path, caption
+    )
+
+
+def plot_cc_by_4_by_year(args):
+    """
+    Create line plot showing CC BY 4.0 by year
+    """
+    LOGGER.info(plot_cc_by_4_by_year.__doc__.strip())
+    heading = "CC BY 4.0 by year"
+    caption = "Line plot showing CC BY 4.0 works by year."
+    repo_root = PATHS["repo"]
+
+    # Input CSV written by arxiv_process.py (2-process phase)
+    csv_path = shared.path_join(
+        PATHS["data_2-process"], "arxiv_cc_by_4_by_year.csv"
+    )
+    LOGGER.info(f"data file: {csv_path.replace(repo_root, '.')}")
+    yearly = shared.open_data_file(LOGGER, csv_path, index_col="Year")
+    figure = plot.line_plot(
+        args=args,
+        data=yearly,
+        title=heading,
+        xlabel="Year",
+        ylabel="Number of works",
+    )
+
+    image_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_cc_by_4_by_year.png"
+    )
+    LOGGER.info(f"image file: {image_path.replace(repo_root, '.')}")
+    if args.enable_save:
+        # Create the report directory if it does not exist yet
+        os.makedirs(PATHS["data_phase"], exist_ok=True)
+        figure.savefig(image_path)
+
+    # Pass the section, title, image path and caption to the README updater
+    shared.update_readme(
+        args, SECTION_FILE, SECTION_TITLE, heading, image_path, caption
+    )
+
+
+def plot_cc_by_nc_nd_4_by_year(args):
+    """
+    Create line plot showing CC BY-NC-ND 4.0 by year
+    """
+    LOGGER.info(plot_cc_by_nc_nd_4_by_year.__doc__.strip())
+    heading = "CC BY-NC-ND 4.0 by year"
+    caption = "Line plot showing CC BY-NC-ND 4.0 works by year."
+    repo_root = PATHS["repo"]
+
+    # Input CSV written by arxiv_process.py (2-process phase)
+    csv_path = shared.path_join(
+        PATHS["data_2-process"], "arxiv_cc_by_nc_nd_4_by_year.csv"
+    )
+    LOGGER.info(f"data file: {csv_path.replace(repo_root, '.')}")
+    yearly = shared.open_data_file(LOGGER, csv_path, index_col="Year")
+    figure = plot.line_plot(
+        args=args,
+        data=yearly,
+        title=heading,
+        xlabel="Year",
+        ylabel="Number of works",
+    )
+
+    image_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_cc_by_nc_nd_4_by_year.png"
+    )
+    LOGGER.info(f"image file: {image_path.replace(repo_root, '.')}")
+    if args.enable_save:
+        # Create the report directory if it does not exist yet
+        os.makedirs(PATHS["data_phase"], exist_ok=True)
+        figure.savefig(image_path)
+
+    # Pass the section, title, image path and caption to the README updater
+    shared.update_readme(
+        args, SECTION_FILE, SECTION_TITLE, heading, image_path, caption
+    )
+
+
+def plot_cc_by_nc_sa_3_by_year(args):
+    """
+    Create line plot showing CC BY-NC-SA 3.0 by year
+    """
+    LOGGER.info(plot_cc_by_nc_sa_3_by_year.__doc__.strip())
+    heading = "CC BY-NC-SA 3.0 by year"
+    caption = "Line plot showing CC BY-NC-SA 3.0 works by year."
+    repo_root = PATHS["repo"]
+
+    # Input CSV written by arxiv_process.py (2-process phase)
+    csv_path = shared.path_join(
+        PATHS["data_2-process"], "arxiv_cc_by_nc_sa_3_by_year.csv"
+    )
+    LOGGER.info(f"data file: {csv_path.replace(repo_root, '.')}")
+    yearly = shared.open_data_file(LOGGER, csv_path, index_col="Year")
+    figure = plot.line_plot(
+        args=args,
+        data=yearly,
+        title=heading,
+        xlabel="Year",
+        ylabel="Number of works",
+    )
+
+    image_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_cc_by_nc_sa_3_by_year.png"
+    )
+    LOGGER.info(f"image file: {image_path.replace(repo_root, '.')}")
+    if args.enable_save:
+        # Create the report directory if it does not exist yet
+        os.makedirs(PATHS["data_phase"], exist_ok=True)
+        figure.savefig(image_path)
+
+    # Pass the section, title, image path and caption to the README updater
+    shared.update_readme(
+        args, SECTION_FILE, SECTION_TITLE, heading, image_path, caption
+    )
+
+
+def plot_cc_by_nc_sa_4_by_year(args):
+    """
+    Create line plot showing CC BY-NC-SA 4.0 by year
+    """
+    LOGGER.info(plot_cc_by_nc_sa_4_by_year.__doc__.strip())
+    heading = "CC BY-NC-SA 4.0 by year"
+    caption = "Line plot showing CC BY-NC-SA 4.0 works by year."
+    repo_root = PATHS["repo"]
+
+    # Input CSV written by arxiv_process.py (2-process phase)
+    csv_path = shared.path_join(
+        PATHS["data_2-process"], "arxiv_cc_by_nc_sa_4_by_year.csv"
+    )
+    LOGGER.info(f"data file: {csv_path.replace(repo_root, '.')}")
+    yearly = shared.open_data_file(LOGGER, csv_path, index_col="Year")
+    figure = plot.line_plot(
+        args=args,
+        data=yearly,
+        title=heading,
+        xlabel="Year",
+        ylabel="Number of works",
+    )
+
+    image_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_cc_by_nc_sa_4_by_year.png"
+    )
+    LOGGER.info(f"image file: {image_path.replace(repo_root, '.')}")
+    if args.enable_save:
+        # Create the report directory if it does not exist yet
+        os.makedirs(PATHS["data_phase"], exist_ok=True)
+        figure.savefig(image_path)
+
+    # Pass the section, title, image path and caption to the README updater
+    shared.update_readme(
+        args, SECTION_FILE, SECTION_TITLE, heading, image_path, caption
+    )
+
+
+def plot_cc_by_sa_4_by_year(args):
+    """
+    Create line plot showing CC BY-SA 4.0 by year
+    """
+    LOGGER.info(plot_cc_by_sa_4_by_year.__doc__.strip())
+    heading = "CC BY-SA 4.0 by year"
+    caption = "Line plot showing CC BY-SA 4.0 works by year."
+    repo_root = PATHS["repo"]
+
+    # Input CSV written by arxiv_process.py (2-process phase)
+    csv_path = shared.path_join(
+        PATHS["data_2-process"], "arxiv_cc_by_sa_4_by_year.csv"
+    )
+    LOGGER.info(f"data file: {csv_path.replace(repo_root, '.')}")
+    yearly = shared.open_data_file(LOGGER, csv_path, index_col="Year")
+    figure = plot.line_plot(
+        args=args,
+        data=yearly,
+        title=heading,
+        xlabel="Year",
+        ylabel="Number of works",
+    )
+
+    image_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_cc_by_sa_4_by_year.png"
+    )
+    LOGGER.info(f"image file: {image_path.replace(repo_root, '.')}")
+    if args.enable_save:
+        # Create the report directory if it does not exist yet
+        os.makedirs(PATHS["data_phase"], exist_ok=True)
+        figure.savefig(image_path)
+
+    # Pass the section, title, image path and caption to the README updater
+    shared.update_readme(
+        args, SECTION_FILE, SECTION_TITLE, heading, image_path, caption
+    )
+
+
+def plot_cc0_1_by_year(args):
+    """
+    Create line plot showing CC0 1.0 by year
+    """
+    LOGGER.info(plot_cc0_1_by_year.__doc__.strip())
+    heading = "CC0 1.0 legal tool by year"
+    caption = "Line plot showing CC0 1.0 works by year."
+    repo_root = PATHS["repo"]
+
+    # Input CSV written by arxiv_process.py (2-process phase)
+    csv_path = shared.path_join(
+        PATHS["data_2-process"], "arxiv_cc0_1_by_year.csv"
+    )
+    LOGGER.info(f"data file: {csv_path.replace(repo_root, '.')}")
+    yearly = shared.open_data_file(LOGGER, csv_path, index_col="Year")
+    figure = plot.line_plot(
+        args=args,
+        data=yearly,
+        title=heading,
+        xlabel="Year",
+        ylabel="Number of works",
+    )
+
+    image_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_cc0_1_by_year.png"
+    )
+    LOGGER.info(f"image file: {image_path.replace(repo_root, '.')}")
+    if args.enable_save:
+        # Create the report directory if it does not exist yet
+        os.makedirs(PATHS["data_phase"], exist_ok=True)
+        figure.savefig(image_path)
+
+    # Pass the section, title, image path and caption to the README updater
+    shared.update_readme(
+        args, SECTION_FILE, SECTION_TITLE, heading, image_path, caption
+    )
+
+
+def plot_totals_by_author_bucket(args):
+    """
+    Create stacked vertical bar plot showing totals by author bucket
+    """
+    LOGGER.info(plot_totals_by_author_bucket.__doc__.strip())
+    heading = "Totals by author bucket"
+    caption = (
+        "Stacked bar plot showing Arxiv works by author bucket,"
+        " broken down by license type."
+    )
+    repo_root = PATHS["repo"]
+    csv_path = shared.path_join(
+        PATHS["data_2-process"], "arxiv_totals_by_author_bucket.csv"
+    )
+    LOGGER.info(f"data file: {csv_path.replace(repo_root, '.')}")
+    by_bucket = shared.open_data_file(
+        LOGGER, csv_path, index_col="AUTHOR_BUCKET"
+    )
+    figure = plot.stacked_barv_plot(
+        args=args,
+        data=by_bucket,
+        title=heading,
+        name_label="Author Bucket",
+        # Every column left after the index becomes one stack label
+        stack_labels=list(by_bucket.columns),
+        xlabel="Author Bucket",
+    )
+
+    image_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_totals_by_author_bucket.png"
+    )
+    LOGGER.info(f"image file: {image_path.replace(repo_root, '.')}")
+    if args.enable_save:
+        os.makedirs(PATHS["data_phase"], exist_ok=True)
+        figure.savefig(image_path)
+
+    shared.update_readme(
+        args, SECTION_FILE, SECTION_TITLE, heading, image_path, caption
+    )
+
+
+def plot_cc_by_3_by_category(args):
+    """
+    Create plots showing CC BY 3.0 by category
+    """
+    LOGGER.info(plot_cc_by_3_by_category.__doc__.strip())
+    heading = "CC BY 3.0 by category"
+    caption = "Plots showing CC BY 3.0 totals by category."
+    name_label = "Category"
+    data_label = "Count"
+    repo_root = PATHS["repo"]
+
+    # Input CSV written by arxiv_process.py (2-process phase)
+    csv_path = shared.path_join(
+        PATHS["data_2-process"], "arxiv_cc_by_3_by_category.csv"
+    )
+    LOGGER.info(f"data file: {csv_path.replace(repo_root, '.')}")
+    counts = shared.open_data_file(LOGGER, csv_path, index_col=name_label)
+    # Ascending sort + tail(10) keeps the (up to) ten largest categories
+    top_ten = counts.sort_values(data_label, ascending=True).tail(10)
+    figure = plot.combined_plot(
+        args=args,
+        data=top_ten,
+        title=heading,
+        name_label=name_label,
+        data_label=data_label,
+    )
+
+    image_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_cc_by_3_by_category.png"
+    )
+    LOGGER.info(f"image file: {image_path.replace(repo_root, '.')}")
+    if args.enable_save:
+        # Create the report directory if it does not exist yet
+        os.makedirs(PATHS["data_phase"], exist_ok=True)
+        figure.savefig(image_path)
+
+    # Pass the section, title, image path and caption to the README updater
+    shared.update_readme(
+        args, SECTION_FILE, SECTION_TITLE, heading, image_path, caption
+    )
+
+
+def plot_cc_by_4_by_category(args):
+    """
+    Create plots showing CC BY 4.0 by category
+    """
+    # The docstring above is also the progress message that gets logged.
+    LOGGER.info(plot_cc_by_4_by_category.__doc__.strip())
+    index_name, value_name = "Category", "Count"
+    heading = "CC BY 4.0 by category"
+    csv_path = shared.path_join(
+        PATHS["data_2-process"], "arxiv_cc_by_4_by_category.csv"
+    )
+    LOGGER.info(f"data file: {csv_path.replace(PATHS['repo'], '.')}")
+    # Ascending sort + tail(10) keeps the ten largest counts, largest last.
+    frame = shared.open_data_file(LOGGER, csv_path, index_col=index_name)
+    top_ten = frame.sort_values(value_name, ascending=True).tail(10)
+    figure = plot.combined_plot(
+        args=args,
+        data=top_ten,
+        title=heading,
+        name_label=index_name,
+        data_label=value_name,
+    )
+
+    png_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_cc_by_4_by_category.png"
+    )
+    LOGGER.info(f"image file: {png_path.replace(PATHS['repo'], '.')}")
+    # Only create the directory and write the image when saving is enabled.
+    if args.enable_save:
+        os.makedirs(PATHS["data_phase"], exist_ok=True)
+        figure.savefig(png_path)
+
+    shared.update_readme(
+        args,
+        SECTION_FILE,
+        SECTION_TITLE,
+        heading,
+        png_path,
+        "Plots showing CC BY 4.0 totals by category.",
+    )
+
+
+def plot_cc_by_nc_nd_4_by_category(args):
+    """
+    Create plots showing CC BY-NC-ND 4.0 by category
+    """
+    # The docstring above is also the progress message that gets logged.
+    LOGGER.info(plot_cc_by_nc_nd_4_by_category.__doc__.strip())
+    index_name, value_name = "Category", "Count"
+    heading = "CC BY-NC-ND 4.0 by category"
+    csv_path = shared.path_join(
+        PATHS["data_2-process"], "arxiv_cc_by_nc_nd_4_by_category.csv"
+    )
+    LOGGER.info(f"data file: {csv_path.replace(PATHS['repo'], '.')}")
+    # Ascending sort + tail(10) keeps the ten largest counts, largest last.
+    frame = shared.open_data_file(LOGGER, csv_path, index_col=index_name)
+    top_ten = frame.sort_values(value_name, ascending=True).tail(10)
+    figure = plot.combined_plot(
+        args=args,
+        data=top_ten,
+        title=heading,
+        name_label=index_name,
+        data_label=value_name,
+    )
+
+    png_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_cc_by_nc_nd_4_by_category.png"
+    )
+    LOGGER.info(f"image file: {png_path.replace(PATHS['repo'], '.')}")
+    # Only create the directory and write the image when saving is enabled.
+    if args.enable_save:
+        os.makedirs(PATHS["data_phase"], exist_ok=True)
+        figure.savefig(png_path)
+
+    shared.update_readme(
+        args,
+        SECTION_FILE,
+        SECTION_TITLE,
+        heading,
+        png_path,
+        "Plots showing CC BY-NC-ND 4.0 totals by category.",
+    )
+
+
+def plot_cc_by_nc_sa_3_by_category(args):
+    """
+    Create plots showing CC BY-NC-SA 3.0 by category
+    """
+    # The docstring above is also the progress message that gets logged.
+    LOGGER.info(plot_cc_by_nc_sa_3_by_category.__doc__.strip())
+    index_name, value_name = "Category", "Count"
+    heading = "CC BY-NC-SA 3.0 by category"
+    csv_path = shared.path_join(
+        PATHS["data_2-process"], "arxiv_cc_by_nc_sa_3_by_category.csv"
+    )
+    LOGGER.info(f"data file: {csv_path.replace(PATHS['repo'], '.')}")
+    # Ascending sort + tail(10) keeps the ten largest counts, largest last.
+    frame = shared.open_data_file(LOGGER, csv_path, index_col=index_name)
+    top_ten = frame.sort_values(value_name, ascending=True).tail(10)
+    figure = plot.combined_plot(
+        args=args,
+        data=top_ten,
+        title=heading,
+        name_label=index_name,
+        data_label=value_name,
+    )
+
+    png_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_cc_by_nc_sa_3_by_category.png"
+    )
+    LOGGER.info(f"image file: {png_path.replace(PATHS['repo'], '.')}")
+    # Only create the directory and write the image when saving is enabled.
+    if args.enable_save:
+        os.makedirs(PATHS["data_phase"], exist_ok=True)
+        figure.savefig(png_path)
+
+    shared.update_readme(
+        args,
+        SECTION_FILE,
+        SECTION_TITLE,
+        heading,
+        png_path,
+        "Plots showing CC BY-NC-SA 3.0 totals by category.",
+    )
+
+
+def plot_cc_by_nc_sa_4_by_category(args):
+    """
+    Create plots showing CC BY-NC-SA 4.0 by category
+    """
+    # The docstring above is also the progress message that gets logged.
+    LOGGER.info(plot_cc_by_nc_sa_4_by_category.__doc__.strip())
+    index_name, value_name = "Category", "Count"
+    heading = "CC BY-NC-SA 4.0 by category"
+    csv_path = shared.path_join(
+        PATHS["data_2-process"], "arxiv_cc_by_nc_sa_4_by_category.csv"
+    )
+    LOGGER.info(f"data file: {csv_path.replace(PATHS['repo'], '.')}")
+    # Ascending sort + tail(10) keeps the ten largest counts, largest last.
+    frame = shared.open_data_file(LOGGER, csv_path, index_col=index_name)
+    top_ten = frame.sort_values(value_name, ascending=True).tail(10)
+    figure = plot.combined_plot(
+        args=args,
+        data=top_ten,
+        title=heading,
+        name_label=index_name,
+        data_label=value_name,
+    )
+
+    png_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_cc_by_nc_sa_4_by_category.png"
+    )
+    LOGGER.info(f"image file: {png_path.replace(PATHS['repo'], '.')}")
+    # Only create the directory and write the image when saving is enabled.
+    if args.enable_save:
+        os.makedirs(PATHS["data_phase"], exist_ok=True)
+        figure.savefig(png_path)
+
+    shared.update_readme(
+        args,
+        SECTION_FILE,
+        SECTION_TITLE,
+        heading,
+        png_path,
+        "Plots showing CC BY-NC-SA 4.0 totals by category.",
+    )
+
+
+def plot_cc_by_sa_4_by_category(args):
+    """
+    Create plots showing CC BY-SA 4.0 by category
+    """
+    # The docstring above is also the progress message that gets logged.
+    LOGGER.info(plot_cc_by_sa_4_by_category.__doc__.strip())
+    index_name, value_name = "Category", "Count"
+    heading = "CC BY-SA 4.0 by category"
+    csv_path = shared.path_join(
+        PATHS["data_2-process"], "arxiv_cc_by_sa_4_by_category.csv"
+    )
+    LOGGER.info(f"data file: {csv_path.replace(PATHS['repo'], '.')}")
+    # Ascending sort + tail(10) keeps the ten largest counts, largest last.
+    frame = shared.open_data_file(LOGGER, csv_path, index_col=index_name)
+    top_ten = frame.sort_values(value_name, ascending=True).tail(10)
+    figure = plot.combined_plot(
+        args=args,
+        data=top_ten,
+        title=heading,
+        name_label=index_name,
+        data_label=value_name,
+    )
+
+    png_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_cc_by_sa_4_by_category.png"
+    )
+    LOGGER.info(f"image file: {png_path.replace(PATHS['repo'], '.')}")
+    # Only create the directory and write the image when saving is enabled.
+    if args.enable_save:
+        os.makedirs(PATHS["data_phase"], exist_ok=True)
+        figure.savefig(png_path)
+
+    shared.update_readme(
+        args,
+        SECTION_FILE,
+        SECTION_TITLE,
+        heading,
+        png_path,
+        "Plots showing CC BY-SA 4.0 totals by category.",
+    )
+
+
+def plot_cc0_1_by_category(args):
+    """
+    Create plots showing CC0 1.0 by category
+    """
+    # The docstring above is also the progress message that gets logged.
+    LOGGER.info(plot_cc0_1_by_category.__doc__.strip())
+    index_name, value_name = "Category", "Count"
+    heading = "CC0 1.0 by category"
+    csv_path = shared.path_join(
+        PATHS["data_2-process"], "arxiv_cc0_1_by_category.csv"
+    )
+    LOGGER.info(f"data file: {csv_path.replace(PATHS['repo'], '.')}")
+    # Ascending sort + tail(10) keeps the ten largest counts, largest last.
+    frame = shared.open_data_file(LOGGER, csv_path, index_col=index_name)
+    top_ten = frame.sort_values(value_name, ascending=True).tail(10)
+    figure = plot.combined_plot(
+        args=args,
+        data=top_ten,
+        title=heading,
+        name_label=index_name,
+        data_label=value_name,
+    )
+
+    png_path = shared.path_join(
+        PATHS["data_phase"], "arxiv_cc0_1_by_category.png"
+    )
+    LOGGER.info(f"image file: {png_path.replace(PATHS['repo'], '.')}")
+    # Only create the directory and write the image when saving is enabled.
+    if args.enable_save:
+        os.makedirs(PATHS["data_phase"], exist_ok=True)
+        figure.savefig(png_path)
+
+    shared.update_readme(
+        args,
+        SECTION_FILE,
+        SECTION_TITLE,
+        heading,
+        png_path,
+        "Plots showing CC0 1.0 totals by category.",
+    )
+
+
+def main():
+    """Render every Arxiv report plot, then commit and push via shared."""
+    args = parse_arguments()
+    shared.paths_log(LOGGER, PATHS)
+    shared.git_fetch_and_merge(args, PATHS["repo"])
+    # Completion marker: the image saved by the last plot call below.
+    last_entry = shared.path_join(
+        PATHS["data_phase"], "arxiv_cc0_1_by_category.png"
+    )
+    shared.check_completion_file_exists(args, last_entry)
+    arxiv_intro(args)
+    plot_totals_by_license_type(args)
+    plot_cc_by_3_by_year(args)
+    plot_cc_by_4_by_year(args)
+    plot_cc_by_nc_nd_4_by_year(args)
+    plot_cc_by_nc_sa_3_by_year(args)
+    plot_cc_by_nc_sa_4_by_year(args)
+    plot_cc_by_sa_4_by_year(args)
+    plot_cc0_1_by_year(args)
+    plot_totals_by_author_bucket(args)
+    plot_cc_by_3_by_category(args)
+    plot_cc_by_4_by_category(args)
+    plot_cc_by_nc_nd_4_by_category(args)
+    plot_cc_by_nc_sa_3_by_category(args)
+    plot_cc_by_nc_sa_4_by_category(args)
+    plot_cc_by_sa_4_by_category(args)
+    plot_cc0_1_by_category(args)
+    args = shared.git_add_and_commit(
+        args,
+        PATHS["repo"],
+        PATHS["data_quarter"],
+        f"Add and commit Arxiv reports for {QUARTER}",
+    )
+    shared.git_push_changes(args, PATHS["repo"])
+
+
+if __name__ == "__main__":
+    # Entry point: each failure mode gets a log line and an exit code.
+    try:
+        main()
+    except shared.QuantifyingException as err:
+        # Exit code 0 is logged as info; any other code as an error.
+        report = LOGGER.info if err.exit_code == 0 else LOGGER.error
+        report(err.message)
+        sys.exit(err.exit_code)
+    except SystemExit as err:
+        # Log only non-zero exits, then exit again with the same code.
+        if err.code != 0:
+            LOGGER.error(f"System exit with code: {err.code}")
+        sys.exit(err.code)
+    except KeyboardInterrupt:
+        LOGGER.info("(130) Halted via KeyboardInterrupt.")
+        sys.exit(130)
+    except Exception:
+        # Run the traceback through highlight(), then indent it for the log.
+        highlighted = highlight(
+            traceback.format_exc(),
+            PythonTracebackLexer(),
+            TerminalFormatter(),
+        )
+        indented = textwrap.indent(highlighted, "    ")
+        LOGGER.critical(f"(1) Unhandled exception:\n{indented}")
+        sys.exit(1)
diff --git a/scripts/plot.py b/scripts/plot.py
index fcbdb817..064a328f 100644
--- a/scripts/plot.py
+++ b/scripts/plot.py
@@ -73,7 +73,7 @@ def combined_plot(
         height = 2.5
 
     fig, (ax1, ax2) = plt.subplots(
-        1, 2, figsize=(8, height), width_ratios=(2, 1), layout="constrained"
+        1, 2, figsize=(12, height), width_ratios=(2, 1), layout="constrained"
     )
     colors = colormaps["tab10"].colors
 
@@ -128,6 +128,116 @@ def combined_plot(
     return plt
 
 
+def line_plot(args, data, title, xlabel=None, ylabel=None):
+    """
+    Create a line plot with one line per data column, plotted against the
+    data index. Axis labels are only applied when provided.
+    """
+    plt.rcParams.update({"font.family": "monospace", "figure.dpi": 300})
+    _, axes = plt.subplots(figsize=(12, 5), layout="constrained")
+    palette = colormaps["tab10"].colors
+
+    # The palette index wraps around if columns outnumber its colors
+    for position, column in enumerate(data.columns):
+        line_color = palette[position % len(palette)]
+        axes.plot(data.index, data[column], color=line_color, label=column)
+
+    axes.set_title(title)
+    if xlabel:
+        axes.set_xlabel(xlabel)
+    if ylabel:
+        axes.set_ylabel(ylabel)
+    axes.yaxis.set_major_formatter(ticker.FuncFormatter(number_formatter))
+    axes.tick_params(axis="x", which="major", labelrotation=45)
+    axes.legend(fontsize="small")
+    axes.grid(True, alpha=0.3)
+
+    # Attribution note, anchored near the bottom-right of the figure
+    footer = f"Creative Commons (CC)\ndata from {args.quarter}"
+    plt.annotate(
+        footer,
+        (0.95, 5),
+        xycoords=("figure fraction", "figure points"),
+        color="gray",
+        fontsize="x-small",
+        horizontalalignment="right",
+    )
+    if args.show_plots:
+        plt.show()
+    return plt
+
+
+def stacked_barv_plot(
+    args,
+    data,
+    title,
+    name_label,
+    stack_labels,
+    yscale="linear",
+    xlabel=None,
+):
+    """
+    Create a stacked vertical bar plot: one bar per row of data, with one
+    segment per entry in stack_labels. name_label is the x-axis label used
+    when xlabel is not given. Limited to a maximum of 10 rows.
+    """
+    if len(data) > 10:
+        raise shared.QuantifyingException(
+            "stacked_barv_plot() is limited to a maximum of 10 data points"
+        )
+
+    plt.rcParams.update({"font.family": "monospace", "figure.dpi": 300})
+    _, axes = plt.subplots(figsize=(12, 5), layout="constrained")
+    palette = colormaps["tab10"].colors
+    use_log_axis = yscale == "log"
+
+    # Running top of each bar: the next segment is drawn starting from here
+    stack_tops = [0] * len(data)
+    for position, label in enumerate(stack_labels):
+        segment = data[label]
+        axes.bar(
+            data.index,
+            segment,
+            bottom=stack_tops,
+            color=palette[position % len(palette)],
+            label=label,
+            log=use_log_axis,
+        )
+        stack_tops = [top + value for top, value in zip(stack_tops, segment)]
+
+    axes.set_ylabel("Number of works")
+    axes.yaxis.set_major_formatter(ticker.FuncFormatter(number_formatter))
+    axes.tick_params(axis="x", which="major", labelrotation=45)
+    # An explicit (truthy) xlabel takes precedence over name_label
+    axes.set_xlabel(xlabel if xlabel else name_label)
+
+    # Anchor the legend just outside the right edge of the axes
+    axes.legend(
+        title="Type",
+        fontsize="x-small",
+        title_fontsize="x-small",
+        loc="upper left",
+        bbox_to_anchor=(1.02, 1),
+    )
+
+    plt.suptitle(title)
+    footer = (
+        f"Creative Commons (CC)\nbar y scale: {yscale}, data from"
+        f" {args.quarter}"
+    )
+    plt.annotate(
+        footer,
+        (0.95, 5),
+        xycoords=("figure fraction", "figure points"),
+        color="gray",
+        fontsize="x-small",
+        horizontalalignment="right",
+    )
+    if args.show_plots:
+        plt.show()
+    return plt
+
+
 def number_formatter(x, pos):
     """
     Use the millions formatter for x-axis

From ec1bff8980b306d64a5a9d74e4dbd7912670c109 Mon Sep 17 00:00:00 2001
From: Oreoluwa Oluwasina <oreoluwaoluwasina@gmail.com>
Date: Thu, 19 Feb 2026 12:11:22 +0100
Subject: [PATCH 2/2] changed mode of scripts

---
 scripts/2-process/arxiv_process.py | 0
 scripts/3-report/arxiv_report.py   | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 scripts/2-process/arxiv_process.py
 mode change 100644 => 100755 scripts/3-report/arxiv_report.py

diff --git a/scripts/2-process/arxiv_process.py b/scripts/2-process/arxiv_process.py
old mode 100644
new mode 100755
diff --git a/scripts/3-report/arxiv_report.py b/scripts/3-report/arxiv_report.py
old mode 100644
new mode 100755