davewalker5 · davewalker5 · Apr 19, 2025 · Apr 19, 2025
diff --git a/reports/README.md b/reports/README.md
@@ -6,6 +6,7 @@ The following reports are currently available:
 
 | Notebook | Report Type |
 | --- | --- |
+| abundance_vs_frequency_scatter.ipynb | Abundance vs. frequency scatter plot for each species in a category at a location, indicating rarity at that location |
 | annual_category_location_heatmap.ipynb | Heatmap of number of sightings of each species in a category at a location during a specified year |
 | category_life_list.ipynb | Life list for the species in a category, including total sightings and location count |
 | location_richness_map.ipynb | Interactive map of species richness (number of unique species sighted) by location |

diff --git a/reports/abundance_vs_frequency_scatter.ipynb b/reports/abundance_vs_frequency_scatter.ipynb
@@ -0,0 +1,191 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Abundance vs Frequency Scatter Plot\n",
+    "\n",
+    "This notebook generates and exports a scatter plot of abundance vs frequency for all species in a category at a location.\n",
+    "\n",
+    "| Region on Plot | Interpretation |\n",
+    "| --- | --- |\n",
+    "| Bottom-left | Rare species — seen infrequently and in low numbers. Could be elusive, migratory, or genuinely uncommon |\n",
+    "| Top-right | Common species — seen often and in large numbers. Likely widespread and/or gregarious |\n",
+    "| High frequency, low abundance | Species often seen but in small groups or solo (e.g. a bird that’s always alone but spotted often) |\n",
+    "| Low frequency, high abundance | Species seen rarely, but in big flocks/groups when they do appear (e.g. irruptive species or migratory flocks) |\n",
+    "\n",
+    "To use it, update the location, category and required export format in the first code cell, below, before running the notebook."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Location to report on\n",
+    "location = \"\"\n",
+    "\n",
+    "# Category to report on\n",
+    "category = \"\"\n",
+    "\n",
+    "# Export format for the trend chart:\n",
+    "# PNG     - export as PNG image\n",
+    "# PDF     - export as PDF file\n",
+    "# <blank> - do not export\n",
+    "export_format = \"PNG\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "import sqlparse\n",
+    "\n",
+    "# Read the query file\n",
+    "query_file_path = Path(\"sql\") / \"abundance_frequency.sql\"\n",
+    "with open(query_file_path.absolute(), \"r\") as f:\n",
+    "    query = f.read().replace(\"\\n\", \" \")\n",
+    "\n",
+    "# Replace the location and year placeholders\n",
+    "query = query.replace(\"$LOCATION\", location) \\\n",
+    "             .replace(\"$CATEGORY\", category)\n",
+    "\n",
+    "# Show a pretty-printed form of the query\n",
+    "print(sqlparse.format(query, reindent=True, keyword_case='upper'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import sqlite3\n",
+    "import os\n",
+    "\n",
+    "# Connect to the database, execute the query and read the results into a dataframe\n",
+    "database_path = os.environ[\"NATURE_RECORDER_DB\"]\n",
+    "connection = sqlite3.connect(database_path)\n",
+    "df = pd.read_sql_query(query, connection, parse_dates=[\"Date\"])\n",
+    "\n",
+    "# Check there is some data\n",
+    "if not df.shape[0]:\n",
+    "    message = f\"No data found for category '{category}' at location '{location}'\"\n",
+    "    raise ValueError(message)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import re\n",
+    "\n",
+    "# Calculate abundance and frequency\n",
+    "scatter_plot = (\n",
+    "    df\n",
+    "    .groupby(\"Species\")\n",
+    "    .agg(\n",
+    "        Abundance=(\"Count\", \"sum\"),\n",
+    "        Frequency=(\"Species\", \"count\")\n",
+    "    )\n",
+    "    .reset_index()\n",
+    ")\n",
+    "\n",
+    "# Create the folder to hold exported reports\n",
+    "export_folder_path = Path(\"exported\")\n",
+    "export_folder_path.mkdir(parents=True, exist_ok=True)\n",
+    "\n",
+    "# Export the data to Excel\n",
+    "clean_location = re.sub(\"[^0-9a-zA-Z ]+\", \"\", location).replace(\" \", \"-\")\n",
+    "clean_category = re.sub(\"[^0-9a-zA-Z ]+\", \"\", category).replace(\" \", \"-\")\n",
+    "export_file_name = f\"{clean_category}-{clean_location}-Abundance-Frequency\"\n",
+    "export_file_path = export_folder_path / f\"{export_file_name}.xlsx\"\n",
+    "scatter_plot.to_excel(export_file_path.absolute(), sheet_name=\"Abundance vs Frequency\", index=False)\n",
+    "\n",
+    "# Print the scatter plot data\n",
+    "with pd.option_context('display.max_rows', None,\n",
+    "                       'display.max_columns', None,\n",
+    "                       'display.precision', 3,\n",
+    "                       ):\n",
+    "    display(scatter_plot)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import seaborn as sns\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "plt.figure(figsize=(10, 6))\n",
+    "sns.scatterplot(data=scatter_plot, x='Frequency', y='Abundance', hue='Species', s=100)\n",
+    "\n",
+    "plt.title(f'Abundance vs Frequency for {category}')\n",
+    "plt.xlabel('Frequency (Number of Sightings)')\n",
+    "plt.ylabel('Abundance (Total Individuals)')\n",
+    "plt.grid(True)\n",
+    "\n",
+    "# Move legend below the plot, centered below the plot and with multiple columns\n",
+    "plt.legend(\n",
+    "    title='Species',\n",
+    "    bbox_to_anchor=(0.5, -0.25),\n",
+    "    loc='upper center',\n",
+    "    borderaxespad=0,\n",
+    "    ncol=3\n",
+    ")\n",
+    "\n",
+    "# Export to PNG\n",
+    "if export_format.casefold() == \"png\":\n",
+    "    export_file_path = export_folder_path / f\"{export_file_name}.png\"\n",
+    "    plt.savefig(export_file_path.absolute(), format=\"png\", dpi=300, bbox_inches=\"tight\")\n",
+    "\n",
+    "# Export to PDF\n",
+    "if export_format.casefold() == \"pdf\":\n",
+    "    export_file_path = export_folder_path / f\"{export_file_name}.pdf\"\n",
+    "    plt.savefig(export_file_path.absolute(), format=\"pdf\", bbox_inches=\"tight\")\n",
+    "\n",
+    "# Show the plot\n",
+    "plt.show()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.2"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "f085c86085609b1ab2f295d8cd5b519618e19fd591a6919f4ec2f9290a6745f6"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/reports/annual_category_location_heatmap.ipynb b/reports/annual_category_location_heatmap.ipynb
@@ -117,7 +117,7 @@
     "locations_list = \"-\".join(locations)\n",
     "clean_locations = re.sub(\"[^0-9a-zA-Z ]+\", \"\", locations_list).replace(\" \", \"-\")\n",
     "export_file_path = export_folder_path / f\"{year}-{category}-{clean_locations}-Heatmap.xlsx\"\n",
-    "heatmap_data.to_excel(export_file_path.absolute(), sheet_name=\"Sightings\")\n",
+    "heatmap_data.to_excel(export_file_path.absolute(), sheet_name=\"Sightings\", index=False)\n",
     "\n",
     "# Print the heatmap data\n",
     "with pd.option_context('display.max_rows', None,\n",

diff --git a/reports/category_life_list.ipynb b/reports/category_life_list.ipynb
@@ -109,13 +109,13 @@
     "# Export the life list\n",
     "clean_category = re.sub(\"[^0-9a-zA-Z ]+\", \"\", category).replace(\" \", \"-\")\n",
     "export_file_path = export_folder_path / f\"{clean_category}-Life-List.xlsx\"\n",
-    "life_list.to_excel(export_file_path.absolute(), sheet_name=\"Sightings\")"
+    "life_list.to_excel(export_file_path.absolute(), sheet_name=\"Life List\", index=False)"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "venv",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
@@ -129,7 +129,12 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.4"
+   "version": "3.13.2"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "7a792fcb311f9eb9f3c1b942a8c87ada8484712b89b670347c16a1088e0a1f69"
+   }
   }
  },
  "nbformat": 4,

diff --git a/reports/location_richness_map.ipynb b/reports/location_richness_map.ipynb
@@ -121,7 +121,7 @@
     "clean_country = re.sub(\"[^0-9a-zA-Z ]+\", \"\", country).replace(\" \", \"-\")\n",
     "export_file_name = f\"{year}-{clean_country}-Richness\" if year else f\"{clean_country}-Richness\"\n",
     "export_file_path = export_folder_path / f\"{export_file_name}.xlsx\"\n",
-    "richness.to_excel(export_file_path.absolute(), sheet_name=\"Location Richness\")"
+    "richness.to_excel(export_file_path.absolute(), sheet_name=\"Location Richness\", index=False)"
    ]
   },
   {
@@ -170,7 +170,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "venv",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
@@ -189,7 +189,7 @@
   "orig_nbformat": 4,
   "vscode": {
    "interpreter": {
-    "hash": "f085c86085609b1ab2f295d8cd5b519618e19fd591a6919f4ec2f9290a6745f6"
+    "hash": "7a792fcb311f9eb9f3c1b942a8c87ada8484712b89b670347c16a1088e0a1f69"
    }
   }
  },

diff --git a/reports/make_venv.bat b/reports/make_venv.bat
@@ -1,19 +1,19 @@
-@ECHO OFF
-
-REM Deactivate and remove the old virtual environment, if present
-ECHO Removing existing Virtual Environment, if present ...
-deactivate > nul 2>&1
-RMDIR /S /Q venv
-
-REM Create a new environment and activate it
-ECHO Creating new Virtual Environment ...
-python -m venv venv
-CALL venv\Scripts\activate.bat
-
-REM Make sure pip is up to date
-python -m pip install --upgrade pip
-
-REM Install the requirements
-python -m pip install -r requirements.txt
-
-ECHO ON
+@ECHO OFF
+
+REM Deactivate and remove the old virtual environment, if present
+ECHO Removing existing Virtual Environment, if present ...
+deactivate > nul 2>&1
+RMDIR /S /Q venv
+
+REM Create a new environment and activate it
+ECHO Creating new Virtual Environment ...
+python -m venv venv
+CALL venv\Scripts\activate.bat
+
+REM Make sure pip is up to date
+python -m pip install --upgrade pip
+
+REM Install the requirements
+python -m pip install -r requirements.txt
+
+ECHO ON
diff --git a/reports/sql/abundance_frequency.sql b/reports/sql/abundance_frequency.sql
@@ -0,0 +1,7 @@
+SELECT l.Name AS 'Location', sp.Name AS 'Species', c.Name AS 'Category', DATE( s.Date ) AS 'Date', IFNULL( s.Number, 1 ) AS 'Count'
+FROM SIGHTINGS s
+INNER JOIN SPECIES sp ON sp.Id = s.SpeciesId
+INNER JOIN CATEGORIES c ON c.Id = sp.CategoryId
+INNER JOIN LOCATIONS l ON l.Id = s.LocationId
+WHERE l.Name = '$LOCATION'
+AND c.Name = "$CATEGORY";
diff --git a/reports/year_on_year_species_location_trend.ipynb b/reports/year_on_year_species_location_trend.ipynb
@@ -109,7 +109,7 @@
     "    export_file_name = f\"{start_year}-{end_year}-{clean_species}\"\n",
     "\n",
     "export_file_path = export_folder_path / f\"{export_file_name}-Trend.xlsx\"\n",
-    "yearly_species_counts.to_excel(export_file_path.absolute(), sheet_name=\"Year On Year Trends\")\n",
+    "yearly_species_counts.to_excel(export_file_path.absolute(), sheet_name=\"Year On Year Trends\", index=False)\n",
     "\n",
     "# Print the data\n",
     "with pd.option_context('display.max_rows', None,\n",

diff --git a/src/naturerec_model/logic/naming.py b/src/naturerec_model/logic/naming.py
@@ -1,30 +1,30 @@
-"""
-Species and category common and scientific naming logic
-"""
-
-class Casing:
-    TITLE_CASE = "title_case"
-    CAPITALISED = "capitalised"
-
-
-def tidy_string(text, required_case):
-    """
-    Tidy the case of the specified string and remove duplicate spaces
-
-    :param name: Text string to tidy
-    :param required_case: Required casing
-    :returns: Tidied text string
-    """
-
-    tidied_text = None
-
-    if text:
-        match required_case:
-            case Casing.TITLE_CASE:
-                tidied_text = " ".join(text.split()).title().replace("'S", "'s")
-            case Casing.CAPITALISED:
-                tidied_text = " ".join(text.split()).capitalize()
-            case _:
-                tidied_text = text
-
-    return tidied_text
+"""
+Species and category common and scientific naming logic
+"""
+
+class Casing:
+    TITLE_CASE = "title_case"
+    CAPITALISED = "capitalised"
+
+
+def tidy_string(text, required_case):
+    """
+    Tidy the case of the specified string and remove duplicate spaces
+
+    :param name: Text string to tidy
+    :param required_case: Required casing
+    :returns: Tidied text string
+    """
+
+    tidied_text = None
+
+    if text:
+        match required_case:
+            case Casing.TITLE_CASE:
+                tidied_text = " ".join(text.split()).title().replace("'S", "'s")
+            case Casing.CAPITALISED:
+                tidied_text = " ".join(text.split()).capitalize()
+            case _:
+                tidied_text = text
+
+    return tidied_text