From 2f9d5920cfb3293244d46ba0f3555002ffe37578 Mon Sep 17 00:00:00 2001 From: Dave Walker Date: Sat, 19 Apr 2025 09:06:36 +0100 Subject: [PATCH] Added year-on-year species trend report --- .../annual_category_location_heatmap.ipynb | 2 +- reports/requirements.txt | 2 +- reports/sql/species_year_on_year.sql | 8 + .../year_on_year_species_location_trend.ipynb | 196 ++++++++++++++++++ 4 files changed, 206 insertions(+), 2 deletions(-) create mode 100644 reports/sql/species_year_on_year.sql create mode 100644 reports/year_on_year_species_location_trend.ipynb diff --git a/reports/annual_category_location_heatmap.ipynb b/reports/annual_category_location_heatmap.ipynb index a63f9a3..79dc902 100644 --- a/reports/annual_category_location_heatmap.ipynb +++ b/reports/annual_category_location_heatmap.ipynb @@ -176,7 +176,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.2" + "version": "3.11.4" }, "vscode": { "interpreter": { diff --git a/reports/requirements.txt b/reports/requirements.txt index 82609df..a82707f 100644 --- a/reports/requirements.txt +++ b/reports/requirements.txt @@ -91,9 +91,9 @@ requests==2.32.3 rfc3339-validator==0.1.4 rfc3986-validator==0.1.1 rpds-py==0.24.0 +scipy==1.15.2 seaborn==0.13.2 Send2Trash==1.8.3 -setuptools==78.1.0 six==1.17.0 sniffio==1.3.1 soupsieve==2.6 diff --git a/reports/sql/species_year_on_year.sql b/reports/sql/species_year_on_year.sql new file mode 100644 index 0000000..b6c61aa --- /dev/null +++ b/reports/sql/species_year_on_year.sql @@ -0,0 +1,8 @@ +SELECT l.Name AS 'Location', sp.Name AS 'Species', sp.Name AS 'Category', DATE( s.Date ) AS 'Date', IFNULL( s.Number, 1 ) AS 'Count' +FROM SIGHTINGS s +INNER JOIN SPECIES sp ON sp.Id = s.SpeciesId +INNER JOIN CATEGORIES c ON c.Id = sp.CategoryId +INNER JOIN LOCATIONS l ON l.Id = s.LocationId +WHERE l.Name LIKE '%$LOCATION%' +AND s.Date BETWEEN '$START_YEAR-01-01' AND '$END_YEAR-12-31' +AND sp.Name = '$SPECIES'; diff --git a/reports/year_on_year_species_location_trend.ipynb b/reports/year_on_year_species_location_trend.ipynb new file mode 100644 index 0000000..2ce501a --- /dev/null +++ b/reports/year_on_year_species_location_trend.ipynb @@ -0,0 +1,196 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3c938486", + "metadata": {}, + "source": [ + "# Year-on-Year Species Trend Chart\n", + "\n", + "This notebook generates and exports a year-on-year tredn of sightings for a species, optionally limited to a single location. To use it, update the year range, location, species and required export format in the first code cell, below, before running the notebook." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ee471420", + "metadata": {}, + "outputs": [], + "source": [ + "# Years to report on\n", + "start_year = \"\"\n", + "end_year = \"\"\n", + "\n", + "# Optional location name to report on. If left blank, report on all locations\n", + "location = \"\"\n", + "\n", + "# Species to report on\n", + "species = \"\"\n", + "\n", + "# Export format for the trend chart:\n", + "# PNG - export as PNG image\n", + "# PDF - export as PDF file\n", + "# - do not export\n", + "export_format = \"PNG\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7264f824", + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "import sqlparse\n", + "\n", + "# Read the query file\n", + "query_file_path = Path(\"sql\") / \"species_year_on_year.sql\"\n", + "with open(query_file_path.absolute(), \"r\") as f:\n", + " query = f.read().replace(\"\\n\", \" \")\n", + "\n", + "# Replace the location and year placeholders\n", + "query = query.replace(\"$START_YEAR\", start_year) \\\n", + " .replace(\"$END_YEAR\", end_year) \\\n", + " .replace(\"$LOCATION\", location) \\\n", + " .replace(\"$SPECIES\", species)\n", + "\n", + "# Show a pretty-printed form of the query\n", + "print(sqlparse.format(query, reindent=True, keyword_case='upper'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7d1b4739", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import sqlite3\n", + "import os\n", + "\n", + "# Connect to the database, execute the query and read the results into a dataframe\n", + "database_path = os.environ[\"NATURE_RECORDER_DB\"]\n", + "connection = sqlite3.connect(database_path)\n", + "df = pd.read_sql_query(query, connection, parse_dates=[\"Date\"])\n", + "\n", + "# Check there is some data\n", + "if not df.shape[0]:\n", + " message = f\"No data found for species '{species}' from '{start_year}' to '{end_year}\"\n", + " if location:\n", + " message += f\" at '{location}\"\n", + " raise ValueError(message)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8512872", + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "\n", + "# Specifically add a separate \"year\" column to the data frame and aggregate the data\n", + "df[\"Year\"] = df[\"Date\"].dt.year\n", + "yearly_species_counts = df.groupby([\"Year\", \"Species\"])[\"Count\"].sum().reset_index()\n", + "\n", + "# Create the folder to hold exported reports\n", + "export_folder_path = Path(\"exported\")\n", + "export_folder_path.mkdir(parents=True, exist_ok=True)\n", + "\n", + "# Export the trend data to Excel\n", + "clean_species = re.sub(\"[^0-9a-zA-Z ]+\", \"\", species).replace(\" \", \"-\")\n", + "if location:\n", + " clean_location = re.sub(\"[^0-9a-zA-Z ]+\", \"\", location).replace(\" \", \"-\")\n", + " export_file_name = f\"{start_year}-{end_year}-{clean_species}-{clean_location}\"\n", + "else:\n", + " export_file_name = f\"{start_year}-{end_year}-{clean_species}\"\n", + "\n", + "export_file_path = export_folder_path / f\"{export_file_name}-Trend.xlsx\"\n", + "yearly_species_counts.to_excel(export_file_path.absolute(), sheet_name=\"Year On Year Trends\")\n", + "\n", + "# Print the data\n", + "with pd.option_context('display.max_rows', None,\n", + " 'display.max_columns', None,\n", + " 'display.precision', 3,\n", + " ):\n", + " display(yearly_species_counts)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2d39d505", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "from scipy.stats import linregress\n", + "\n", + "x = yearly_species_counts[\"Year\"]\n", + "y = yearly_species_counts[\"Count\"]\n", + "\n", + "# Fit linear regression\n", + "slope, intercept, *_ = linregress(x, y)\n", + "trend_y = intercept + slope * x\n", + "\n", + "# Plot\n", + "plt.figure(figsize=(10, 6))\n", + "\n", + "# Bar chart for actual data\n", + "plt.bar(x, y, label='Sightings per Year', color='skyblue')\n", + "\n", + "# Trend line over bars\n", + "plt.plot(x, trend_y, color='red', linewidth=2, label='Trend Line')\n", + "\n", + "# Set the title and axis labels and style the chart\n", + "title = f\"Year On Year Trends for {species}\"\n", + "if location:\n", + " title += f\" at {location}\"\n", + "\n", + "plt.title(title)\n", + "plt.xlabel(\"Year\")\n", + "plt.ylabel(\"Count\")\n", + "plt.legend()\n", + "plt.grid(True, linestyle='--', alpha=0.5)\n", + "plt.tight_layout()\n", + "\n", + "# Export to PNG\n", + "if export_format.casefold() == \"png\":\n", + " export_file_path = export_folder_path / f\"{export_file_name}.png\"\n", + " plt.savefig(export_file_path.absolute(), format=\"png\", dpi=300, bbox_inches=\"tight\")\n", + "\n", + "# Export to PDF\n", + "if export_format.casefold() == \"pdf\":\n", + " export_file_path = export_folder_path / f\"{export_file_name}.pdf\"\n", + " plt.savefig(export_file_path.absolute(), format=\"pdf\", bbox_inches=\"tight\")\n", + "\n", + "# Show the plot\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}