From 80592b9f0ae843a0973f02f4bf50c6395588418a Mon Sep 17 00:00:00 2001 From: Dave Walker Date: Sat, 19 Apr 2025 16:42:06 +0100 Subject: [PATCH] Added category life list report --- reports/README.md | 3 +- reports/category_life_list.ipynb | 137 +++++++++++++++++++++++++++ reports/sql/category_life_list.sql | 6 ++ reports/sql/sightings.sql | 2 +- reports/sql/species_year_on_year.sql | 2 +- 5 files changed, 147 insertions(+), 3 deletions(-) create mode 100644 reports/category_life_list.ipynb create mode 100644 reports/sql/category_life_list.sql diff --git a/reports/README.md b/reports/README.md index 8ae6964..d58eff1 100644 --- a/reports/README.md +++ b/reports/README.md @@ -6,7 +6,8 @@ The following reports are currently available: | Notebook | Report Type | | --- | --- | -| annual_category_location_heatmap.ipynb | Heatmap of number of sightings of each species in a category at a location during a specified year | +| annual_category_location_heatmap.ipynb | Heatmap of number of sightings of each species in a category at a location during a specified year | +| category_life_list.ipynb | Life list for the species in a category, including total sightings and location count | | location_richness_map.ipynb | Interactive map of species richness (number of unique species sighted) by location | | year_on_year_species_location_trend.ipynb | Year-on-year sightings trend for a species, optionally limited to one location | diff --git a/reports/category_life_list.ipynb b/reports/category_life_list.ipynb new file mode 100644 index 0000000..cecee1a --- /dev/null +++ b/reports/category_life_list.ipynb @@ -0,0 +1,137 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d1c0d0e6", + "metadata": {}, + "source": [ + "# Category Life List\n", + "\n", + "This notebook generates and exports a life list for all sightings of the species in a category. For each species in that category, the total number of sightings of the species and the number of locations where sightings were recorded are shown alongside the species name. To use it, update the category in the first code cell, below, before running the notebook." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2d768fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Species category to report on\n", + "category = \"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1b9351fe", + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "import sqlparse\n", + "\n", + "# Read the query file\n", + "query_file_path = Path(\"sql\") / \"category_life_list.sql\"\n", + "with open(query_file_path.absolute(), \"r\") as f:\n", + " query = f.read().replace(\"\\n\", \" \")\n", + "\n", + "# Replace the placeholders\n", + "query = query.replace(\"$CATEGORY\", category)\n", + "\n", + "# Show a pretty-printed form of the query\n", + "print(sqlparse.format(query, reindent=True, keyword_case='upper'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0a3f55c5", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import sqlite3\n", + "import os\n", + "\n", + "# Connect to the database, execute the query and read the results into a dataframe\n", + "database_path = os.environ[\"NATURE_RECORDER_DB\"]\n", + "connection = sqlite3.connect(database_path)\n", + "df = pd.read_sql_query(query, connection, parse_dates=[\"Date\"])\n", + "\n", + "# Check there is some data\n", + "if not df.shape[0]:\n", + " message = f\"No data found for category '{category}'\"\n", + " raise ValueError(message)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1b8b432", + "metadata": {}, + "outputs": [], + "source": [ + "# Aggregate the data to produce the life list\n", + "life_list = (\n", + " df\n", + " .groupby(\"Species\")\n", + " .agg(\n", + " Sightings=(\"Count\", \"sum\"),\n", + " Locations=(\"Location\", pd.Series.nunique)\n", + " )\n", + " .reset_index()\n", + " .sort_values(by=\"Species\", ascending=True)\n", + ")\n", + "\n", + "\n", + "# Print the life list\n", + "with pd.option_context('display.max_rows', None,\n", + " 'display.max_columns', None,\n", + " 'display.precision', 3,\n", + " ):\n", + " display(life_list)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7827f304", + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "\n", + "# Create the folder to hold exported reports\n", + "export_folder_path = Path(\"exported\")\n", + "export_folder_path.mkdir(parents=True, exist_ok=True)\n", + "\n", + "# Export the life list\n", + "clean_category = re.sub(\"[^0-9a-zA-Z ]+\", \"\", category).replace(\" \", \"-\")\n", + "export_file_path = export_folder_path / f\"{clean_category}-Life-List.xlsx\"\n", + "life_list.to_excel(export_file_path.absolute(), sheet_name=\"Sightings\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/reports/sql/category_life_list.sql b/reports/sql/category_life_list.sql new file mode 100644 index 0000000..753cba0 --- /dev/null +++ b/reports/sql/category_life_list.sql @@ -0,0 +1,6 @@ +SELECT l.Name AS 'Location', sp.Name AS 'Species', c.Name AS 'Category', DATE( s.Date ) AS 'Date', IFNULL( s.Number, 1 ) AS 'Count' +FROM SIGHTINGS s +INNER JOIN SPECIES sp ON sp.Id = s.SpeciesId +INNER JOIN CATEGORIES c ON c.Id = sp.CategoryId +INNER JOIN LOCATIONS l ON l.Id = s.LocationId +WHERE c.Name = "$CATEGORY"; diff --git a/reports/sql/sightings.sql b/reports/sql/sightings.sql index 52b63a8..4258332 100644 --- a/reports/sql/sightings.sql +++ b/reports/sql/sightings.sql @@ -1,4 +1,4 @@ -SELECT l.Name AS 'Location', sp.Name AS 'Species', sp.Name AS 'Category', DATE( s.Date ) AS 'Date', IFNULL( s.Number, 1 ) AS 'Count' +SELECT l.Name AS 'Location', sp.Name AS 'Species', c.Name AS 'Category', DATE( s.Date ) AS 'Date', IFNULL( s.Number, 1 ) AS 'Count' FROM SIGHTINGS s INNER JOIN SPECIES sp ON sp.Id = s.SpeciesId INNER JOIN CATEGORIES c ON c.Id = sp.CategoryId diff --git a/reports/sql/species_year_on_year.sql b/reports/sql/species_year_on_year.sql index b6c61aa..1a91d51 100644 --- a/reports/sql/species_year_on_year.sql +++ b/reports/sql/species_year_on_year.sql @@ -1,4 +1,4 @@ -SELECT l.Name AS 'Location', sp.Name AS 'Species', sp.Name AS 'Category', DATE( s.Date ) AS 'Date', IFNULL( s.Number, 1 ) AS 'Count' +SELECT l.Name AS 'Location', sp.Name AS 'Species', c.Name AS 'Category', DATE( s.Date ) AS 'Date', IFNULL( s.Number, 1 ) AS 'Count' FROM SIGHTINGS s INNER JOIN SPECIES sp ON sp.Id = s.SpeciesId INNER JOIN CATEGORIES c ON c.Id = sp.CategoryId