From b185d61de3dfd3bab7e7d6fa403e5b1b51b25e01 Mon Sep 17 00:00:00 2001 From: David Walker Date: Fri, 18 Apr 2025 10:04:36 +0100 Subject: [PATCH] Added the location richness interactive map --- .../annual_category_location_heatmap.ipynb | 11 +- reports/location_richness_map.ipynb | 198 ++++++++++++++++++ reports/requirements.txt | 4 + reports/sql/richness.sql | 8 + reports/sql/richness_by_year.sql | 9 + 5 files changed, 227 insertions(+), 3 deletions(-) create mode 100644 reports/location_richness_map.ipynb create mode 100644 reports/sql/richness.sql create mode 100644 reports/sql/richness_by_year.sql diff --git a/reports/annual_category_location_heatmap.ipynb b/reports/annual_category_location_heatmap.ipynb index 5c1b42d..a63f9a3 100644 --- a/reports/annual_category_location_heatmap.ipynb +++ b/reports/annual_category_location_heatmap.ipynb @@ -114,8 +114,8 @@ "heatmap_data.columns = [calendar.month_abbr[m] for m in heatmap_data.columns]\n", "\n", "# Export the heatmap data to Excel\n", - "locations_list = \"-\".join(locations).replace(\" \", \"-\")\n", - "clean_locations = re.sub(\"[^0-9a-zA-Z\\-]+\", \"\", locations_list)\n", + "locations_list = \"-\".join(locations)\n", + "clean_locations = re.sub(\"[^0-9a-zA-Z ]+\", \"\", locations_list).replace(\" \", \"-\")\n", "export_file_path = export_folder_path / f\"{year}-{category}-{clean_locations}-Heatmap.xlsx\"\n", "heatmap_data.to_excel(export_file_path.absolute(), sheet_name=\"Sightings\")\n", "\n", @@ -176,7 +176,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.13.2" + }, + "vscode": { + "interpreter": { + "hash": "f085c86085609b1ab2f295d8cd5b519618e19fd591a6919f4ec2f9290a6745f6" + } } }, "nbformat": 4, diff --git a/reports/location_richness_map.ipynb b/reports/location_richness_map.ipynb new file mode 100644 index 0000000..9ddb6bd --- /dev/null +++ b/reports/location_richness_map.ipynb @@ -0,0 +1,198 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Species Richness by Location Map\n", + "\n", + "This notebook generates and exports an HTML format interactive map of species richness at reported locations. To use it, update the country and, optionally, the year in the first code cell, below, before running the notebook.\n", + "\n", + "If the year is specified, only locations reported in that year will be included.\n", + "\n", + "If the year is left blank, all locations reported in the specified country for all time will be included." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Year to report on. Optional but if specified, should be in YYYY format e.g. 2025\n", + "year = \"\"\n", + "\n", + "# Country to report on\n", + "country = \"\"\n", + "\n", + "# Initial Zoom Level for the map\n", + "zoom = 6" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "import sqlparse\n", + "\n", + "# Select the query file based on whether the year is specified\n", + "query_file_name = \"richness_by_year.sql\" if year else \"richness.sql\"\n", + "\n", + "# Read the query file\n", + "query_file_path = Path(\"sql\") / query_file_name\n", + "with open(query_file_path.absolute(), \"r\") as f:\n", + " query = f.read().replace(\"\\n\", \" \")\n", + "\n", + "# Replace the country and year placeholders\n", + "query = query.replace(\"$YEAR\", year) \\\n", + " .replace(\"$COUNTRY\", country)\n", + "\n", + "# Show a pretty-printed form of the query\n", + "print(sqlparse.format(query, reindent=True, keyword_case='upper'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import sqlite3\n", + "import os\n", + "\n", + "# Connect to the database, execute the query and read the results into a dataframe\n", + "database_path = os.environ[\"NATURE_RECORDER_DB\"]\n", + "connection = sqlite3.connect(database_path)\n", + "df = pd.read_sql_query(query, connection, parse_dates=[\"Date\"])\n", + "\n", + "# Check there is some data\n", + "if not df.shape[0]:\n", + " message = f\"No data found for country '{country}'\"\n", + " if year:\n", + " message += f\" during '{year}'\"\n", + " raise ValueError(message)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Make sure the latitude and longitude have been read as numbers, not strings\n", + "df[\"Latitude\"] = pd.to_numeric(df[\"Latitude\"], errors=\"coerce\")\n", + "df[\"Longitude\"] = pd.to_numeric(df[\"Longitude\"], errors=\"coerce\")\n", + "\n", + "# Calculate the data for the richness chart\n", + "richness = (\n", + " df.groupby([\"Location\", \"Latitude\", \"Longitude\"])[\"Species\"]\n", + " .nunique()\n", + " .reset_index()\n", + ")\n", + "\n", + "richness.columns = [\"Location\", \"Latitude\", \"Longitude\", \"Richness\"]\n", + "\n", + "# Print the richness data\n", + "with pd.option_context('display.max_rows', None,\n", + " 'display.max_columns', None,\n", + " 'display.precision', 3,\n", + " ):\n", + " display(richness)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "\n", + "# Create the folder to hold exported reports\n", + "export_folder_path = Path(\"exported\")\n", + "export_folder_path.mkdir(parents=True, exist_ok=True)\n", + "\n", + "# Export the richness data to Excel\n", + "clean_country = re.sub(\"[^0-9a-zA-Z ]+\", \"\", country).replace(\" \", \"-\")\n", + "export_file_name = f\"{year}-{clean_country}-Richness\" if year else f\"{clean_country}-Richness\"\n", + "export_file_path = export_folder_path / f\"{export_file_name}.xlsx\"\n", + "richness.to_excel(export_file_path.absolute(), sheet_name=\"Location Richness\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import folium\n", + "from folium import CircleMarker\n", + "from folium.plugins import MarkerCluster\n", + "\n", + "# Center map on the average lat/lon of all your locations\n", + "richness_map = folium.Map(\n", + " location=[richness[\"Latitude\"].mean(), richness[\"Longitude\"].mean()],\n", + " zoom_start=zoom,\n", + " tiles=\"cartodbpositron\"\n", + ")\n", + "\n", + "# Optional: cluster markers if you have many locations\n", + "marker_cluster = MarkerCluster().add_to(richness_map)\n", + "\n", + "# Normalize marker size based on richness\n", + "max_richness = richness[\"Richness\"].max()\n", + "\n", + "for _, row in richness.iterrows():\n", + " CircleMarker(\n", + " location=(row[\"Latitude\"], row[\"Longitude\"]),\n", + " radius=5 + 10 * (row[\"Richness\"] / max_richness), # Size scaled by richness\n", + " color=\"blue\",\n", + " fill=True,\n", + " fill_color=\"green\",\n", + " fill_opacity=0.6,\n", + " popup=f\"{row['Location']}
Richness: {row['Richness']}\",\n", + " ).add_to(marker_cluster)\n", + "\n", + "\n", + "# Export the map to HTML\n", + "export_file_path = export_folder_path / f\"{export_file_name}.html\"\n", + "richness_map.save(export_file_path.absolute())\n", + "\n", + "# Show the map\n", + "richness_map" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.2" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "f085c86085609b1ab2f295d8cd5b519618e19fd591a6919f4ec2f9290a6745f6" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/reports/requirements.txt b/reports/requirements.txt index fc940a1..82609df 100644 --- a/reports/requirements.txt +++ b/reports/requirements.txt @@ -9,6 +9,7 @@ attrs==25.3.0 babel==2.17.0 beautifulsoup4==4.13.4 bleach==6.2.0 +branca==0.8.1 certifi==2025.1.31 cffi==1.17.1 charset-normalizer==3.4.1 @@ -21,6 +22,7 @@ defusedxml==0.7.1 et_xmlfile==2.0.0 executing==2.2.0 fastjsonschema==2.21.1 +folium==0.19.5 fonttools==4.57.0 fqdn==1.5.1 h11==0.14.0 @@ -91,6 +93,7 @@ rfc3986-validator==0.1.1 rpds-py==0.24.0 seaborn==0.13.2 Send2Trash==1.8.3 +setuptools==78.1.0 six==1.17.0 sniffio==1.3.1 soupsieve==2.6 @@ -110,3 +113,4 @@ webcolors==24.11.1 webencodings==0.5.1 websocket-client==1.8.0 widgetsnbextension==4.0.14 +xyzservices==2025.1.0 diff --git a/reports/sql/richness.sql b/reports/sql/richness.sql new file mode 100644 index 0000000..0336d38 --- /dev/null +++ b/reports/sql/richness.sql @@ -0,0 +1,8 @@ +SELECT l.Name AS 'Location', l.Latitude, l.Longitude, sp.Name AS 'Species', DATE( s.Date ) AS 'Date' +FROM SIGHTINGS s +INNER JOIN SPECIES sp ON sp.Id = s.SpeciesId +INNER JOIN CATEGORIES c ON c.Id = sp.CategoryId +INNER JOIN LOCATIONS l ON l.Id = s.LocationId +WHERE l.Country = '$COUNTRY' +AND l.Latitude IS NOT NULL +AND l.Longitude IS NOT NULL; diff --git a/reports/sql/richness_by_year.sql b/reports/sql/richness_by_year.sql new file mode 100644 index 0000000..634a102 --- /dev/null +++ b/reports/sql/richness_by_year.sql @@ -0,0 +1,9 @@ +SELECT l.Name AS 'Location', l.Latitude, l.Longitude, sp.Name AS 'Species', DATE( s.Date ) AS 'Date' +FROM SIGHTINGS s +INNER JOIN SPECIES sp ON sp.Id = s.SpeciesId +INNER JOIN CATEGORIES c ON c.Id = sp.CategoryId +INNER JOIN LOCATIONS l ON l.Id = s.LocationId +WHERE l.Country = '$COUNTRY' +AND l.Latitude IS NOT NULL +AND l.Longitude IS NOT NULL +AND s.Date LIKE '$YEAR-%';