From 486f3742dc6900ca326395d5906191d9d7646068 Mon Sep 17 00:00:00 2001 From: Max Mauermann Date: Mon, 28 Jul 2025 17:06:40 +0200 Subject: [PATCH 01/37] custom classifier are read again, more testing needed to ensure old models can still be run with this --- birdnet_analyzer/analyze/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/birdnet_analyzer/analyze/core.py b/birdnet_analyzer/analyze/core.py index d55f7c0e..7cdbbb4f 100644 --- a/birdnet_analyzer/analyze/core.py +++ b/birdnet_analyzer/analyze/core.py @@ -222,7 +222,7 @@ def _set_params( if not os.path.isfile(cfg.LABELS_FILE): cfg.LABELS_FILE = custom_classifier.replace("Model_FP32.tflite", "Labels.txt") - if not custom_classifier.endswith("Model_FP32.tflite") or not os.path.isfile(cfg.LABELS_FILE): + if not os.path.isfile(cfg.LABELS_FILE): cfg.LABELS_FILE = None cfg.LABELS = None else: From af79aeb0b7ed71cadfe280af6779934862b92fe6 Mon Sep 17 00:00:00 2001 From: Max Mauermann Date: Tue, 29 Jul 2025 13:30:20 +0200 Subject: [PATCH 02/37] added test for custom classifier training and prediction, changed behaviour in the gui to align with the cli --- birdnet_analyzer/analyze/core.py | 4 ++-- birdnet_analyzer/gui/utils.py | 6 ++--- tests/train/test_train.py | 41 +++++++++++++++++++++++++++++++- 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/birdnet_analyzer/analyze/core.py b/birdnet_analyzer/analyze/core.py index 7cdbbb4f..195a8689 100644 --- a/birdnet_analyzer/analyze/core.py +++ b/birdnet_analyzer/analyze/core.py @@ -219,10 +219,10 @@ def _set_params( if custom_classifier.endswith(".tflite"): cfg.LABELS_FILE = custom_classifier.replace(".tflite", "_Labels.txt") # same for labels file - if not os.path.isfile(cfg.LABELS_FILE): + if not os.path.isfile(cfg.LABELS_FILE): # if the label file is not found, an old birdnet model might be used cfg.LABELS_FILE = custom_classifier.replace("Model_FP32.tflite", "Labels.txt") - if not os.path.isfile(cfg.LABELS_FILE): + if not os.path.isfile(cfg.LABELS_FILE): # if the label file is still not found, dont use labels cfg.LABELS_FILE = None cfg.LABELS = None else: diff --git a/birdnet_analyzer/gui/utils.py b/birdnet_analyzer/gui/utils.py index 81eef0e8..76d0361b 100644 --- a/birdnet_analyzer/gui/utils.py +++ b/birdnet_analyzer/gui/utils.py @@ -605,10 +605,10 @@ def on_custom_classifier_selection_click(): if not os.path.isfile(labels): labels = file.replace("Model_FP32.tflite", "Labels.txt") - if not file.endswith("Model_FP32.tflite") or not os.path.isfile(labels): - gr.Warning(loc.localize("species-list-custom-classifier-no-labelfile-warning")) + if not os.path.isfile(labels): + gr.Warning(loc.localize("species-list-custom-classifier-no-labelfile-warning")) - return file, gr.File(value=[file], visible=True) + return file, gr.File(value=[file], visible=True) return file, gr.File(value=[file, labels], visible=True) diff --git a/tests/train/test_train.py b/tests/train/test_train.py index b270c2a1..e70412a8 100644 --- a/tests/train/test_train.py +++ b/tests/train/test_train.py @@ -3,12 +3,16 @@ import tempfile from unittest.mock import patch +import librosa import pytest +import soundfile as sf import birdnet_analyzer.config as cfg +from birdnet_analyzer.analyze.core import analyze from birdnet_analyzer.cli import train_parser from birdnet_analyzer.train.core import train +from random import randint @pytest.fixture def setup_test_environment(): @@ -20,7 +24,7 @@ def setup_test_environment(): os.makedirs(input_dir, exist_ok=True) os.makedirs(output_dir, exist_ok=True) - classifier_output = os.path.join(output_dir, "classifier_output") + classifier_output = os.path.join(output_dir, "classifier_output", "custom_classifier.tflite") # Store original config values original_config = { @@ -55,3 +59,38 @@ def test_train_cli(mock_train_model, mock_ensure_model, setup_test_environment): mock_ensure_model.assert_called_once() mock_train_model.assert_called_once_with() + +def test_training(setup_test_environment): + """Test the training process and prediction with dummy data.""" + env = setup_test_environment + + dummy_classes = ["Dummy A", "Dummy B"] + subfolders = dummy_classes.copy() + subfolders.append("Background") + + for sub in subfolders: + subfolder_path = os.path.join(env["input_dir"], sub) + os.makedirs(subfolder_path, exist_ok=True) + # Create dummy files in each subfolder + for i in range(10): + file_path = os.path.join(subfolder_path, f"audio_{i}.wav") + with open(file_path, "wb") as f: + audio = librosa.tone(randint(20, 20000), length=3.0, sr=44100) + sf.write(f, audio, 44100, format="WAV") + + train(env["input_dir"], env["classifier_output"]) + + assert os.path.isfile(env["classifier_output"]), "Classifier output file was not created." + assert os.path.exists(env["classifier_output"].replace(".tflite", "_Labels.txt")), "Labels file was not created." + assert os.path.exists(env["classifier_output"].replace(".tflite", "_Params.csv")), "Params file was not created." + assert os.path.exists(env["classifier_output"].replace(".tflite", ".tflite_sample_counts.csv")), "Params file was not created." + + soundscape_path = "birdnet_analyzer/example/soundscape.wav" + analyze(soundscape_path, env["output_dir"], top_n=1, classifier=env["classifier_output"]) + + output_file = os.path.join(env["output_dir"], "soundscape.BirdNET.selection.table.txt") + with open(output_file) as f: + lines = f.readlines()[1:] + for line in lines: + parts = line.strip().split("\t") + assert parts[7] in dummy_classes, f"Detected class {parts[7]} not in expected classes {dummy_classes}" \ No newline at end of file From 81b65088aacfcf9ac57261286ada1c4b34314ef8 Mon Sep 17 00:00:00 2001 From: Max Mauermann Date: Tue, 29 Jul 2025 13:33:12 +0200 Subject: [PATCH 03/37] ruff fix --- tests/train/test_train.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/train/test_train.py b/tests/train/test_train.py index e70412a8..1d3afcba 100644 --- a/tests/train/test_train.py +++ b/tests/train/test_train.py @@ -1,6 +1,7 @@ import os import shutil import tempfile +from random import randint from unittest.mock import patch import librosa @@ -12,7 +13,6 @@ from birdnet_analyzer.cli import train_parser from birdnet_analyzer.train.core import train -from random import randint @pytest.fixture def setup_test_environment(): @@ -93,4 +93,4 @@ def test_training(setup_test_environment): lines = f.readlines()[1:] for line in lines: parts = line.strip().split("\t") - assert parts[7] in dummy_classes, f"Detected class {parts[7]} not in expected classes {dummy_classes}" \ No newline at end of file + assert parts[7] in dummy_classes, f"Detected class {parts[7]} not in expected classes {dummy_classes}" From 6edde5280b81f4657c1fbfe4a98d8887b8b77ef4 Mon Sep 17 00:00:00 2001 From: Max Mauermann Date: Tue, 29 Jul 2025 15:06:37 +0200 Subject: [PATCH 04/37] timeout increase for test --- tests/train/test_train.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/train/test_train.py b/tests/train/test_train.py index 1d3afcba..fe064832 100644 --- a/tests/train/test_train.py +++ b/tests/train/test_train.py @@ -60,6 +60,7 @@ def test_train_cli(mock_train_model, mock_ensure_model, setup_test_environment): mock_ensure_model.assert_called_once() mock_train_model.assert_called_once_with() +@pytest.mark.timeout(300) # Increase timeout for training def test_training(setup_test_environment): """Test the training process and prediction with dummy data.""" env = setup_test_environment From 2f52188fcef5be9948f9e6532827dfd6787ea7e1 Mon Sep 17 00:00:00 2001 From: Max Mauermann Date: Tue, 29 Jul 2025 15:17:55 +0200 Subject: [PATCH 05/37] . --- tests/train/test_train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/train/test_train.py b/tests/train/test_train.py index fe064832..e63de61c 100644 --- a/tests/train/test_train.py +++ b/tests/train/test_train.py @@ -60,7 +60,7 @@ def test_train_cli(mock_train_model, mock_ensure_model, setup_test_environment): mock_ensure_model.assert_called_once() mock_train_model.assert_called_once_with() -@pytest.mark.timeout(300) # Increase timeout for training +@pytest.mark.timeout(600) # Increase timeout for training def test_training(setup_test_environment): """Test the training process and prediction with dummy data.""" env = setup_test_environment From 266af4d62d231194fa1bc17edc5e8677175bab3c Mon Sep 17 00:00:00 2001 From: Max Mauermann Date: Tue, 29 Jul 2025 15:32:28 +0200 Subject: [PATCH 06/37] . --- tests/train/test_train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/train/test_train.py b/tests/train/test_train.py index e63de61c..15356278 100644 --- a/tests/train/test_train.py +++ b/tests/train/test_train.py @@ -60,7 +60,7 @@ def test_train_cli(mock_train_model, mock_ensure_model, setup_test_environment): mock_ensure_model.assert_called_once() mock_train_model.assert_called_once_with() -@pytest.mark.timeout(600) # Increase timeout for training +@pytest.mark.timeout(6000) # Increase timeout for training def test_training(setup_test_environment): """Test the training process and prediction with dummy data.""" env = setup_test_environment From e597a62545e59a9f6dc6f1e8690af3b79d6c95e7 Mon Sep 17 00:00:00 2001 From: Max Mauermann Date: Wed, 30 Jul 2025 13:33:08 +0200 Subject: [PATCH 07/37] . --- tests/train/test_train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/train/test_train.py b/tests/train/test_train.py index 15356278..5df72fa7 100644 --- a/tests/train/test_train.py +++ b/tests/train/test_train.py @@ -79,7 +79,7 @@ def test_training(setup_test_environment): audio = librosa.tone(randint(20, 20000), length=3.0, sr=44100) sf.write(f, audio, 44100, format="WAV") - train(env["input_dir"], env["classifier_output"]) + train(env["input_dir"], env["classifier_output"], threads=1) assert os.path.isfile(env["classifier_output"]), "Classifier output file was not created." assert os.path.exists(env["classifier_output"].replace(".tflite", "_Labels.txt")), "Labels file was not created." From 8defe2ceff483a14408df695a160196b557d1d11 Mon Sep 17 00:00:00 2001 From: Max Mauermann Date: Wed, 30 Jul 2025 14:06:01 +0200 Subject: [PATCH 08/37] . --- .github/workflows/ci.yml | 2 +- tests/train/test_train.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ccfd1041..26f2a579 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,4 +35,4 @@ jobs: run: | python -m pip install .[tests] python -m birdnet_analyzer.utils - python -m pytest + python -m pytest -u diff --git a/tests/train/test_train.py b/tests/train/test_train.py index 5df72fa7..e63de61c 100644 --- a/tests/train/test_train.py +++ b/tests/train/test_train.py @@ -60,7 +60,7 @@ def test_train_cli(mock_train_model, mock_ensure_model, setup_test_environment): mock_ensure_model.assert_called_once() mock_train_model.assert_called_once_with() -@pytest.mark.timeout(6000) # Increase timeout for training +@pytest.mark.timeout(600) # Increase timeout for training def test_training(setup_test_environment): """Test the training process and prediction with dummy data.""" env = setup_test_environment @@ -79,7 +79,7 @@ def test_training(setup_test_environment): audio = librosa.tone(randint(20, 20000), length=3.0, sr=44100) sf.write(f, audio, 44100, format="WAV") - train(env["input_dir"], env["classifier_output"], threads=1) + train(env["input_dir"], env["classifier_output"]) assert os.path.isfile(env["classifier_output"]), "Classifier output file was not created." assert os.path.exists(env["classifier_output"].replace(".tflite", "_Labels.txt")), "Labels file was not created." From 111e13d5a88b688d5bcb490a10f71458e792bf67 Mon Sep 17 00:00:00 2001 From: Max Mauermann Date: Wed, 30 Jul 2025 14:27:11 +0200 Subject: [PATCH 09/37] . --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 26f2a579..08671dda 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,4 +35,4 @@ jobs: run: | python -m pip install .[tests] python -m birdnet_analyzer.utils - python -m pytest -u + python -u -m pytest From a1951c7a031e98dfdb2a8387ef9ea80c338a3ef4 Mon Sep 17 00:00:00 2001 From: Max Mauermann Date: Wed, 30 Jul 2025 14:54:37 +0200 Subject: [PATCH 10/37] . --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 08671dda..95e52a53 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,4 +35,4 @@ jobs: run: | python -m pip install .[tests] python -m birdnet_analyzer.utils - python -u -m pytest + python -m pytest -s From 7816a7d4ef16b7426b4873778508d09f1215bb1c Mon Sep 17 00:00:00 2001 From: Max Mauermann Date: Wed, 30 Jul 2025 15:16:06 +0200 Subject: [PATCH 11/37] . --- birdnet_analyzer/train/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/birdnet_analyzer/train/utils.py b/birdnet_analyzer/train/utils.py index 10d56d13..1794a299 100644 --- a/birdnet_analyzer/train/utils.py +++ b/birdnet_analyzer/train/utils.py @@ -60,6 +60,8 @@ def _load_audio_file(f, label_vector, config): # Try to load the audio file try: + print("\tLoading file:", f, flush=True) + # Load audio sig, rate = audio.open_audio_file( f, From 5115c9f5681a3f14985624560fd11883b39b2f5d Mon Sep 17 00:00:00 2001 From: Max Mauermann Date: Wed, 30 Jul 2025 15:26:36 +0200 Subject: [PATCH 12/37] . --- birdnet_analyzer/audio.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/birdnet_analyzer/audio.py b/birdnet_analyzer/audio.py index 45ccb361..3407c472 100644 --- a/birdnet_analyzer/audio.py +++ b/birdnet_analyzer/audio.py @@ -27,6 +27,9 @@ def open_audio_file(path: str, sample_rate=48000, offset=0.0, duration=None, fmi Returns: Returns the audio time series and the sampling rate. """ + + print(f"Opening audio file: {path}", flush=True) + # Open file with librosa (uses ffmpeg or libav) if speed == 1.0: sig, rate = librosa.load( @@ -41,6 +44,9 @@ def open_audio_file(path: str, sample_rate=48000, offset=0.0, duration=None, fmi sig = librosa.resample(sig, orig_sr=int(rate * speed), target_sr=sample_rate, res_type="kaiser_fast") rate = sample_rate + + print(f"File opened: {path}", flush=True) + # Bandpass filter if fmin is not None and fmax is not None: sig = bandpass(sig, rate, fmin, fmax) From edaa70be7fc3b867843b288665923b77b24a1673 Mon Sep 17 00:00:00 2001 From: Max Mauermann Date: Wed, 30 Jul 2025 15:41:24 +0200 Subject: [PATCH 13/37] . --- birdnet_analyzer/train/utils.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/birdnet_analyzer/train/utils.py b/birdnet_analyzer/train/utils.py index 1794a299..1d840ad9 100644 --- a/birdnet_analyzer/train/utils.py +++ b/birdnet_analyzer/train/utils.py @@ -79,6 +79,8 @@ def _load_audio_file(f, label_vector, config): print(f"\t {e}", flush=True) return np.array([]), np.array([]) + print("Cropping audio file...", flush=True) + # Crop training samples if cfg.SAMPLE_CROP_MODE == "center": sig_splits = [audio.crop_center(sig, rate, cfg.SIG_LENGTH)] @@ -90,6 +92,9 @@ def _load_audio_file(f, label_vector, config): else: sig_splits = audio.split_signal(sig, rate, cfg.SIG_LENGTH, cfg.SIG_OVERLAP, cfg.SIG_MINLEN) + + print("Extracting segments...", flush=True) + # Get feature embeddings batch_size = 1 # turns out that batch size 1 is the fastest, probably because of having to resize the model input when the number of samples in a batch changes for i in range(0, len(sig_splits), batch_size): @@ -101,6 +106,8 @@ def _load_audio_file(f, label_vector, config): x_train.extend(embeddings) y_train.extend(batch_label) + print(f"Loaded {len(x_train)} samples from {f}", flush=True) + return x_train, y_train From c65dad1bd1303550e9b53eb41331356de158468c Mon Sep 17 00:00:00 2001 From: Max Mauermann Date: Wed, 30 Jul 2025 15:55:16 +0200 Subject: [PATCH 14/37] . --- birdnet_analyzer/audio.py | 5 ----- birdnet_analyzer/model.py | 6 ++++++ 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/birdnet_analyzer/audio.py b/birdnet_analyzer/audio.py index 3407c472..decc97d0 100644 --- a/birdnet_analyzer/audio.py +++ b/birdnet_analyzer/audio.py @@ -28,8 +28,6 @@ def open_audio_file(path: str, sample_rate=48000, offset=0.0, duration=None, fmi Returns the audio time series and the sampling rate. """ - print(f"Opening audio file: {path}", flush=True) - # Open file with librosa (uses ffmpeg or libav) if speed == 1.0: sig, rate = librosa.load( @@ -44,9 +42,6 @@ def open_audio_file(path: str, sample_rate=48000, offset=0.0, duration=None, fmi sig = librosa.resample(sig, orig_sr=int(rate * speed), target_sr=sample_rate, res_type="kaiser_fast") rate = sample_rate - - print(f"File opened: {path}", flush=True) - # Bandpass filter if fmin is not None and fmax is not None: sig = bandpass(sig, rate, fmin, fmax) diff --git a/birdnet_analyzer/model.py b/birdnet_analyzer/model.py index 8f265640..c682dbe0 100644 --- a/birdnet_analyzer/model.py +++ b/birdnet_analyzer/model.py @@ -1198,16 +1198,22 @@ def embeddings(sample): The embeddings. """ + print("Embeddings: Loading model...", flush=True) + load_model(False) sample = np.array(sample, dtype="float32") + + print("Reshaping input Tensor...", flush=True) # Reshape input tensor INTERPRETER.resize_tensor_input(INPUT_LAYER_INDEX, [len(sample), *sample[0].shape]) INTERPRETER.allocate_tensors() + print("Extracting feature embeddings...", flush=True) # Extract feature embeddings INTERPRETER.set_tensor(INPUT_LAYER_INDEX, sample) INTERPRETER.invoke() + print("Returning result...", flush=True) return INTERPRETER.get_tensor(OUTPUT_LAYER_INDEX) From abbf6011efb368588e522375f1fadc650decc6ac Mon Sep 17 00:00:00 2001 From: Max Mauermann Date: Wed, 30 Jul 2025 16:17:05 +0200 Subject: [PATCH 15/37] removed logging --- birdnet_analyzer/model.py | 7 ------- birdnet_analyzer/train/utils.py | 8 -------- 2 files changed, 15 deletions(-) diff --git a/birdnet_analyzer/model.py b/birdnet_analyzer/model.py index c682dbe0..73146e95 100644 --- a/birdnet_analyzer/model.py +++ b/birdnet_analyzer/model.py @@ -1197,23 +1197,16 @@ def embeddings(sample): Returns: The embeddings. """ - - print("Embeddings: Loading model...", flush=True) - load_model(False) sample = np.array(sample, dtype="float32") - - print("Reshaping input Tensor...", flush=True) # Reshape input tensor INTERPRETER.resize_tensor_input(INPUT_LAYER_INDEX, [len(sample), *sample[0].shape]) INTERPRETER.allocate_tensors() - print("Extracting feature embeddings...", flush=True) # Extract feature embeddings INTERPRETER.set_tensor(INPUT_LAYER_INDEX, sample) INTERPRETER.invoke() - print("Returning result...", flush=True) return INTERPRETER.get_tensor(OUTPUT_LAYER_INDEX) diff --git a/birdnet_analyzer/train/utils.py b/birdnet_analyzer/train/utils.py index 1d840ad9..ca88a2d3 100644 --- a/birdnet_analyzer/train/utils.py +++ b/birdnet_analyzer/train/utils.py @@ -60,8 +60,6 @@ def _load_audio_file(f, label_vector, config): # Try to load the audio file try: - print("\tLoading file:", f, flush=True) - # Load audio sig, rate = audio.open_audio_file( f, @@ -79,8 +77,6 @@ def _load_audio_file(f, label_vector, config): print(f"\t {e}", flush=True) return np.array([]), np.array([]) - print("Cropping audio file...", flush=True) - # Crop training samples if cfg.SAMPLE_CROP_MODE == "center": sig_splits = [audio.crop_center(sig, rate, cfg.SIG_LENGTH)] @@ -93,8 +89,6 @@ def _load_audio_file(f, label_vector, config): sig_splits = audio.split_signal(sig, rate, cfg.SIG_LENGTH, cfg.SIG_OVERLAP, cfg.SIG_MINLEN) - print("Extracting segments...", flush=True) - # Get feature embeddings batch_size = 1 # turns out that batch size 1 is the fastest, probably because of having to resize the model input when the number of samples in a batch changes for i in range(0, len(sig_splits), batch_size): @@ -106,8 +100,6 @@ def _load_audio_file(f, label_vector, config): x_train.extend(embeddings) y_train.extend(batch_label) - print(f"Loaded {len(x_train)} samples from {f}", flush=True) - return x_train, y_train From e8cf558aabe3b9338c93765515c03d7c22668bdd Mon Sep 17 00:00:00 2001 From: Max Mauermann Date: Wed, 30 Jul 2025 16:38:05 +0200 Subject: [PATCH 16/37] added real training data from test-data submodule --- tests/data | 2 +- tests/train/test_train.py | 23 ++++++++--------------- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/tests/data b/tests/data index b43d4283..d6871b77 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit b43d4283fe0d24f63d5d460584e608094ead2879 +Subproject commit d6871b77a0a1e8396d96cbbb29c4dc2a2292f2ab diff --git a/tests/train/test_train.py b/tests/train/test_train.py index e63de61c..9d4a0708 100644 --- a/tests/train/test_train.py +++ b/tests/train/test_train.py @@ -64,22 +64,15 @@ def test_train_cli(mock_train_model, mock_ensure_model, setup_test_environment): def test_training(setup_test_environment): """Test the training process and prediction with dummy data.""" env = setup_test_environment + training_data_input = "tests/data/training" - dummy_classes = ["Dummy A", "Dummy B"] - subfolders = dummy_classes.copy() - subfolders.append("Background") - - for sub in subfolders: - subfolder_path = os.path.join(env["input_dir"], sub) - os.makedirs(subfolder_path, exist_ok=True) - # Create dummy files in each subfolder - for i in range(10): - file_path = os.path.join(subfolder_path, f"audio_{i}.wav") - with open(file_path, "wb") as f: - audio = librosa.tone(randint(20, 20000), length=3.0, sr=44100) - sf.write(f, audio, 44100, format="WAV") - - train(env["input_dir"], env["classifier_output"]) + # Read class names from subfolders in the input directory, filtering out background classes + dummy_classes = [ + d for d in os.listdir(training_data_input) + if os.path.isdir(os.path.join(training_data_input, d)) and d.lower() not in cfg.NON_EVENT_CLASSES + ] + + train(training_data_input, env["classifier_output"]) assert os.path.isfile(env["classifier_output"]), "Classifier output file was not created." assert os.path.exists(env["classifier_output"].replace(".tflite", "_Labels.txt")), "Labels file was not created." From 672d79d0a1a05dac01d994a920bbeae4aab82a91 Mon Sep 17 00:00:00 2001 From: Max Mauermann Date: Wed, 30 Jul 2025 16:41:24 +0200 Subject: [PATCH 17/37] ruff fixes --- tests/train/test_train.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/train/test_train.py b/tests/train/test_train.py index 9d4a0708..31dedbe9 100644 --- a/tests/train/test_train.py +++ b/tests/train/test_train.py @@ -1,12 +1,9 @@ import os import shutil import tempfile -from random import randint from unittest.mock import patch -import librosa import pytest -import soundfile as sf import birdnet_analyzer.config as cfg from birdnet_analyzer.analyze.core import analyze From bec7510084c53732ca6b3b3e1ab1699d9a8b11ec Mon Sep 17 00:00:00 2001 From: Max Mauermann Date: Wed, 30 Jul 2025 17:28:16 +0200 Subject: [PATCH 18/37] simple embeddings test --- tests/embeddings/test_embeddings.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/embeddings/test_embeddings.py b/tests/embeddings/test_embeddings.py index 9b8a4e5b..7ad8db7f 100644 --- a/tests/embeddings/test_embeddings.py +++ b/tests/embeddings/test_embeddings.py @@ -4,9 +4,11 @@ import tempfile from unittest.mock import MagicMock, patch +import numpy as np import pytest import birdnet_analyzer.config as cfg +from birdnet_analyzer import model from birdnet_analyzer.cli import embeddings_parser from birdnet_analyzer.embeddings.core import embeddings @@ -53,3 +55,16 @@ def test_embeddings_cli(mock_run_embeddings: MagicMock, mock_ensure_model: Magic mock_ensure_model.assert_called_once() threads = min(8, max(1, multiprocessing.cpu_count() // 2)) mock_run_embeddings.assert_called_once_with(env["input_dir"], env["output_dir"], 0, 1.0, 0, 15000, threads, 1, None) + + +def test_model_embeddings_function_returns_expected_shape(): + # Create a dummy sample (e.g., 1D numpy array of audio data) + sample = np.zeros(144000).astype(np.float32) + # Reshape the sample to (1, 144000) as expected by the model + sample = sample.reshape(1, 144000) + # Call the embeddings function + result = model.embeddings(sample) + + # Check that result is a numpy array and has expected shape (depends on model, e.g., (1, embedding_dim)) + assert isinstance(result, np.ndarray) + assert result.ndim == 2 From c0fffda495840bfc92be9248fa78a94da4e3932e Mon Sep 17 00:00:00 2001 From: Josef Haupt Date: Thu, 31 Jul 2025 14:03:43 +0200 Subject: [PATCH 19/37] minmizing for testing --- .github/workflows/ci.yml | 2 +- tests/train/test_train.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6e0fe15f..f4f51b0c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,4 +37,4 @@ jobs: run: | python -m pip install .[tests] python -m birdnet_analyzer.utils - python -m pytest -s + python -m pytest -s tests/train/test_train.py::test_training diff --git a/tests/train/test_train.py b/tests/train/test_train.py index 31dedbe9..a2bc79a8 100644 --- a/tests/train/test_train.py +++ b/tests/train/test_train.py @@ -18,8 +18,9 @@ def setup_test_environment(): input_dir = os.path.join(test_dir, "input") output_dir = os.path.join(test_dir, "output") - os.makedirs(input_dir, exist_ok=True) - os.makedirs(output_dir, exist_ok=True) + # Directory should not exist, so no exist_ok=True + os.makedirs(input_dir) + os.makedirs(output_dir) classifier_output = os.path.join(output_dir, "classifier_output", "custom_classifier.tflite") From cc5a9160da373ac25d248490aa020fd897deedbb Mon Sep 17 00:00:00 2001 From: Josef Haupt Date: Thu, 31 Jul 2025 14:14:03 +0200 Subject: [PATCH 20/37] enabling more tests --- .github/workflows/ci.yml | 2 +- tests/train/test_train.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f4f51b0c..5148867c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,4 +37,4 @@ jobs: run: | python -m pip install .[tests] python -m birdnet_analyzer.utils - python -m pytest -s tests/train/test_train.py::test_training + python -m pytest -s tests/train/test_train.py diff --git a/tests/train/test_train.py b/tests/train/test_train.py index a2bc79a8..bbc89fb8 100644 --- a/tests/train/test_train.py +++ b/tests/train/test_train.py @@ -58,7 +58,7 @@ def test_train_cli(mock_train_model, mock_ensure_model, setup_test_environment): mock_ensure_model.assert_called_once() mock_train_model.assert_called_once_with() -@pytest.mark.timeout(600) # Increase timeout for training +@pytest.mark.timeout(400) # Increase timeout for training, 400s should be sufficient, win is by far the slowest def test_training(setup_test_environment): """Test the training process and prediction with dummy data.""" env = setup_test_environment From e16be511e1a02c8eab330686499ff3b1f5bcc17b Mon Sep 17 00:00:00 2001 From: Josef Haupt Date: Thu, 31 Jul 2025 14:22:26 +0200 Subject: [PATCH 21/37] so far so good, more tests enabled --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5148867c..c01f5201 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,4 +37,4 @@ jobs: run: | python -m pip install .[tests] python -m birdnet_analyzer.utils - python -m pytest -s tests/train/test_train.py + python -m pytest -s tests/train/ tests/analyze/ From d8983545e7f00e539122526baccc6ea4ca5b735a Mon Sep 17 00:00:00 2001 From: Josef Haupt Date: Thu, 31 Jul 2025 14:30:27 +0200 Subject: [PATCH 22/37] so far so good, more tests enabled --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c01f5201..626af313 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,4 +37,4 @@ jobs: run: | python -m pip install .[tests] python -m birdnet_analyzer.utils - python -m pytest -s tests/train/ tests/analyze/ + python -m pytest -s tests/train/ tests/analyze/ tests/embeddings/ From 703edc9c4d0728bd298f7d50dca44781710d9b49 Mon Sep 17 00:00:00 2001 From: Josef Haupt Date: Thu, 31 Jul 2025 14:38:38 +0200 Subject: [PATCH 23/37] so far so good, more tests enabled --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 626af313..89c2c202 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,4 +37,4 @@ jobs: run: | python -m pip install .[tests] python -m birdnet_analyzer.utils - python -m pytest -s tests/train/ tests/analyze/ tests/embeddings/ + python -m pytest -s tests/train/ tests/analyze/ tests/embeddings/ tests/evaluation/ From 7416e638f8fb052551974c65bad58a86c3ea72af Mon Sep 17 00:00:00 2001 From: Josef Haupt Date: Thu, 31 Jul 2025 14:45:30 +0200 Subject: [PATCH 24/37] so far so good, more tests enabled --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 89c2c202..19540b8f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,4 +37,4 @@ jobs: run: | python -m pip install .[tests] python -m birdnet_analyzer.utils - python -m pytest -s tests/train/ tests/analyze/ tests/embeddings/ tests/evaluation/ + python -m pytest -s tests/train/ tests/analyze/ tests/embeddings/ tests/evaluation/ tests/gui/ From 1ce159edff0f9d5714542f8edec6a55d36ffb357 Mon Sep 17 00:00:00 2001 From: Josef Haupt Date: Thu, 31 Jul 2025 14:52:40 +0200 Subject: [PATCH 25/37] so far so good, more tests enabled --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 19540b8f..07faa13a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,4 +37,4 @@ jobs: run: | python -m pip install .[tests] python -m birdnet_analyzer.utils - python -m pytest -s tests/train/ tests/analyze/ tests/embeddings/ tests/evaluation/ tests/gui/ + python -m pytest -s tests/train/ tests/analyze/ tests/embeddings/ tests/evaluation/ tests/gui/ tests/segments/ From 840099f6f0bb509d6e9922f7582b164e9088710a Mon Sep 17 00:00:00 2001 From: Josef Haupt Date: Thu, 31 Jul 2025 15:05:39 +0200 Subject: [PATCH 26/37] so far so good, more tests enabled --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 07faa13a..c14064b6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,4 +37,4 @@ jobs: run: | python -m pip install .[tests] python -m birdnet_analyzer.utils - python -m pytest -s tests/train/ tests/analyze/ tests/embeddings/ tests/evaluation/ tests/gui/ tests/segments/ + python -m pytest -s tests/train/ tests/analyze/ tests/embeddings/ tests/evaluation/ tests/gui/ tests/segments/ tests/species/ From c53455e0eba857bb99f1026a6b4900c40f0fce85 Mon Sep 17 00:00:00 2001 From: Josef Haupt Date: Thu, 31 Jul 2025 15:13:19 +0200 Subject: [PATCH 27/37] so far so good, all tests enabled --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c14064b6..b22903bb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,4 +37,4 @@ jobs: run: | python -m pip install .[tests] python -m birdnet_analyzer.utils - python -m pytest -s tests/train/ tests/analyze/ tests/embeddings/ tests/evaluation/ tests/gui/ tests/segments/ tests/species/ + python -m pytest -s tests/train/ tests/analyze/ tests/embeddings/ tests/evaluation/ tests/gui/ tests/segments/ tests/species/ tests/test_utils.py From a0b72427e39dc3ed5872c8058ba62fb57e92b773 Mon Sep 17 00:00:00 2001 From: Josef Haupt Date: Thu, 31 Jul 2025 15:21:11 +0200 Subject: [PATCH 28/37] so far so good, all tests enabled --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b22903bb..6e0fe15f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,4 +37,4 @@ jobs: run: | python -m pip install .[tests] python -m birdnet_analyzer.utils - python -m pytest -s tests/train/ tests/analyze/ tests/embeddings/ tests/evaluation/ tests/gui/ tests/segments/ tests/species/ tests/test_utils.py + python -m pytest -s From 7cc96fb47fd67d93b0f8c1d1d8360cfe020e564f Mon Sep 17 00:00:00 2001 From: Josef Haupt Date: Thu, 31 Jul 2025 15:42:13 +0200 Subject: [PATCH 29/37] i hate this --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6e0fe15f..90119710 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,4 +37,4 @@ jobs: run: | python -m pip install .[tests] python -m birdnet_analyzer.utils - python -m pytest -s + python -m pytest -s tests/ From 84eb30c61ea674252e26ada02f62884e2f0bab92 Mon Sep 17 00:00:00 2001 From: Josef Haupt Date: Thu, 31 Jul 2025 16:22:43 +0200 Subject: [PATCH 30/37] Moved failing test to back, see if that triggers it --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 90119710..3d195f36 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,4 +37,4 @@ jobs: run: | python -m pip install .[tests] python -m birdnet_analyzer.utils - python -m pytest -s tests/ + python -m pytest -s tests/analyze/ tests/embeddings/ tests/evaluation/ tests/gui/ tests/segments/ tests/species/ tests/test_utils.py tests/train/ From 68366a35625dca8c69e6b9152e807b41cac67e27 Mon Sep 17 00:00:00 2001 From: Josef Haupt Date: Thu, 31 Jul 2025 16:30:46 +0200 Subject: [PATCH 31/37] The ordering triggers it, checking interactions --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3d195f36..fe3bc431 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,4 +37,4 @@ jobs: run: | python -m pip install .[tests] python -m birdnet_analyzer.utils - python -m pytest -s tests/analyze/ tests/embeddings/ tests/evaluation/ tests/gui/ tests/segments/ tests/species/ tests/test_utils.py tests/train/ + python -m pytest -s tests/analyze/ tests/train/ From 6444e9055d2afec68a9e23bd6056059d4c89083d Mon Sep 17 00:00:00 2001 From: Josef Haupt Date: Thu, 31 Jul 2025 16:38:48 +0200 Subject: [PATCH 32/37] analyze Yes --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fe3bc431..e23c472e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,4 +37,4 @@ jobs: run: | python -m pip install .[tests] python -m birdnet_analyzer.utils - python -m pytest -s tests/analyze/ tests/train/ + python -m pytest -s tests/embeddings/ tests/train/ From 97cc0a9e78312db82a0e0b4da0f122a05c3397ed Mon Sep 17 00:00:00 2001 From: Josef Haupt Date: Thu, 31 Jul 2025 16:43:52 +0200 Subject: [PATCH 33/37] embeddings No --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e23c472e..742c9193 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,4 +37,4 @@ jobs: run: | python -m pip install .[tests] python -m birdnet_analyzer.utils - python -m pytest -s tests/embeddings/ tests/train/ + python -m pytest -s tests/evaluation/ tests/train/ From 6f3f4da785396a217e61ba299c8ab4fe26a4715f Mon Sep 17 00:00:00 2001 From: Josef Haupt Date: Thu, 31 Jul 2025 16:48:25 +0200 Subject: [PATCH 34/37] evaluation No --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 742c9193..2ffa31cb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,4 +37,4 @@ jobs: run: | python -m pip install .[tests] python -m birdnet_analyzer.utils - python -m pytest -s tests/evaluation/ tests/train/ + python -m pytest -s tests/gui/ tests/train/ From 6d97842dcced6a1a987e644edf0e42db42af2752 Mon Sep 17 00:00:00 2001 From: Josef Haupt Date: Thu, 31 Jul 2025 16:53:15 +0200 Subject: [PATCH 35/37] gui No --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2ffa31cb..37d77a2b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,4 +37,4 @@ jobs: run: | python -m pip install .[tests] python -m birdnet_analyzer.utils - python -m pytest -s tests/gui/ tests/train/ + python -m pytest -s tests/segments/ tests/train/ From c4582ecc81ba567eaad951c072ddfd62d93ebaf9 Mon Sep 17 00:00:00 2001 From: Josef Haupt Date: Thu, 31 Jul 2025 16:56:59 +0200 Subject: [PATCH 36/37] segments No --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 37d77a2b..54f35db6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,4 +37,4 @@ jobs: run: | python -m pip install .[tests] python -m birdnet_analyzer.utils - python -m pytest -s tests/segments/ tests/train/ + python -m pytest -s tests/species/ tests/train/ From 5f29fcb4938d24ee86204ed47dd504602c9a65b1 Mon Sep 17 00:00:00 2001 From: Josef Haupt Date: Thu, 31 Jul 2025 17:00:32 +0200 Subject: [PATCH 37/37] species No --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 54f35db6..72be78d9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,4 +37,4 @@ jobs: run: | python -m pip install .[tests] python -m birdnet_analyzer.utils - python -m pytest -s tests/species/ tests/train/ + python -m pytest -s tests/test_utils.py tests/train/