From 41679006fb3833696d921fe19a362e8b7ceb48eb Mon Sep 17 00:00:00 2001 From: "trevor.stout" Date: Tue, 24 Feb 2026 13:57:08 -0500 Subject: [PATCH 1/4] preprocess now always returns metadata paths --- batbot/batbot_cli.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/batbot/batbot_cli.py b/batbot/batbot_cli.py index f108ef3..ee49094 100755 --- a/batbot/batbot_cli.py +++ b/batbot/batbot_cli.py @@ -319,8 +319,7 @@ def preprocess( ) data['output_path'].extend(output_paths) data['compressed_path'].extend(compressed_paths) - if process_metadata: - data['metadata_path'].append(metadata_path) + data['metadata_path'].append(metadata_path) except Exception as e: warnings.warn('WARNING: Pipeline failed for file {}'.format(file)) data['failed_files'].append((str(file), e)) @@ -356,8 +355,7 @@ def preprocess( ) data['output_path'].extend(output_paths) data['compressed_path'].extend(compressed_paths) - if process_metadata: - data['metadata_path'].extend(metadata_paths) + data['metadata_path'].extend(metadata_paths) data['failed_files'].extend(failed_files) if output_json is None: @@ -365,9 +363,8 @@ def preprocess( pprint.pp(sorted(data['output_path'])) print('\nCompressed spectrogram output paths:') pprint.pp(sorted(data['compressed_path'])) - if process_metadata: - print('\nProcessed metadata paths:') - pprint.pp(sorted(data['metadata_path'])) + print('\nProcessed metadata paths:') + pprint.pp(sorted(data['metadata_path'])) print('\nFiles skipped due to failure, and corresponding exceptions:') pprint.pp(sorted(data['failed_files'])) else: From 0444d4ca989c8801a4601c03c086cf628a4193af Mon Sep 17 00:00:00 2001 From: "trevor.stout" Date: Tue, 24 Feb 2026 13:57:33 -0500 Subject: [PATCH 2/4] preprocess testing --- tests/test_preprocess.py | 38 +++++++++++++++++++++++++++++++ tests/test_preprocess_parallel.py | 19 ++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 tests/test_preprocess.py create mode 100644 
tests/test_preprocess_parallel.py diff --git a/tests/test_preprocess.py b/tests/test_preprocess.py new file mode 100644 index 0000000..de19807 --- /dev/null +++ b/tests/test_preprocess.py @@ -0,0 +1,38 @@ +import json +import os + +from click.testing import CliRunner +from batbot.batbot_cli import preprocess + +def test_preprocess(): + """Test of batbot preprocess CLI ensuring the example files are processed without error. + Additionally, a regression test ensuring that the number of detected bat calls in the examples + does not decrease. The minumum numbers of bat call segments listed below correspond to the number + of bat call segments detected at the time of writing minus detected noise segments (noise counted by hand). + Note that this test uses "fast mode" processing, which is more permissive of low-amplitude calls and noise. + """ + runner = CliRunner() + data = runner.invoke(preprocess, ['examples', '-o', './output', '--force-overwrite']) + assert data.exit_code == 0 + # parse stdout to ensure example files were processed properly + num_examples = 4 + output_str = str(data.output).split('\n') + for ii in range(num_examples): + expected_file = './output/example{}.01of01.compressed.jpg'.format(ii+1) + assert any([expected_file in x for x in output_str]), 'Did not find file listed among outputs: {}'.format(expected_file) + assert os.path.exists(expected_file), 'Did not find file on filesystem: {}'.format(expected_file) + num_min_call_segments = [65, 18, 149, 47] + for ii in range(num_examples): + expected_file = './output/example{}.metadata.json'.format(ii+1) + assert any([expected_file in x for x in output_str]), 'Did not find file listed among outputs: {}'.format(expected_file) + assert os.path.exists(expected_file), 'Did not find file on filesystem: {}'.format(expected_file) + # load metadata file and ensure minimum number of call segments were detected + with open(expected_file) as hf: + data = json.load(hf) + n_segments = len(data['segments']) + err_str = 
'Expected at least {} bat call segments in file {}, found only {} segments'.format( + num_min_call_segments[ii], + expected_file, + n_segments + ) + assert n_segments >= num_min_call_segments[ii], err_str diff --git a/tests/test_preprocess_parallel.py b/tests/test_preprocess_parallel.py new file mode 100644 index 0000000..b161f3b --- /dev/null +++ b/tests/test_preprocess_parallel.py @@ -0,0 +1,19 @@ +import os + +from click.testing import CliRunner +from batbot.batbot_cli import preprocess + +def test_preprocess_parallel(): + runner = CliRunner() + data = runner.invoke(preprocess, ['examples', '-o', './output', '--process-metadata', '--force-overwrite', '-n', 4]) + # parse stdout to ensure example files were processed properly + num_examples = 4 + output_str = str(data.output).split('\n') + for ii in range(num_examples): + expected_file = './output/example{}.01of01.compressed.jpg'.format(ii+1) + assert any([expected_file in x for x in output_str]), 'Did not find file listed among outputs: {}'.format(expected_file) + assert os.path.exists(expected_file), 'Did not find file in filesystem: {}'.format(expected_file) + for ii in range(num_examples): + expected_file = './output/example{}.metadata.json'.format(ii+1) + assert any([expected_file in x for x in output_str]), 'Did not find file listed among outputs: {}'.format(expected_file) + assert os.path.exists(expected_file), 'Did not find file in filesystem: {}'.format(expected_file) From 159e5d9b6b6e2b3251201924c4bb60229990aa18 Mon Sep 17 00:00:00 2001 From: "trevor.stout" Date: Tue, 24 Feb 2026 15:30:50 -0500 Subject: [PATCH 3/4] Linting --- tests/test_preprocess.py | 32 ++++++++++++++++++++----------- tests/test_preprocess_parallel.py | 27 +++++++++++++++++++------- 2 files changed, 41 insertions(+), 18 deletions(-) diff --git a/tests/test_preprocess.py b/tests/test_preprocess.py index de19807..61d4256 100644 --- a/tests/test_preprocess.py +++ b/tests/test_preprocess.py @@ -2,12 +2,14 @@ import os from click.testing 
import CliRunner + from batbot.batbot_cli import preprocess + def test_preprocess(): """Test of batbot preprocess CLI ensuring the example files are processed without error. Additionally, a regression test ensuring that the number of detected bat calls in the examples - does not decrease. The minumum numbers of bat call segments listed below correspond to the number + does not decrease. The minumum numbers of bat call segments listed below correspond to the number of bat call segments detected at the time of writing minus detected noise segments (noise counted by hand). Note that this test uses "fast mode" processing, which is more permissive of low-amplitude calls and noise. """ @@ -18,21 +20,29 @@ def test_preprocess(): num_examples = 4 output_str = str(data.output).split('\n') for ii in range(num_examples): - expected_file = './output/example{}.01of01.compressed.jpg'.format(ii+1) - assert any([expected_file in x for x in output_str]), 'Did not find file listed among outputs: {}'.format(expected_file) - assert os.path.exists(expected_file), 'Did not find file on filesystem: {}'.format(expected_file) + expected_file = './output/example{}.01of01.compressed.jpg'.format(ii + 1) + assert any( + [expected_file in x for x in output_str] + ), 'Did not find file listed among outputs: {}'.format(expected_file) + assert os.path.exists(expected_file), 'Did not find file on filesystem: {}'.format( + expected_file + ) num_min_call_segments = [65, 18, 149, 47] for ii in range(num_examples): - expected_file = './output/example{}.metadata.json'.format(ii+1) - assert any([expected_file in x for x in output_str]), 'Did not find file listed among outputs: {}'.format(expected_file) - assert os.path.exists(expected_file), 'Did not find file on filesystem: {}'.format(expected_file) + expected_file = './output/example{}.metadata.json'.format(ii + 1) + assert any( + [expected_file in x for x in output_str] + ), 'Did not find file listed among outputs: {}'.format(expected_file) + assert 
os.path.exists(expected_file), 'Did not find file on filesystem: {}'.format( + expected_file + ) # load metadata file and ensure minimum number of call segments were detected with open(expected_file) as hf: data = json.load(hf) n_segments = len(data['segments']) - err_str = 'Expected at least {} bat call segments in file {}, found only {} segments'.format( - num_min_call_segments[ii], - expected_file, - n_segments + err_str = ( + 'Expected at least {} bat call segments in file {}, found only {} segments'.format( + num_min_call_segments[ii], expected_file, n_segments ) + ) assert n_segments >= num_min_call_segments[ii], err_str diff --git a/tests/test_preprocess_parallel.py b/tests/test_preprocess_parallel.py index b161f3b..530075f 100644 --- a/tests/test_preprocess_parallel.py +++ b/tests/test_preprocess_parallel.py @@ -1,19 +1,32 @@ import os from click.testing import CliRunner + from batbot.batbot_cli import preprocess + def test_preprocess_parallel(): runner = CliRunner() - data = runner.invoke(preprocess, ['examples', '-o', './output', '--process-metadata', '--force-overwrite', '-n', 4]) + data = runner.invoke( + preprocess, + ['examples', '-o', './output', '--process-metadata', '--force-overwrite', '-n', 4], + ) # parse stdout to ensure example files were processed properly num_examples = 4 output_str = str(data.output).split('\n') for ii in range(num_examples): - expected_file = './output/example{}.01of01.compressed.jpg'.format(ii+1) - assert any([expected_file in x for x in output_str]), 'Did not find file listed among outputs: {}'.format(expected_file) - assert os.path.exists(expected_file), 'Did not find file in filesystem: {}'.format(expected_file) + expected_file = './output/example{}.01of01.compressed.jpg'.format(ii + 1) + assert any( + [expected_file in x for x in output_str] + ), 'Did not find file listed among outputs: {}'.format(expected_file) + assert os.path.exists(expected_file), 'Did not find file in filesystem: {}'.format( + expected_file + ) 
for ii in range(num_examples): - expected_file = './output/example{}.metadata.json'.format(ii+1) - assert any([expected_file in x for x in output_str]), 'Did not find file listed among outputs: {}'.format(expected_file) - assert os.path.exists(expected_file), 'Did not find file in filesystem: {}'.format(expected_file) + expected_file = './output/example{}.metadata.json'.format(ii + 1) + assert any( + [expected_file in x for x in output_str] + ), 'Did not find file listed among outputs: {}'.format(expected_file) + assert os.path.exists(expected_file), 'Did not find file in filesystem: {}'.format( + expected_file + ) From 30ccf910ea62c2d2f9ba0ca02c907ef144c48ea5 Mon Sep 17 00:00:00 2001 From: "trevor.stout" Date: Wed, 11 Mar 2026 14:11:21 -0400 Subject: [PATCH 4/4] Limiting testing to 2 example files --- tests/test_preprocess.py | 3 ++- tests/test_preprocess_parallel.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/test_preprocess.py b/tests/test_preprocess.py index 61d4256..de5b4ca 100644 --- a/tests/test_preprocess.py +++ b/tests/test_preprocess.py @@ -17,7 +17,8 @@ def test_preprocess(): data = runner.invoke(preprocess, ['examples', '-o', './output', '--force-overwrite']) assert data.exit_code == 0 # parse stdout to ensure example files were processed properly - num_examples = 4 + # limiting to 2 examples for now + num_examples = 2 output_str = str(data.output).split('\n') for ii in range(num_examples): expected_file = './output/example{}.01of01.compressed.jpg'.format(ii + 1) diff --git a/tests/test_preprocess_parallel.py b/tests/test_preprocess_parallel.py index 530075f..8e5fe83 100644 --- a/tests/test_preprocess_parallel.py +++ b/tests/test_preprocess_parallel.py @@ -9,10 +9,11 @@ def test_preprocess_parallel(): runner = CliRunner() data = runner.invoke( preprocess, - ['examples', '-o', './output', '--process-metadata', '--force-overwrite', '-n', 4], + ['examples', '-o', './output', '--process-metadata', '--force-overwrite', '-n',
2], ) # parse stdout to ensure example files were processed properly - num_examples = 4 + # limiting to 2 examples for now + num_examples = 2 output_str = str(data.output).split('\n') for ii in range(num_examples): expected_file = './output/example{}.01of01.compressed.jpg'.format(ii + 1)