diff --git a/batbot/batbot_cli.py b/batbot/batbot_cli.py
index f108ef3..ee49094 100755
--- a/batbot/batbot_cli.py
+++ b/batbot/batbot_cli.py
@@ -319,8 +319,7 @@ def preprocess(
                 )
                 data['output_path'].extend(output_paths)
                 data['compressed_path'].extend(compressed_paths)
-                if process_metadata:
-                    data['metadata_path'].append(metadata_path)
+                data['metadata_path'].append(metadata_path)
             except Exception as e:
                 warnings.warn('WARNING: Pipeline failed for file {}'.format(file))
                 data['failed_files'].append((str(file), e))
@@ -356,8 +355,7 @@ def preprocess(
         )
         data['output_path'].extend(output_paths)
         data['compressed_path'].extend(compressed_paths)
-        if process_metadata:
-            data['metadata_path'].extend(metadata_paths)
+        data['metadata_path'].extend(metadata_paths)
         data['failed_files'].extend(failed_files)
 
     if output_json is None:
@@ -365,9 +363,8 @@ def preprocess(
         pprint.pp(sorted(data['output_path']))
         print('\nCompressed spectrogram output paths:')
         pprint.pp(sorted(data['compressed_path']))
-        if process_metadata:
-            print('\nProcessed metadata paths:')
-            pprint.pp(sorted(data['metadata_path']))
+        print('\nProcessed metadata paths:')
+        pprint.pp(sorted(data['metadata_path']))
         print('\nFiles skipped due to failure, and corresponding exceptions:')
         pprint.pp(sorted(data['failed_files']))
     else:
diff --git a/tests/test_preprocess.py b/tests/test_preprocess.py
new file mode 100644
index 0000000..de5b4ca
--- /dev/null
+++ b/tests/test_preprocess.py
@@ -0,0 +1,52 @@
+import json
+import os
+
+from click.testing import CliRunner
+
+from batbot.batbot_cli import preprocess
+
+
+def test_preprocess():
+    """Run the ``batbot preprocess`` CLI on the example files and check its outputs.
+
+    Also a regression test ensuring that the number of detected bat calls in the examples
+    does not decrease. The minimum numbers of bat call segments listed below correspond to
+    the number of bat call segments detected at the time of writing minus detected noise
+    segments (noise counted by hand). Note that this test uses "fast mode" processing,
+    which is more permissive of low-amplitude calls and noise.
+    """
+    runner = CliRunner()
+    result = runner.invoke(preprocess, ['examples', '-o', './output', '--force-overwrite'])
+    assert result.exit_code == 0, result.output
+    # parse stdout to ensure example files were processed properly
+    # limiting to 2 examples for now
+    num_examples = 2
+    output_lines = str(result.output).split('\n')
+    for ii in range(num_examples):
+        expected_file = './output/example{}.01of01.compressed.jpg'.format(ii + 1)
+        assert any(
+            expected_file in line for line in output_lines
+        ), 'Did not find file listed among outputs: {}'.format(expected_file)
+        assert os.path.exists(expected_file), 'Did not find file on filesystem: {}'.format(
+            expected_file
+        )
+    # only the first num_examples entries of this list are used below
+    num_min_call_segments = [65, 18, 149, 47]
+    for ii in range(num_examples):
+        expected_file = './output/example{}.metadata.json'.format(ii + 1)
+        assert any(
+            expected_file in line for line in output_lines
+        ), 'Did not find file listed among outputs: {}'.format(expected_file)
+        assert os.path.exists(expected_file), 'Did not find file on filesystem: {}'.format(
+            expected_file
+        )
+        # load metadata file and ensure minimum number of call segments were detected
+        with open(expected_file) as hf:
+            metadata = json.load(hf)
+        n_segments = len(metadata['segments'])
+        err_str = (
+            'Expected at least {} bat call segments in file {}, found only {} segments'.format(
+                num_min_call_segments[ii], expected_file, n_segments
+            )
+        )
+        assert n_segments >= num_min_call_segments[ii], err_str
diff --git a/tests/test_preprocess_parallel.py b/tests/test_preprocess_parallel.py
new file mode 100644
index 0000000..8e5fe83
--- /dev/null
+++ b/tests/test_preprocess_parallel.py
@@ -0,0 +1,36 @@
+import os
+
+from click.testing import CliRunner
+
+from batbot.batbot_cli import preprocess
+
+
+def test_preprocess_parallel():
+    """Run the ``batbot preprocess`` CLI with ``-n 2`` and check the example outputs."""
+    runner = CliRunner()
+    result = runner.invoke(
+        preprocess,
+        # CLI arguments are strings, exactly as they would arrive from a shell
+        ['examples', '-o', './output', '--process-metadata', '--force-overwrite', '-n', '2'],
+    )
+    assert result.exit_code == 0, result.output
+    # parse stdout to ensure example files were processed properly
+    # limiting to 2 examples for now
+    num_examples = 2
+    output_lines = str(result.output).split('\n')
+    for ii in range(num_examples):
+        expected_file = './output/example{}.01of01.compressed.jpg'.format(ii + 1)
+        assert any(
+            expected_file in line for line in output_lines
+        ), 'Did not find file listed among outputs: {}'.format(expected_file)
+        assert os.path.exists(expected_file), 'Did not find file in filesystem: {}'.format(
+            expected_file
+        )
+    for ii in range(num_examples):
+        expected_file = './output/example{}.metadata.json'.format(ii + 1)
+        assert any(
+            expected_file in line for line in output_lines
+        ), 'Did not find file listed among outputs: {}'.format(expected_file)
+        assert os.path.exists(expected_file), 'Did not find file in filesystem: {}'.format(
+            expected_file
+        )