Skip to content

Commit 65977fd

Browse files
authored
Merge pull request #1252 from VisLab/fix_extras
Changed schema error vs warning strategy
2 parents 300b5cb + d65e758 commit 65977fd

11 files changed

Lines changed: 322 additions & 189 deletions

File tree

.github/workflows/ci_cov.yaml

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,7 @@ jobs:
3737
3838
# Install dependencies
3939
- name: Install dependencies
40-
run: uv pip install ruff -e ".[docs,test]"
41-
42-
# Run ruff
43-
- name: Lint with ruff
44-
run: |
45-
ruff check . --output-format=github
40+
run: uv pip install -e ".[docs,test]"
4641

4742
# Run unittest with coverage
4843
- name: Test with unittest and coverage

.github/workflows/ci_windows.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ jobs:
2525
with:
2626
python-version: ${{ matrix.python-version }}
2727
enable-cache: true
28+
cache-dependency-glob: "**/pyproject.toml"
2829

2930
- name: Create virtual environment
3031
shell: pwsh

docs/api/errors.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,9 @@ Error functions
4040

4141
.. autofunction:: hed.errors.error_reporter.sort_issues
4242

43-
.. autofunction:: hed.errors.error_reporter.replace_tag_references
43+
.. autofunction:: hed.errors.error_reporter.separate_issues
44+
45+
.. autofunction:: hed.errors.error_reporter.iter_errors
4446

4547
Error types
4648
-----------

hed/errors/__init__.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
"""Error handling module for HED."""
22

3-
from .error_reporter import ErrorHandler, get_printable_issue_string, sort_issues, replace_tag_references, iter_errors
3+
from .error_reporter import (
4+
ErrorHandler,
5+
separate_issues,
6+
get_printable_issue_string,
7+
sort_issues,
8+
iter_errors,
9+
)
410
from .error_types import (
511
DefinitionErrors,
612
TemporalErrors,

hed/errors/error_reporter.py

Lines changed: 83 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,23 @@ def wrapper(tag, *args, severity=default_severity, **kwargs):
204204
schema_error_messages.mark_as_used = True
205205

206206

207+
def separate_issues(issues_list: list[dict]) -> tuple[list[dict], list[dict]]:
208+
"""Separate a list of issues into errors and warnings.
209+
210+
Parameters:
211+
issues_list (list[dict]): A list of issue dictionaries. The 'severity' key is
212+
optional; issues that omit it are treated as errors (ErrorSeverity.ERROR).
213+
214+
Returns:
215+
tuple[list[dict], list[dict]]: A tuple of (errors, warnings) where errors contains
216+
issues with severity <= ErrorSeverity.ERROR and warnings contains issues with
217+
severity > ErrorSeverity.ERROR.
218+
"""
219+
errors = [issue for issue in issues_list if issue.get("severity", ErrorSeverity.ERROR) <= ErrorSeverity.ERROR]
220+
warnings = [issue for issue in issues_list if issue.get("severity", ErrorSeverity.ERROR) > ErrorSeverity.ERROR]
221+
return errors, warnings
222+
223+
207224
class ErrorHandler:
208225
"""Class to hold error context and having general error functions."""
209226

@@ -274,20 +291,6 @@ def format_error_with_context(self, *args, **kwargs):
274291

275292
return error_object
276293

277-
@staticmethod
278-
def filter_issues_by_severity(issues_list: list[dict], severity: int) -> list[dict]:
279-
"""Gather all issues matching or below a given severity.
280-
281-
Parameters:
282-
issues_list (list[dict]): A list of dictionaries containing the full issue list.
283-
severity (int): The level of issues to keep.
284-
285-
Returns:
286-
list[dict]: A list of dictionaries containing the issue list after filtering by severity.
287-
288-
"""
289-
return [issue for issue in issues_list if issue["severity"] <= severity]
290-
291294
@staticmethod
292295
def format_error(error_type: str, *args, actual_error=None, **kwargs) -> list[dict]:
293296
"""Format an error based on the parameters, which vary based on what type of error this is.
@@ -423,13 +426,46 @@ def val_error_unknown(*args, **kwargs) -> str:
423426
return f"Unknown error. Args: {str(args), str(kwargs)}"
424427

425428
@staticmethod
426-
def filter_issues_by_count(issues, count, by_file=False) -> tuple[list[dict], dict[str, int]]:
429+
def filter_issues_by_severity(issues_list: list[dict], severity: int) -> list[dict]:
430+
"""Gather all issues matching or below a given severity.
431+
432+
Parameters:
433+
issues_list (list[dict]): A list of dictionaries containing the full issue list.
434+
severity (int): The level of issues to keep.
435+
436+
Returns:
437+
list[dict]: A list of dictionaries containing the issue list after filtering by severity.
438+
439+
"""
440+
return [issue for issue in issues_list if issue.get("severity", ErrorSeverity.ERROR) <= severity]
441+
442+
@staticmethod
443+
def aggregate_code_counts(file_code_dict: dict) -> dict:
444+
"""Aggregate the counts of codes across multiple files.
445+
446+
Parameters:
447+
file_code_dict (dict): A dictionary where keys are filenames and values are
448+
dictionaries of code counts.
449+
450+
Returns:
451+
dict: A dictionary with the aggregated counts of codes across all files.
452+
"""
453+
total_counts = defaultdict(int)
454+
for file_dict in file_code_dict.values():
455+
for code, count in file_dict.items():
456+
total_counts[code] += count
457+
return dict(total_counts)
458+
459+
@staticmethod
460+
def filter_issues_by_count(
461+
issues: list[dict], count: int, by_file: bool = False
462+
) -> tuple[list[dict], dict[str, int]]:
427463
"""Filter the issues list to only include the first count issues of each code.
428464
429-
Parameters:
430-
issues (list): A list of dictionaries containing the full issue list.
431-
count (int): The number of issues to keep for each code.
432-
by_file (bool): If True, group by file name.
465+
Parameters:
466+
issues (list[dict]): A list of dictionaries containing the full issue list.
467+
count (int): The number of issues to keep for each code.
468+
by_file (bool): If True, group by file name.
433469
434470
Returns:
435471
tuple[list[dict], dict[str, int]]: A tuple containing:
@@ -457,22 +493,6 @@ def filter_issues_by_count(issues, count, by_file=False) -> tuple[list[dict], di
457493

458494
return filtered_issues, ErrorHandler.aggregate_code_counts(file_dicts)
459495

460-
@staticmethod
461-
def aggregate_code_counts(file_code_dict) -> dict:
462-
"""Aggregate the counts of codes across multiple files.
463-
464-
Parameters:
465-
file_code_dict (dict): A dictionary where keys are filenames and values are dictionaries of code counts.
466-
467-
Returns:
468-
dict: A dictionary with the aggregated counts of codes across all files.
469-
"""
470-
total_counts = defaultdict(int)
471-
for file_dict in file_code_dict.values():
472-
for code, count in file_dict.items():
473-
total_counts[code] += count
474-
return dict(total_counts)
475-
476496
@staticmethod
477497
def get_code_counts(issues: list[dict]) -> dict[str, int]:
478498
"""Count the occurrences of each error code in the issues list.
@@ -490,6 +510,34 @@ def get_code_counts(issues: list[dict]) -> dict[str, int]:
490510
code_counts[code] += 1
491511
return dict(code_counts)
492512

513+
@staticmethod
514+
def replace_tag_references(list_or_dict):
515+
"""Utility function to remove any references to tags, strings, etc. from any type of nested list or dict.
516+
517+
Use this if you want to save out issues to a file.
518+
519+
If you'd prefer a copy returned, use ErrorHandler.replace_tag_references(list_or_dict.copy()).
520+
521+
Parameters:
522+
list_or_dict (list or dict): An arbitrarily nested list/dict structure
523+
"""
524+
if isinstance(list_or_dict, dict):
525+
for key, value in list_or_dict.items():
526+
if isinstance(value, (dict, list)):
527+
ErrorHandler.replace_tag_references(value)
528+
elif isinstance(value, (bool, float, int)):
529+
list_or_dict[key] = value
530+
else:
531+
list_or_dict[key] = str(value)
532+
elif isinstance(list_or_dict, list):
533+
for key, value in enumerate(list_or_dict):
534+
if isinstance(value, (dict, list)):
535+
ErrorHandler.replace_tag_references(value)
536+
elif isinstance(value, (bool, float, int)):
537+
list_or_dict[key] = value
538+
else:
539+
list_or_dict[key] = str(value)
540+
493541

494542
def sort_issues(issues, reverse=False) -> list[dict]:
495543
"""Sort a list of issues by the error context values.
@@ -822,31 +870,3 @@ def _create_error_tree(error_dict, parent_element=None, add_link=True):
822870
_create_error_tree(value, context_ul, add_link)
823871

824872
return parent_element
825-
826-
827-
def replace_tag_references(list_or_dict):
828-
"""Utility function to remove any references to tags, strings, etc. from any type of nested list or dict.
829-
830-
Use this if you want to save out issues to a file.
831-
832-
If you'd prefer a copy returned, use replace_tag_references(list_or_dict.copy()).
833-
834-
Parameters:
835-
list_or_dict (list or dict): An arbitrarily nested list/dict structure
836-
"""
837-
if isinstance(list_or_dict, dict):
838-
for key, value in list_or_dict.items():
839-
if isinstance(value, (dict, list)):
840-
replace_tag_references(value)
841-
elif isinstance(value, (bool, float, int)):
842-
list_or_dict[key] = value
843-
else:
844-
list_or_dict[key] = str(value)
845-
elif isinstance(list_or_dict, list):
846-
for key, value in enumerate(list_or_dict):
847-
if isinstance(value, (dict, list)):
848-
replace_tag_references(value)
849-
elif isinstance(value, (bool, float, int)):
850-
list_or_dict[key] = value
851-
else:
852-
list_or_dict[key] = str(value)

hed/scripts/schema_script_util.py

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,7 @@
22
from collections import defaultdict
33
from hed.schema import from_string, load_schema, from_dataframes
44
from hed.schema import hed_cache
5-
from hed.errors import get_printable_issue_string, HedFileError
6-
from hed.errors.error_types import ErrorSeverity
5+
from hed.errors import get_printable_issue_string, separate_issues, HedFileError
76
from hed.schema.schema_comparer import SchemaComparer
87

98
all_extensions = [".tsv", ".mediawiki", ".xml", ".json"]
@@ -32,26 +31,32 @@ def _is_prerelease_partner(base_schema) -> bool:
3231
return hed_cache.get_hed_version_path(with_standard, check_prerelease=False) is None
3332

3433

35-
def validate_schema_object(base_schema, schema_name):
36-
"""Validate a schema object by checking non-warning compliance and roundtrip conversion.
34+
def validate_schema_object(base_schema, schema_name, check_warnings=False):
35+
"""Validate a schema object by checking compliance and roundtrip conversion.
3736
38-
Tests the schema for non-warning compliance issues and validates that it can be successfully
37+
Tests the schema for compliance issues and validates that it can be successfully
3938
converted to and reloaded from all four formats (MEDIAWIKI, XML, JSON, TSV).
4039
4140
Parameters:
4241
base_schema (HedSchema): The schema object to validate.
4342
schema_name (str): The name/path of the schema for error reporting.
43+
check_warnings (bool): If True, include warnings in the validation. Default is False.
4444
4545
Returns:
4646
list: A list of validation issue strings. Empty if no issues found.
4747
"""
4848
validation_issues = []
4949
try:
50-
issues = base_schema.check_compliance()
51-
issues = [issue for issue in issues if issue.get("severity", ErrorSeverity.ERROR) == ErrorSeverity.ERROR]
50+
issues = base_schema.check_compliance(check_for_warnings=check_warnings)
51+
if issues and check_warnings:
52+
errors, warnings = separate_issues(issues)
53+
issues = errors + warnings
54+
else:
55+
errors = issues
56+
5257
if issues:
53-
error_message = get_printable_issue_string(issues, title=schema_name)
54-
validation_issues.append(error_message)
58+
validation_issues.append(get_printable_issue_string(issues, title=schema_name))
59+
if errors:
5560
return validation_issues
5661

5762
# If the withStandard partner only exists in the prerelease cache, all unmerged
@@ -74,14 +79,18 @@ def validate_schema_object(base_schema, schema_name):
7479
return validation_issues
7580

7681

77-
def validate_schema(file_path):
82+
def validate_schema(file_path, check_warnings=False):
7883
"""Validate a schema file, ensuring it can save/load and passes validation.
7984
8085
Loads the schema from file, checks the file extension is lowercase,
81-
and validates the schema object for compliance and roundtrip conversion.
86+
and validates the schema object for compliance errors and roundtrip conversion.
8287
8388
Parameters:
8489
file_path (str): The path to the schema file to validate.
90+
If loading a TSV file, this should be a single filename where:
91+
Template: basename.tsv, where files are named basename_Struct.tsv, basename_Tag.tsv, etc.
92+
Alternatively, you can point to a directory containing the .tsv files.
93+
check_warnings (bool): If True, include warnings in the validation. Default is False.
8594
8695
Returns:
8796
list: A list of validation issue strings. Empty if no issues found.
@@ -96,7 +105,7 @@ def validate_schema(file_path):
96105
validation_issues = []
97106
try:
98107
base_schema = load_schema(file_path)
99-
validation_issues = validate_schema_object(base_schema, file_path)
108+
validation_issues = validate_schema_object(base_schema, file_path, check_warnings=check_warnings)
100109
except HedFileError as e:
101110
print(f"Saving/loading error: {file_path} {e.message}")
102111
error_text = e.message

tests/errors/test_error_reporter.py

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
SchemaWarnings,
88
get_printable_issue_string,
99
sort_issues,
10-
replace_tag_references,
10+
separate_issues,
1111
)
1212
from hed.errors.error_reporter import hed_tag_error, get_printable_issue_string_html, iter_errors
1313
from hed import HedString, HedTag
@@ -230,17 +230,17 @@ def test_replace_tag_references(self):
230230
"b": {"c": 2, "d": [3, {"e": HedString("Hed2", self._schema)}]},
231231
"f": [5, 6],
232232
}
233-
replace_tag_references(nested_dict)
233+
ErrorHandler.replace_tag_references(nested_dict)
234234
self.assertEqual(nested_dict, {"a": "Hed1", "b": {"c": 2, "d": [3, {"e": "Hed2"}]}, "f": [5, 6]})
235235

236236
# Test with mixed data types and HedString in a nested list
237237
nested_list = [HedString("Hed1", self._schema), {"a": 2, "b": [3, {"c": HedString("Hed2", self._schema)}]}]
238-
replace_tag_references(nested_list)
238+
ErrorHandler.replace_tag_references(nested_list)
239239
self.assertEqual(nested_list, ["Hed1", {"a": 2, "b": [3, {"c": "Hed2"}]}])
240240

241241
# Test with mixed data types and HedString in a list within a dict
242242
mixed = {"a": HedString("Hed1", self._schema), "b": [2, 3, {"c": HedString("Hed2", self._schema)}, 4]}
243-
replace_tag_references(mixed)
243+
ErrorHandler.replace_tag_references(mixed)
244244
self.assertEqual(mixed, {"a": "Hed1", "b": [2, 3, {"c": "Hed2"}, 4]})
245245

246246
def test_register_error_twice(self):
@@ -299,3 +299,50 @@ def test_get_code_counts(self):
299299
result_with_missing = ErrorHandler.get_code_counts(issues_with_missing_code)
300300
expected_with_missing = {"VALID_CODE": 2, "UNKNOWN": 1} # Default for missing code
301301
self.assertEqual(result_with_missing, expected_with_missing)
302+
303+
304+
class TestSeparateIssues(unittest.TestCase):
305+
"""Tests for separate_issues."""
306+
307+
@staticmethod
308+
def _make_issue(severity):
309+
return {"severity": severity, "message": "test"}
310+
311+
def test_empty_list(self):
312+
errors, warnings = separate_issues([])
313+
self.assertEqual(errors, [])
314+
self.assertEqual(warnings, [])
315+
316+
def test_only_errors(self):
317+
issues = [self._make_issue(ErrorSeverity.ERROR), self._make_issue(ErrorSeverity.ERROR)]
318+
errors, warnings = separate_issues(issues)
319+
self.assertEqual(len(errors), 2)
320+
self.assertEqual(len(warnings), 0)
321+
322+
def test_only_warnings(self):
323+
issues = [self._make_issue(ErrorSeverity.WARNING), self._make_issue(ErrorSeverity.WARNING)]
324+
errors, warnings = separate_issues(issues)
325+
self.assertEqual(len(errors), 0)
326+
self.assertEqual(len(warnings), 2)
327+
328+
def test_mixed(self):
329+
issues = [
330+
self._make_issue(ErrorSeverity.ERROR),
331+
self._make_issue(ErrorSeverity.WARNING),
332+
self._make_issue(ErrorSeverity.ERROR),
333+
]
334+
errors, warnings = separate_issues(issues)
335+
self.assertEqual(len(errors), 2)
336+
self.assertEqual(len(warnings), 1)
337+
338+
def test_original_list_unchanged(self):
339+
issues = [self._make_issue(ErrorSeverity.ERROR), self._make_issue(ErrorSeverity.WARNING)]
340+
separate_issues(issues)
341+
self.assertEqual(len(issues), 2)
342+
343+
def test_missing_severity_treated_as_error(self):
344+
"""Issues without a 'severity' key should be treated as errors, not raise KeyError."""
345+
issues = [{"message": "no severity"}, self._make_issue(ErrorSeverity.WARNING)]
346+
errors, warnings = separate_issues(issues)
347+
self.assertEqual(len(errors), 1, "Issue missing severity should default to ERROR")
348+
self.assertEqual(len(warnings), 1)

0 commit comments

Comments
 (0)