From 0e9b9afb9181dec4c1a4d954f48c07ef0624c004 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20S=C3=B6derbergh?= Date: Tue, 11 Mar 2025 14:57:15 +0100 Subject: [PATCH 01/32] start work on new config system --- problemtools/verifyproblem.py | 379 ++++++++++++++++++++++++++++++++++ 1 file changed, 379 insertions(+) diff --git a/problemtools/verifyproblem.py b/problemtools/verifyproblem.py index 646e262f..bc70405e 100644 --- a/problemtools/verifyproblem.py +++ b/problemtools/verifyproblem.py @@ -781,6 +781,385 @@ class ProblemStatementLegacy(ProblemStatement): class ProblemStatement2023_07(ProblemStatement): EXTENSIONS = ['md', 'tex'] +config_format = { + "type":"object", + "required": [], + "config_fields": ["problem_format_version", "type", "name", "uuid", "author", "source", "source_url", "license", "rights_owner", "limits", "validation", "validator_flags", "keywords", "grading"], + "properties": { + "problem_format_version": { + "type": "string", + "default": "legacy", + "alternatives": {"legacy":{}} + }, + "type": { + "type": "string", + "default": "pass-fail", + "alternatives": { + "pass-fail": { + "forbid": ["grading/on_reject:grade"] + }, + "scoring": {}, + }, + }, + "name": { + "type": "string", + "default": "", + }, + "uuid": { + "type": "string", + "default": "", + }, + "author": { + "type": "string", + "default": "", + }, + "source": { + "type": "string", + "default": "", + }, + "source_url": { + "type": "string", + "default": "", + "alternatives": { + ".*": { + "require": ["source"] + } + }, + }, + "licence": { + "type": "string", + "default": "unknown", + "alternatives": { + "unknown": { + "warn": "License is unknown", + "require": ["rights_owner"] + }, + "cc0|cc by|cc by-sa|educational|permission": { + "require": ["rights_owner"] + }, + "public domain": { + "forbid": ["rights_owner"] + } + } + }, + "rights_owner": { + "type": "string", + "default": "copy-from:author", + }, + "limits": { + "type": "object", + "required": [], + "properties": { + "time_multiplier": { + "type": "float", + "default": 5.0 + }, + "time_safety_margin": { + "type": "float", + "default": 2.0 + }, + "memory": { + "type": "int", + "default": "copy-from:system_default/memory" + }, + "output": { + "type": "int", + "default": "copy-from:system_default/output" + }, + "code": { + "type": "int", + "default": "copy-from:system_default/code" + }, + "compilation_time": { + "type": "float", + "default": "copy-from:system_default/compilation_time" + }, + "compilation_memory": { + "type": "int", + "default": "copy-from:system_default/compilation_memory" + }, + "validation_time": { + "type": "float", + "default": "copy-from:system_default/validation_time" + }, + "validation_memory": { + "type": "int", + "default": "copy-from:system_default/validation_memory" + }, + "validation_output": { + "type": "int", + "default": "copy-from:system_default/validation_output" + } + } + }, + "validation": { + "type": "object", + "parsing": "legacy-validation", + "required": [], + "properties": { + "type": { + "type": "string", + "default": "default", + "alternatives": { + "default": { + "forbid": ["validation/interactive:true", "validation/score:true"] + }, + "custom": {} + } + }, + "interactive": { + "type": "bool", + "default": False + }, + "score": { + "type": "bool", + "default": False + } + } + }, + "validator_flags": { + "type": "string", + "default": "", + }, + "keywords": { + "type": "list", + "parsing": "space-separated-strings", + "content": { + "type": "string", + }, + "default": [], + } + } +} + +class BaseValidator: + def __init__(self, layout: dict, aspect: ProblemAspect, path: str = ""): + self.layout = layout + self.aspect = aspect + self.path = path + + def verify(self, value): + """ + Verifies the value: + - Applies defaults + - Converts types + - Logs warnings/errors if needed + """ + raise NotImplementedError("Subclasses must implement verify") + + def check(self, value, get_path_func): + """ + Performs extra-checks (like forbid/require logic) + get_path_func can be used to fetch other values by path. + """ + raise NotImplementedError("Subclasses must implement check") + + +class StringValidator(BaseValidator): + def __init__(self, layout: dict, aspect: ProblemAspect, path: str = ""): + super().__init__(layout, aspect, path) + alternatives = self.layout.get("alternatives") + if alternatives: + self.patterns = {alt: re.compile('^' + alt + '$') for alt in alternatives} + else: + self.patterns = None + + def verify(self, value): + if value is None: + value = self.layout.get("default", "") + if not isinstance(value, str): + self.aspect.warning(f'Property {self.path} was expected to be of type string') + value = str(value) + if self.patterns: + if not any(pattern.match(value) for pattern in self.patterns.values()): + self.aspect.error(f"Property {self.path} is {value} but must match one of {list(self.patterns.keys())}") + value = self.layout.get("default", "") + return value + + def check(self, value, get_path_func): + if not self.patterns: + return + match = next((key for key, pattern in self.patterns.items() if pattern.match(value)), None) + checks = self.layout["alternatives"][match] + for forbidden in checks.get("forbid", []): + other_path, expected = forbidden.split(':') + if get_path_func(other_path) == expected: + self.aspect.error(f"Property {self.path} has value {value} which forbids property {other_path} to have value {expected}") + for required in checks.get("required", []): + if not get_path_func(required): #TODO: This is not a good way to handle this check I think + self.aspect.error(f"Property {self.path} has value {value} which requires property {required}") + if "warn" in checks: + self.aspect.warning(checks["warn"]) + +class ObjectValidator(BaseValidator): + def verify(self, value): + if value is None: + return self.layout.get("default", {}) + if self.layout.get("parsing") == "legacy-validation": + if not isinstance(value, str): + self.aspect.error(f"Property {self.path} was expected to be a string") + return {} + elements = value.split() + value = { + "type": elements[0], + "interactive": "interactive" in elements[1:], + "score": "score" in elements[1:] + } + if not isinstance(value, dict): + self.aspect.error(f"property {self.path} was expected to be a dictionary") + return {} + for prop in self.layout.get("required", []): + if prop not in value: + self.aspect.error(f"Missing required property: {self.path}/{prop}") + for prop in value.keys(): + if prop not in self.layout["properties"]: + self.aspect.warning(f"Unknown property: {self.path}/{prop}") + return value + + def check(self, value, get_path_func): + pass + +class ListValidator(BaseValidator): + def verify(self, value): + if value is None: + return self.layout.get("default", []) + if self.layout.get("parsing") == "space-separated-strings": + if not isinstance(value, str): + self.aspect.error(f"Property {self.path} was expected to be a string") + return {} + value = value.split() + if not isinstance(value, list): + self.aspect.error(f"property {self.path} was expected to be a list") + return {} + return value + + def check(self, value, get_path_func): + pass + +class FloatValidator(BaseValidator): + def verify(self, value): + if value is None: + return self.layout.get("default", 0.0) + if not isinstance(value, float): + try: + value = float(value) + except Exception: + self.aspect.error(f"Property {self.path} was expected to be a float") + value = self.layout.get("default", 0.0) + return value + + def check(self, value, get_path_func): + pass + +class IntValidator(BaseValidator): + def verify(self, value): + if value is None: + return self.layout.get("default", 0) + if not isinstance(value, int): + try: + value = int(value) + self.aspect.warning(f"Property {self.path} should be of type integer, interpreting as {value}") + except Exception: + self.aspect.error(f"Property {self.path} was expected to be an integer") + value = self.layout.get("default", 0) + return value + + def check(self, value, get_path_func): + pass + +def get_validator(layout: dict, aspect: ProblemAspect, path: str) -> BaseValidator: + type_map = { + "string": StringValidator, + "int": IntValidator, + "float": FloatValidator, + "object": ObjectValidator, + } + typ = layout.get("type") + if typ not in type_map: + raise NotImplementedError(f"Unrecognized type: {typ}") + return type_map[typ](layout, aspect, path) + +class NewProblemConfig(ProblemPart): + PART_NAME = 'config2' + + LAYOUT = {} + + def setup(self): + self.configfile = os.path.join(self.problem.probdir, 'problem.yaml') + self.data = {} + if os.path.isfile(self.configfile): + try: + with open(self.configfile) as f: + self.data = yaml.safe_load(f) or {} + except Exception as e: + self.error(str(e)) + self.data = self.verify_data(self.LAYOUT, self.data) + self.inject_data() + def resolve_all_copy_from(data, path): + if isinstance(data, dict): + for k, v in data.items(): + resolve_all_copy_from(v, f'{path}/{k}') + else: + self.resolve_copy_from(path) + resolve_all_copy_from(self.data, "") + + def inject_data(self): + pass + + def resolve_copy_from(self, path: str, resolving=set()): + if path in resolving: + raise NotImplementedError(f"Circular copy-from dependency between properties {list(resolving)}") + val = self.get_path(path) + if not isinstance(val, str) or not val.startswith("copy-from:"): + return + copy_path = val.split(':')[1] + resolving.add(path) + self.resolve_copy_from(copy_path) + resolving.remove(path) + cur = self.data + parts = path.split('/') + for d in parts[:-1]: + cur = cur[d] + cur[parts[-1]] = self.get_path(copy_path) + + + def get_path(self, path: str) -> Any: + cur = self.data + parts = path.split('/') + for part in parts[:-1]: + if not isinstance(cur, dict): + return None + cur = cur.get(part, {}) + return cur.get(parts[-1], None) + + def verify_data(self, layout, data, path="") -> Any: + validator = get_validator(layout, self, path) + verified = validator.verify(data) + if layout["type"] == "object": + for prop, spec in layout.get("properties", {}).items(): + verified[prop] = self.verify_data(spec, verified.get(prop), f"{path}/{prop}") + elif layout["type"] == "list": + for i in range(len(verified)): + verified[i] = self.verify_data(layout["content"], verified[i], f"{path}[{i}]") + return verified + + def check_data(self, layout, data, path=""): + validator = get_validator(layout, self, path) + validator.check(data, self.get_path) + if layout["type"] == "object": + for prop, spec in layout.get("properties", {}).items(): + self.check_data(spec, data.get(prop), f"{path}/{prop}") + elif layout["type"] == "list": + for i in range(len(verified)): + self.verify_check(layout["content"], verified[i], f"{path}[{i}]") + + def check(self, context: Context) -> bool: + self.check_data(self.LAYOUT, self.data) + return True + +class NewProblemConfigLegacy(NewProblemConfig): + LAYOUT = config_format + class ProblemConfig(ProblemPart): PART_NAME = 'config' From 62df7faa04efed45d432094b56c171298bb168da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20S=C3=B6derbergh?= Date: Thu, 13 Mar 2025 19:41:35 +0100 Subject: [PATCH 02/32] begin documenting parser --- problemtools/problem_yaml_parser/__init__.py | 242 ++++++++++++++++++ problemtools/problem_yaml_parser/spec.md | 45 ++++ .../problem_yaml_parser/wip_format.yaml | 131 ++++++++++ problemtools/verifyproblem.py | 22 +- 4 files changed, 433 insertions(+), 7 deletions(-) create mode 100644 problemtools/problem_yaml_parser/__init__.py create mode 100644 problemtools/problem_yaml_parser/spec.md create mode 100644 problemtools/problem_yaml_parser/wip_format.yaml diff --git a/problemtools/problem_yaml_parser/__init__.py b/problemtools/problem_yaml_parser/__init__.py new file mode 100644 index 00000000..d613e191 --- /dev/null +++ b/problemtools/problem_yaml_parser/__init__.py @@ -0,0 +1,242 @@ +import re +from typing import Any, Callable, Literal, Pattern, Match, ParamSpec, TypeVar + + +class Path: + INDEXING_REGEX = re.compile(r'^([A-Za-z_0-9\-]+)\[(\d+)\]$') + @staticmethod + def parse(path: str) -> 'Path': + parts = path.split('/') + res = [] + for part in parts: + m = Path.INDEXING_REGEX.match(part) + if m: + res.append(m.group(1)) + res.append(int(m.group(2))) + else: + res.append(part) + return Path(tuple(res)) + + @staticmethod + def combine(*parts: list[str|int|'Path']) -> 'Path': + res = [] + for part in parts: + if isinstance(part, int): + res.append(part) + continue + if isinstance(part, str): + part = Path.parse(part) + res.extend(list(part.path)) #type: ignore + return Path(tuple(res)) + + def __init__(self, path: tuple[str|int]) -> None: + self.path = path + + def index(self, data: dict) -> Any|None: + rv = data + for part in self.path: + if isinstance(part, int): + if not isinstance(rv, list): + return None + try: + rv = rv[part] + except IndexError: + return None + else: + if part not in rv: + return None + rv = rv[part] + return rv + + def spec_path(self) -> 'Path': + res = [] + for part in self.path: + if isinstance(part, str): + res.append("properties") + res.append("part") + elif isinstance(part, int): + res.append("content") + return Path(tuple(res)) + + def __str__(self) -> str: + strings = [] + for part in self.path: + if isinstance(part, int): + strings[-1] += f'[{part}]' + else: + strings.append(part) + return '/'.join(strings) + +class Metadata: + def __init__(self, specification: dict) -> None: + self.spec = specification + self.error_func = lambda s: print(f"ERROR: {s}") + self.warning_func = lambda s: print(f"WARNING: {s}") + self.data = None + + def __getitem__(self, key: str) -> Any: + if self.data is None: + raise Exception('data has not been loaded yet') + return Path.parse(key).index(self.data) + + def set_error_callback(self, fun: Callable): + self.error_func = fun + + def set_warning_callback(self, fun: Callable): + self.warning_func = fun + + def load_config(self, config: dict) -> None: + pass + + def check_config(self) -> None: + pass + + #TODO: type for path + def get_validator(self, layout: dict, path) -> 'BaseValidator': + type_map = { + "string": StringValidator, + "int": IntValidator, + "float": FloatValidator, + "object": ObjectValidator, + } + typ = layout.get("type") + if typ not in type_map: + raise NotImplementedError(f"Unrecognized type: {typ}") + return type_map[typ](layout, self, path) + + +class BaseValidator: + def __init__(self, layout: dict, metadata: Metadata, path: str = ""): + self.layout = layout + self.metadata = metadata + self.path = path + + def verify(self, value): + """ + Verifies the value: + - Applies defaults + - Converts types + - Logs warnings/errors if needed + """ + raise NotImplementedError("Subclasses must implement verify") + + def check(self, value): + """ + Performs extra-checks (like forbid/require logic) + get_path_func can be used to fetch other values by path. + """ + raise NotImplementedError("Subclasses must implement check") + + +class StringValidator(BaseValidator): + def __init__(self, layout: dict, metadata: Metadata, path: str = ""): + super().__init__(layout, metadata, path) + alternatives = self.layout.get("alternatives") + if alternatives: + self.patterns = {alt: re.compile('^' + alt + '$') for alt in alternatives} + else: + self.patterns = None + + def verify(self, value): + if value is None: + value = self.layout.get("default", "") + if not isinstance(value, str): + self.metadata.warning_func(f'Property {self.path} was expected to be of type string') + value = str(value) + if self.patterns: + if not any(pattern.match(value) for pattern in self.patterns.values()): + self.metadata.error_func(f"Property {self.path} is {value} but must match one of {list(self.patterns.keys())}") + value = self.layout.get("default", "") + return value + + def check(self, value): + if not self.patterns: + return + match = next((key for key, pattern in self.patterns.items() if pattern.match(value)), None) + checks = self.layout["alternatives"][match] + for forbidden in checks.get("forbid", []): + other_path, expected = forbidden.split(':') + if self.metadata[other_path] == expected: + self.metadata.error_func(f"Property {self.path} has value {value} which forbids property {other_path} to have value {expected}") + for required in checks.get("required", []): + if not self.metadata[required]: #TODO: This is not a good way to handle this check I think + self.metadata.error_func(f"Property {self.path} has value {value} which requires property {required}") + if "warn" in checks: + self.metadata.warning_func(checks["warn"]) + +class ObjectValidator(BaseValidator): + def verify(self, value): + if value is None: + return self.layout.get("default", {}) + if self.layout.get("parsing") == "legacy-validation": + if not isinstance(value, str): + self.metadata.error_func(f"Property {self.path} was expected to be a string") + return {} + elements = value.split() + value = { + "type": elements[0], + "interactive": "interactive" in elements[1:], + "score": "score" in elements[1:] + } + if not isinstance(value, dict): + self.metadata.error_func(f"property {self.path} was expected to be a dictionary") + return {} + for prop in self.layout.get("required", []): + if prop not in value: + self.metadata.error_func(f"Missing required property: {self.path}/{prop}") + for prop in value.keys(): + if prop not in self.layout["properties"]: + self.metadata.warning_func(f"Unknown property: {self.path}/{prop}") + return value + + def check(self, value): + pass + +class ListValidator(BaseValidator): + def verify(self, value): + if value is None: + return self.layout.get("default", []) + if self.layout.get("parsing") == "space-separated-strings": + if not isinstance(value, str): + self.metadata.error_func(f"Property {self.path} was expected to be a string") + return [] + value = value.split() + if not isinstance(value, list): + self.metadata.error_func(f"property {self.path} was expected to be a list") + return [] + return value + + def check(self, value): + pass + +class FloatValidator(BaseValidator): + def verify(self, value): + if value is None: + return self.layout.get("default", 0.0) + if not isinstance(value, float): + try: + value = float(value) + except Exception: + self.metadata.error_func(f"Property {self.path} was expected to be a float") + value = self.layout.get("default", 0.0) + return value + + def check(self, value): + pass + +class IntValidator(BaseValidator): + def verify(self, value): + if value is None: + return self.layout.get("default", 0) + if not isinstance(value, int): + try: + value = int(value) + self.metadata.warning_func(f"Property {self.path} should be of type integer, interpreting as {value}") + except Exception: + self.metadata.error_func(f"Property {self.path} was expected to be an integer") + value = self.layout.get("default", 0) + return value + + def check(self, value): + pass + diff --git a/problemtools/problem_yaml_parser/spec.md b/problemtools/problem_yaml_parser/spec.md new file mode 100644 index 00000000..8e069595 --- /dev/null +++ b/problemtools/problem_yaml_parser/spec.md @@ -0,0 +1,45 @@ +Specification is built up of a recursive definition of items, inspired by json-schemas. + +Each item has to contain the field "type" with one of the defined types. + +# Common fields for all types +## default +The field "default" will hold the fallback-value in the case that the user-defined value +does not exist or could not be parsed. Each type will have a standard-default that will +be used if default is not specified. + +## type +Each item has to contain a type, which is one of the types defined. This decides what +type will end up there in the final result. + +## flags +List of flags that can be used to signal things about this item. +- "deprecated": if specified in user config, give deprecation-warning + +## parsing +If this item requires more complex parsing, a parsing rule can be added. This field is +a string that will map to one of the available parsing rules. A parsing rule can require +other fields to be initialized before it allows the parsing to happen. + +## More might be added... + +# Types + +## object +Standard default: {} + +Has the following properties: +- "required": List of all strictly required properties, will give an error if one is missing +- "" + +## list +Standard default: [] + +## string +Standard default: "" + +## int +standard default: 0 + +## float +standard default: 0.0 diff --git a/problemtools/problem_yaml_parser/wip_format.yaml b/problemtools/problem_yaml_parser/wip_format.yaml new file mode 100644 index 00000000..dcf1436d --- /dev/null +++ b/problemtools/problem_yaml_parser/wip_format.yaml @@ -0,0 +1,131 @@ +type: object +required: [] +config_fields: +- problem_format_version +- type +- name +- uuid +- author +- source +- source_url +- license +- rights_owner +- limits +- validation +- validator_flags +- keywords +- grading +properties: + problem_format_version: + type: string + default: legacy + alternatives: + legacy: {} + type: + type: string + default: pass-fail + alternatives: + pass-fail: + forbid: + - grading/on_reject:grade + scoring: {} + name: + type: string + default: '' + uuid: + type: string + default: '' + author: + type: string + default: '' + source: + type: string + default: '' + source_url: + type: string + default: '' + alternatives: + ".+": + require: + - source + licence: + type: string + default: unknown + alternatives: + unknown: + warn: License is unknown + require: + - rights_owner + cc0|cc by|cc by-sa|educational|permission: + require: + - rights_owner + public domain: + forbid: + - rights_owner + rights_owner: + type: string + parsing: rights_owner_legacy + default: copy-from:author + limits: + type: object + required: [] + properties: + time_multiplier: + type: float + default: 5 + time_safety_margin: + type: float + default: 2 + memory: + type: int + default: copy-from:system_default/memory + output: + type: int + default: copy-from:system_default/output + code: + type: int + default: copy-from:system_default/code + compilation_time: + type: float + default: copy-from:system_default/compilation_time + compilation_memory: + type: int + default: copy-from:system_default/compilation_memory + validation_time: + type: float + default: copy-from:system_default/validation_time + validation_memory: + type: int + default: copy-from:system_default/validation_memory + validation_output: + type: int + default: copy-from:system_default/validation_output + validation: + type: object + parsing: legacy-validation + required: [] + properties: + type: + type: string + default: default + alternatives: + default: + forbid: + - validation/interactive:true + - validation/score:true + custom: {} + interactive: + type: bool + default: false + score: + type: bool + default: false + validator_flags: + type: string + default: '' + keywords: + type: list + parsing: space-separated-strings + content: + type: string + default: [] diff --git a/problemtools/verifyproblem.py b/problemtools/verifyproblem.py index bc70405e..a17dcf59 100644 --- a/problemtools/verifyproblem.py +++ b/problemtools/verifyproblem.py @@ -1079,6 +1079,11 @@ def get_validator(layout: dict, aspect: ProblemAspect, path: str) -> BaseValidat raise NotImplementedError(f"Unrecognized type: {typ}") return type_map[typ](layout, aspect, path) +class ConfigLoader: + def __init__(self, config_specification: dict) -> None: + self.spec = config_specification + + class NewProblemConfig(ProblemPart): PART_NAME = 'config2' @@ -1123,14 +1128,17 @@ def resolve_copy_from(self, path: str, resolving=set()): cur[parts[-1]] = self.get_path(copy_path) - def get_path(self, path: str) -> Any: + def get_path(self, *path: list[str|int]) -> Any: cur = self.data - parts = path.split('/') - for part in parts[:-1]: - if not isinstance(cur, dict): - return None - cur = cur.get(part, {}) - return cur.get(parts[-1], None) + for part in path: + if isinstance(part, int): + if not isinstance(cur, list) or len(cur) >= part: + return None + cur = cur[part] + elif isinstance(cur, dict): + cur = cur.get(part) + return None + return cur def verify_data(self, layout, data, path="") -> Any: validator = get_validator(layout, self, path) From 613437eeadc0432aecf78bd1a8cbc568fb7024d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20S=C3=B6derbergh?= Date: Fri, 21 Mar 2025 17:17:40 +0100 Subject: [PATCH 03/32] Planning about config verification system Co-authored-by: Zazmuz --- problemtools/problem_yaml_parser/__init__.py | 1 - problemtools/problem_yaml_parser/notes.txt | 188 ++++++++++++++++++ problemtools/problem_yaml_parser/spec.md | 58 +++++- .../problem_yaml_parser/wip_format.yaml | 84 +++++--- 4 files changed, 294 insertions(+), 37 deletions(-) create mode 100644 problemtools/problem_yaml_parser/notes.txt diff --git a/problemtools/problem_yaml_parser/__init__.py b/problemtools/problem_yaml_parser/__init__.py index d613e191..dece28e9 100644 --- a/problemtools/problem_yaml_parser/__init__.py +++ b/problemtools/problem_yaml_parser/__init__.py @@ -104,7 +104,6 @@ def get_validator(self, layout: dict, path) -> 'BaseValidator': raise NotImplementedError(f"Unrecognized type: {typ}") return type_map[typ](layout, self, path) - class BaseValidator: def __init__(self, layout: dict, metadata: Metadata, path: str = ""): self.layout = layout diff --git a/problemtools/problem_yaml_parser/notes.txt b/problemtools/problem_yaml_parser/notes.txt new file mode 100644 index 00000000..4d9a6027 --- /dev/null +++ b/problemtools/problem_yaml_parser/notes.txt @@ -0,0 +1,188 @@ +1. Load problem-config from problem.yaml +2. Override config-field 'name' with languages found in statements + +I guess previously the 'name' field did not matter, but now it has to match the problemstatements + +Is being checked: + [x] Config file exists + [c] Problem has at least one name, either through the name-field or through \problemname{X} from statements + [x] All fields are either optional or mandatory, where optional fields are taken from a standard-configuration in some standard-paths + [x] All fields have a value + [x] Field 'type' is either 'pass-fail' or 'scoring' + [x] Check that problem has either a license or a rights-owner, and that problem does not have a rights-owner if it is in public domain + [x] Check that field 'source' exists if 'source_url' exists + [x] Check that the license is valid + [x] Warn if license is unknown + [x] Check that 'grading/show_test_data_groups' is either True or False + [x] If above is true, check that problem is not pass-fail + [c] Warn if problem has custom test data groups, that is subgroups beyond sample and secret, and does not specify 'grading/show_test_data_groups' + [x] Check that 'grading/on_reject' is not 'grade' if problem-type is 'pass-fail' + [x] Check that 'grading/on_reject' is either 'first_error', 'worst_error' or 'grade' if it exists + [x] Check that 'grading/objective' is either 'min' or 'max' + [x] Warn if a deprecated grading-keys are found under 'grading' ('accept_score', 'reject_score', 'range', 'on_reject') + [x] Check that 'validation-type' is either 'default' or 'custom' + [x] Check that there are no 'validation-params' if 'validation-type' is 'default' + [x] Check that 'validation-params' are either 'grade' or 'interactive' if 'validation-type' is 'custom' + [x] Check that 'limits' is a dictionary + [r] Check that key 'libraries' does not exist (???) Remove this + + +"format": { + "argument": { + "alternatives": [dict|value], + "is_mandatory": true|false, + "fallback": Any, + "forbids": { + "field": [values] + }, + "get_from_config": true|false, # Allow stuff to come from other parts of the problem + } +} + +"legacy": { + "type":"object", + "required": [], + "config_fields": ["problem_format_version", "type", "name", "uuid", "author", "source", "source_url", "license", "rights_owner", "limits", "validation", "validator_flags", "keywords", "grading"], + "properties": { + "problem_format_version": { + "type": "string", + "default": "legacy", + "alternatives": {"legacy":{}} + }, + "type": { + "type": "string", + "default": "pass-fail", + "alternatives": { + "pass-fail": { + "forbid": "grading/on_reject:grade" + }, + "scoring": {}, + }, + }, + "name": { + "type": "string", + "default": "", + }, + "uuid": { + "type": "string", + "default": "", + }, + "author": { + "type": "string", + "default": "", + }, + "source": { + "type": "string", + "default": "", + }, + "source_url": { + "type": "string", + "default": "", + "alternatives": { + ".*": { + "require": ["source"] + } + }, + }, + "licence": { + "type": "string", + "default": "unknown", + "alternatives": { + "unknown": { + "warn": "License is unknown", + "require": ["rights_owner"] + } + "cc0|cc by|cc by-sa|educational|permission": { + "require": ["rights_owner"] + }, + "public domain": { + "forbid": ["rights_owner"] + } + } + }, + "rights_owner": { + "type": "string", + "default": "copy-from:author", + "alternatives": {".*":[]} + }, + "limits": { + "type": "object", + "required": [], + "properties": { + "time_multiplier": { + "type": "float", + "default": 5.0 + }, + "time_safety_margin": { + "type": "float", + "default": 2.0 + }, + "memory": { + "type": "int", + "default": "copy-from:system_default/memory" + }, + "output": { + "type": "int", + "default": "copy-from:system_default/output" + }, + "code": { + "type": "int", + "default": "copy-from:system_default/code" + }, + "compilation_time": { + "type": "float", + "default": "copy-from:system_default/compilation_time" + }, + "compilation_memory": { + "type": "int", + "default": "copy-from:system_default/compilation_memory" + }, + "validation_time": { + "type": "float", + "default": "copy-from:system_default/validation_time" + }, + "validation_memory": { + "type": "int", + "default": "copy-from:system_default/validation_memory" + }, + "validation_output": { + "type": "int", + "default": "copy-from:system_default/validation_output" + } + } + }, + "validation": { + "type": "specially-parsed-object", + "required": [], + "properties": { + "type": { + "type": "string", + "default": "default", + "alternatives": { + "default": { + "forbid": ["validation/interactive:true", "validation/score:true"] + }, + "custom": {} + } + }, + "interactive": { + "type": "bool", + "default": false + }, + "score": { + "type": "bool", + "default": false + } + } + }, + "validator_flags": { + "type": "string", + "default": "", + "alternatives": { ".*": {} } + }, + "keywords": { + "type": "space-separated-strings", + "default": [], + } + } +} diff --git a/problemtools/problem_yaml_parser/spec.md b/problemtools/problem_yaml_parser/spec.md index 8e069595..05865057 100644 --- a/problemtools/problem_yaml_parser/spec.md +++ b/problemtools/problem_yaml_parser/spec.md @@ -21,25 +21,75 @@ If this item requires more complex parsing, a parsing rule can be added. This fi a string that will map to one of the available parsing rules. A parsing rule can require other fields to be initialized before it allows the parsing to happen. -## More might be added... +Parsing rules will be defined with the following information: +- name of the rule +- type of the output +- prerequisite fields that should be validated before this one + # Types +The following are the types available to be specified. ## object Standard default: {} Has the following properties: - "required": List of all strictly required properties, will give an error if one is missing -- "" +- "properties": Dictionary of property-names to their types ## list Standard default: [] +Has the following properties: +- "content": Specification of type contained within list + ## string Standard default: "" +Has the following properties: +- "alternatives": If specified, one of these have to match the string provided in the config. Additional rules may be associated for different alternatives. Multiple rules may be matched to one alternative, in which case all checks will be triggered. + - "warn": _message_, send a the _message_ as a warning if alternative is matched + - "forbid": [Path], forbid any value other than the default standard for all paths in list + - "require": [Path], require all values to be different from the default standard of the type for all paths in list + +"foo/bar/baz" +"foo/bar/baz:asd" + ## int -standard default: 0 +Standard default: 0 + +Properties: +- minimum: _value_, minimum value allowed +- maximum: _value_, maximum value allowed ## float -standard default: 0.0 +Standard default: 0.0 + +Properties: +- minimum: _value_, minimum value allowed +- maximum: _value_, maximum value allowed + +## bool +Standard default: False + +"alternatives": { + True: { + "warn": "message", + "forbid": [Path] + }, + False: { + "warn": "message", + "forbid": [Path] + } +} + + +No properties + +# Stages +There are a couple of stages that will happen when loading and verifying config. + +1. Data is loaded and compared to format-specfication-document. Parsing rules are applied in an order that resolves dependencies. +2. External data is injected. +3. copy-from directives are executed in an order such that all are resolved. +4. Checks are performed, like the ones in the string-types alternatives. \ No newline at end of file diff --git a/problemtools/problem_yaml_parser/wip_format.yaml b/problemtools/problem_yaml_parser/wip_format.yaml index dcf1436d..d6af586c 100644 --- a/problemtools/problem_yaml_parser/wip_format.yaml +++ b/problemtools/problem_yaml_parser/wip_format.yaml @@ -1,20 +1,5 @@ type: object required: [] -config_fields: -- problem_format_version -- type -- name -- uuid -- author -- source -- source_url -- license -- rights_owner -- limits -- validation -- validator_flags -- keywords -- grading properties: problem_format_version: type: string @@ -31,23 +16,18 @@ properties: scoring: {} name: type: string - default: '' uuid: type: string - default: '' author: type: string - default: '' source: type: string - default: '' source_url: type: string - default: '' alternatives: ".+": require: - - source + - source:.+ licence: type: string default: unknown @@ -55,20 +35,18 @@ properties: unknown: warn: License is unknown require: - - rights_owner + - rights_owner:.+ cc0|cc by|cc by-sa|educational|permission: require: - - rights_owner + - rights_owner:.+ public domain: forbid: - - rights_owner + - rights_owner:.+ rights_owner: type: string parsing: rights_owner_legacy - default: copy-from:author limits: type: object - required: [] properties: time_multiplier: type: float @@ -103,11 +81,9 @@ properties: validation: type: object parsing: legacy-validation - required: [] properties: type: type: string - default: default alternatives: default: forbid: @@ -116,16 +92,60 @@ properties: custom: {} interactive: type: bool - default: false score: type: bool - default: false validator_flags: type: string - default: '' keywords: type: list parsing: space-separated-strings content: type: string - default: [] + grading: + type: object + properties: + show_test_data_groups: + type: bool + alternatives: + True: + forbid: + - type:pass-fail + False: {} + on_reject: + type: string + default: worst_error + flags: + - deprecated + alternatives: + first_error: {} + worst_error: {} + grade: {} + objective: + type: string + default: max + alternatives: + min: {} + max: {} + accept_score: + type: float # Should actually be type string, add custom parsing? + default: 1.0 + flags: + - deprecated + reject_score: + type: float # Should actually be type string, add custom parsing? + default: 0.0 + flags: + - deprecated + range: + type: object + parsing: min-max-float-string + flags: + - deprecated + properties: + min: + type: float + default: .inf + max: + type: float + default: -.inf + From 6cc31311ebebc348161165d74f575db8952949e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20S=C3=B6derbergh?= Date: Sat, 22 Mar 2025 15:34:38 +0100 Subject: [PATCH 04/32] Work on defining "alternatives" property better and allowing it for ints and floats --- problemtools/problem_yaml_parser/spec.md | 79 +++++++++++++------ .../problem_yaml_parser/wip_format.yaml | 34 ++++++-- 2 files changed, 80 insertions(+), 33 deletions(-) diff --git a/problemtools/problem_yaml_parser/spec.md b/problemtools/problem_yaml_parser/spec.md index 05865057..c1c3085b 100644 --- a/problemtools/problem_yaml_parser/spec.md +++ b/problemtools/problem_yaml_parser/spec.md @@ -47,44 +47,25 @@ Has the following properties: Standard default: "" Has the following properties: -- "alternatives": If specified, one of these have to match the string provided in the config. Additional rules may be associated for different alternatives. Multiple rules may be matched to one alternative, in which case all checks will be triggered. - - "warn": _message_, send a the _message_ as a warning if alternative is matched - - "forbid": [Path], forbid any value other than the default standard for all paths in list - - "require": [Path], require all values to be different from the default standard of the type for all paths in list - -"foo/bar/baz" -"foo/bar/baz:asd" +- "alternatives": See section "alternatives" ## int Standard default: 0 Properties: -- minimum: _value_, minimum value allowed -- maximum: _value_, maximum value allowed +- "alternatives": See section "alternatives" ## float Standard default: 0.0 Properties: -- minimum: _value_, minimum value allowed -- maximum: _value_, maximum value allowed +- "alternatives": See section "alternatives" ## bool Standard default: False -"alternatives": { - True: { - "warn": "message", - "forbid": [Path] - }, - False: { - "warn": "message", - "forbid": [Path] - } -} - - -No properties +Properties: +- "alternatives": See section "alternatives" # Stages There are a couple of stages that will happen when loading and verifying config. @@ -92,4 +73,52 @@ There are a couple of stages that will happen when loading and verifying config. 1. Data is loaded and compared to format-specfication-document. Parsing rules are applied in an order that resolves dependencies. 2. External data is injected. 3. copy-from directives are executed in an order such that all are resolved. -4. Checks are performed, like the ones in the string-types alternatives. \ No newline at end of file +4. Checks are performed, like the ones in the string-types alternatives. + +# alternatives +This is a property that exists on the types string, bool, ints and floats. + +The value of "alternatives" is a dictionary, with keys that indicate certain "matches", see section about "matching" for different types for more details. Each match is a key that maps to another dictionary with checks. Further info can be found about these checks in the "Checks" section. Below is an example of how the property can look on a string-parameter. + + +```yaml +type: string +default: unknown +alternatives: + unknown: + warn: License is unknown + require: + - rights_owner: ".+" + cc0|cc by|cc by-sa|educational|permission: + require: + - rights_owner: ".+" + public domain: + forbid: + - rights_owner: ".+" +``` + +## matching +The following formats are used to match the different types. +### string +For strings, matches will be given as regex strings. +### bool +For bools, the values "true" and "false" will be given and compared like expected. +### int +For ints, matches will be given as a string formated like "A:B", where A and B are integers. This will form an inclusive interval that is matched. A or B may be excluded to indicate that one endpoint is infinite. A single number may also be provided, which will match only that number. All numbers should be given in decimal notation. + +### float +Similar to int, but A and B are floats instead of integers. Single numbers may not be provided due to floating point imprecisions. The floats should be able to be parsed using Python's built in float parsing. + +## Checks +Each alternative may provide certain checks. The checks are described in the following subsections. Each check has a name and an argument, with the name being a key in the dictionary for that alternative, and the argument being the value for that key. + +If the value found in the parsed config does not match any value in the alternatives, this will generate an error. If alternatives is not provided in the config, this will be treated as all alternatives being okay without any checks. + +### warn +If this check is provided, a warning will be generated with the text in the argument if the alternative is matched. This can be used to give an indication that an alternative should preferrably not be used, like an unknown license. + +### require +This check ensures certain properties in the config match a certain value (see "matches" for further details about matching). The argument is a list of dictionaries which map a path to a property to the value it should match to. If it does not match, an error will be generated during the check stage. + +### forbid +Works the same as require, but instead of requiring the properties to match, it forbids them from matching. \ No newline at end of file diff --git a/problemtools/problem_yaml_parser/wip_format.yaml b/problemtools/problem_yaml_parser/wip_format.yaml index d6af586c..b967a450 100644 --- a/problemtools/problem_yaml_parser/wip_format.yaml +++ b/problemtools/problem_yaml_parser/wip_format.yaml @@ -12,7 +12,7 @@ properties: alternatives: pass-fail: forbid: - - grading/on_reject:grade + - grading/on_reject: grade scoring: {} name: type: string @@ -27,7 +27,7 @@ properties: alternatives: ".+": require: - - source:.+ + - source: ".+" licence: type: string default: unknown @@ -35,13 +35,13 @@ properties: unknown: warn: License is unknown require: - - rights_owner:.+ + - rights_owner: ".+" cc0|cc by|cc by-sa|educational|permission: require: - - rights_owner:.+ + - rights_owner: ".+" public domain: forbid: - - rights_owner:.+ + - rights_owner: ".+" rights_owner: type: string parsing: rights_owner_legacy @@ -51,33 +51,51 @@ properties: time_multiplier: type: float default: 5 + alternatives: + "0.0:": {} time_safety_margin: type: float default: 2 memory: type: int default: copy-from:system_default/memory + alternatives: + "0:": {} output: type: int default: copy-from:system_default/output + alternatives: + "0:": {} code: type: int default: copy-from:system_default/code + alternatives: + "0:": {} compilation_time: type: float default: copy-from:system_default/compilation_time + alternatives: + "0.0:": {} compilation_memory: type: int default: copy-from:system_default/compilation_memory + alternatives: + "0:": {} validation_time: type: float default: copy-from:system_default/validation_time + alternatives: + "0.0:": {} validation_memory: type: int default: copy-from:system_default/validation_memory + alternatives: + "0:": {} validation_output: type: int default: copy-from:system_default/validation_output + alternatives: + "0:": {} validation: type: object parsing: legacy-validation @@ -87,8 +105,8 @@ properties: alternatives: default: forbid: - - validation/interactive:true - - validation/score:true + - validation/interactive: true + - validation/score: true custom: {} interactive: type: bool @@ -109,7 +127,7 @@ properties: alternatives: True: forbid: - - type:pass-fail + - type: pass-fail False: {} on_reject: type: string From 71cd0ebcf057ef60ab8d3e8740837416ca894500 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20S=C3=B6derbergh?= Date: Sat, 22 Mar 2025 16:15:30 +0100 Subject: [PATCH 05/32] Make code to match alternatives --- problemtools/problem_yaml_parser/__init__.py | 52 ++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/problemtools/problem_yaml_parser/__init__.py b/problemtools/problem_yaml_parser/__init__.py index dece28e9..f796401a 100644 --- a/problemtools/problem_yaml_parser/__init__.py +++ b/problemtools/problem_yaml_parser/__init__.py @@ -67,6 +67,58 @@ def __str__(self) -> str: strings.append(part) return '/'.join(strings) +class AlternativeMatch: + def __init__(self, matchstr): + raise NotImplementedError("Specialize in subclass") + + def check(self, val) -> bool: + raise NotImplementedError("Specialize in subclass") + + @staticmethod + def get_matcher(type, matchstr) -> 'AlternativeMatch': + matchers = { + 'string': StringMatch, + 'int': IntMatch, + 'float': FloatMatch, + 'bool': BoolMatch + } + assert type in matchers + return matchers[type](matchstr) + +class StringMatch(AlternativeMatch): + def __init__(self, matchstr): + self.regex = re.compile(matchstr) + + def check(self, val) -> bool: + return self.regex.match(val) + +class IntMatch(AlternativeMatch): + def __init__(self, matchstr: str): + if ':' in matchstr: + self.start, self.end = [int(p.strip()) for p in matchstr.split()] + else: + self.start = self.end = int(matchstr.strip()) + + def check(self, val) -> bool: + return isinstance(val, int) and self.start <= val <= self.end + +class FloatMatch(AlternativeMatch): + def __init__(self, matchstr: str): + assert ':' in matchstr + self.start, self.end = [float(p.strip()) for p in matchstr.split()] + + def check(self, val) -> bool: + return isinstance(val, float) and self.start <= val <= self.end + +class BoolMatch(AlternativeMatch): + def __init__(self, matchstr: str): + matchstr = matchstr.strip().lower() + assert matchstr in ('true', 'false') + self.val = {'true':True,'false':False}[matchstr] + + def check(self, val) -> bool: + return isinstance(val, bool) and val == self.val + class Metadata: def __init__(self, specification: dict) -> None: self.spec = specification From 3fa220a5b082b21006c731c7532571c89eddd389 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20S=C3=B6derbergh?= Date: Sat, 22 Mar 2025 16:31:48 +0100 Subject: [PATCH 06/32] Start working on parsing rules --- problemtools/problem_yaml_parser/__init__.py | 26 ++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/problemtools/problem_yaml_parser/__init__.py b/problemtools/problem_yaml_parser/__init__.py index f796401a..150560e0 100644 --- a/problemtools/problem_yaml_parser/__init__.py +++ b/problemtools/problem_yaml_parser/__init__.py @@ -1,6 +1,8 @@ import re from typing import Any, Callable, Literal, Pattern, Match, ParamSpec, TypeVar +class SpecificationError(Exception): + pass class Path: INDEXING_REGEX = re.compile(r'^([A-Za-z_0-9\-]+)\[(\d+)\]$') @@ -156,6 +158,30 @@ def get_validator(self, layout: dict, path) -> 'BaseValidator': raise NotImplementedError(f"Unrecognized type: {typ}") return type_map[typ](layout, self, path) +class Parser: + NAME: str = "" + PATH_DEPENDENCIES: list[Path] = [] + OUTPUT_TYPE: str = "" + + def __init__(self, data: dict, specification: dict, path: Path): + if not self.NAME: + raise NotImplementedError("Subclasses of BaseParser need to set the name of the parsing rule") + if not self.OUTPUT_TYPE: + raise NotImplementedError("Subclasses of BaseParser need to set the output type of the parsing rule") + required_type = path.spec_path().index(data)["type"] + if required_type != self.OUTPUT_TYPE: + raise SpecificationError(f"Parsing rule for {path} outputs {self.OUTPUT_TYPE}, but the output should be of type {required_type}") + + def parse(self, data: dict, path: Path): + pass + +class DefaultStringParser(Parser): + NAME = "default-string-parser" + OUTPUT_TYPE = "string" + + pass + + class BaseValidator: def __init__(self, layout: dict, metadata: Metadata, path: str = ""): self.layout = layout From 1d6144eb50bc6b4eb04cc4f6a8b36c06e7df86f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20S=C3=B6derbergh?= Date: Sun, 23 Mar 2025 01:55:09 +0100 Subject: [PATCH 07/32] Work on implementing config loading and parsing --- problemtools/problem_yaml_parser/__init__.py | 341 +++++++++++++++++-- problemtools/problem_yaml_parser/spec.md | 2 +- 2 files changed, 306 insertions(+), 37 deletions(-) diff --git a/problemtools/problem_yaml_parser/__init__.py b/problemtools/problem_yaml_parser/__init__.py index 150560e0..533e4581 100644 --- a/problemtools/problem_yaml_parser/__init__.py +++ b/problemtools/problem_yaml_parser/__init__.py @@ -1,9 +1,18 @@ import re from typing import Any, Callable, Literal, Pattern, Match, ParamSpec, TypeVar +from collections import defaultdict class SpecificationError(Exception): pass +type_mapping = { + "string": str, + "object": dict, + "list": list, + "int": int, + "float": float +} + class Path: INDEXING_REGEX = re.compile(r'^([A-Za-z_0-9\-]+)\[(\d+)\]$') @staticmethod @@ -17,10 +26,10 @@ def parse(path: str) -> 'Path': res.append(int(m.group(2))) else: res.append(part) - return Path(tuple(res)) + return Path(*res) @staticmethod - def combine(*parts: list[str|int|'Path']) -> 'Path': + def combine(*parts: str|int|'Path') -> 'Path': res = [] for part in parts: if isinstance(part, int): @@ -29,9 +38,9 @@ def combine(*parts: list[str|int|'Path']) -> 'Path': if isinstance(part, str): part = Path.parse(part) res.extend(list(part.path)) #type: ignore - return Path(tuple(res)) + return Path(*res) - def __init__(self, path: tuple[str|int]) -> None: + def __init__(self, *path: str|int) -> None: self.path = path def index(self, data: dict) -> Any|None: @@ -58,7 +67,7 @@ def spec_path(self) -> 'Path': res.append("part") elif isinstance(part, int): res.append("content") - return Path(tuple(res)) + return Path(*res) def __str__(self) -> str: strings = [] @@ -68,6 +77,12 @@ def __str__(self) -> str: else: strings.append(part) return '/'.join(strings) + + def __eq__(self, value): + return self.path == value.path + + def __hash__(self): + return hash(self.path) class AlternativeMatch: def __init__(self, matchstr): @@ -93,24 +108,61 @@ def __init__(self, matchstr): def check(self, val) -> bool: return self.regex.match(val) + + def __str__(self) -> str: + self.regex.pattern class IntMatch(AlternativeMatch): def __init__(self, matchstr: str): - if ':' in matchstr: - self.start, self.end = [int(p.strip()) for p in matchstr.split()] - else: - self.start = self.end = int(matchstr.strip()) + try: + if matchstr.count(':') > 1: + raise ValueError + if ':' in matchstr: + self.start, self.end = [int(p) if p else None for p in map(str.strip, matchstr.split())] + else: + matchstr = matchstr.strip() + if not matchstr: + raise SpecificationError('Match string for integer was left empty') + self.start = self.end = int(matchstr) + except ValueError: + raise SpecificationError('Int match string should be of the form "A:B" where A and B can be parsed as ints or left empty, or a single integer') def check(self, val) -> bool: - return isinstance(val, int) and self.start <= val <= self.end + if not isinstance(val, int): + return False + if self.start is not None: + if val < self.start: + return False + if self.end is not None: + if val > self.start: + return False + return True + + def __str__(self): + if A == B: + return str(A) + A = str(self.start) if self.start is not None else "" + B = str(self.end) if self.end is not None else "" + return f'{A}:{B}' class FloatMatch(AlternativeMatch): def __init__(self, matchstr: str): - assert ':' in matchstr - self.start, self.end = [float(p.strip()) for p in matchstr.split()] + try: + if matchstr.count(':') != 1: + raise ValueError + first, second = [p.strip() for p in matchstr.split()] + self.start = float(first) if first else float('-inf') + self.end = float(second) if second else float('inf') + except ValueError: + raise SpecificationError('Float match string should be of the form "A:B" where A and B can be parsed as floats or left empty') def check(self, val) -> bool: return isinstance(val, float) and self.start <= val <= self.end + + def __str__(self): + A = str(self.start) if self.start != float('-inf') else "" + B = str(self.end) if self.end != float('inf') else "" + return f'{A}:{B}' class BoolMatch(AlternativeMatch): def __init__(self, matchstr: str): @@ -120,6 +172,9 @@ def __init__(self, matchstr: str): def check(self, val) -> bool: return isinstance(val, bool) and val == self.val + + def __str__(self): + return str(self.val) class Metadata: def __init__(self, specification: dict) -> None: @@ -128,10 +183,12 @@ def __init__(self, specification: dict) -> None: self.warning_func = lambda s: print(f"WARNING: {s}") self.data = None - def __getitem__(self, key: str) -> Any: + def __getitem__(self, key: str|Path) -> Any: if self.data is None: raise Exception('data has not been loaded yet') - return Path.parse(key).index(self.data) + if isinstance(key, str): + return Path.parse(key).index(self.data) + return key.index(self.data) def set_error_callback(self, fun: Callable): self.error_func = fun @@ -139,47 +196,259 @@ def set_error_callback(self, fun: Callable): def set_warning_callback(self, fun: Callable): self.warning_func = fun - def load_config(self, config: dict) -> None: - pass + def load_config(self, config: dict, injected_data: dict) -> None: + self.data: dict = DefaultObjectParser(config, self.spec, Path(), self.warning_func, self.error_func).parse() + + ready: list[tuple[Path, str]] = [] + dependencies = {} + depends_on = defaultdict(list) + solved = set() + for prop, spec in self.spec["properties"].items(): + parser = Parser.get_parser_type(spec) + deps = parser.PATH_DEPENDENCIES + if deps: + dependencies[(Path(), prop)] = len(deps) + for d in deps: + depends_on[d].append((Path(), prop)) + else: + ready.append((Path(), prop)) + + while ready: + p, c = ready.pop() + full_path = Path.combine(p, c) + spec = full_path.spec_path().index(self.spec) + parser = Parser.get_parser_type(spec)(self.data, self.spec, full_path, self.warning_func, self.error_func) + p.index(self.data)[c] = parser.parse() + if spec["type"] == "object": + for prop, c_spec in self.spec["properties"].items(): + c_parser = Parser.get_parser_type(c_spec) + deps = set(c_parser.PATH_DEPENDENCIES) - solved + if deps: + dependencies[(full_path, prop)] = len(deps) + for d in deps: + depends_on[d].append((full_path, prop)) + else: + ready.append((full_path, prop)) + elif spec["type"] == "list": + c_spec = Parser.get_parser_type(spec["content"]) + deps = set(c_parser.PATH_DEPENDENCIES) - solved + for i in range(len(full_path.index(self.data))): + if deps: + dependencies[(full_path, i)] = len(deps) + for d in deps: + depends_on[d].append((full_path, i)) + else: + ready.append((full_path, i)) + for x in depends_on[full_path]: + dependencies[x] -= 1 + if dependencies[x] == 0: + ready.append(x) + del dependencies[x] + if any(v > 0 for v in dependencies.items()): + raise SpecificationError("Circular dependency in specification by parsing rules") + self.data.update(injected_data) + # TODO: copy-from directives def check_config(self) -> None: pass - #TODO: type for path - def get_validator(self, layout: dict, path) -> 'BaseValidator': - type_map = { - "string": StringValidator, - "int": IntValidator, - "float": FloatValidator, - "object": ObjectValidator, - } - typ = layout.get("type") - if typ not in type_map: - raise NotImplementedError(f"Unrecognized type: {typ}") - return type_map[typ](layout, self, path) - class Parser: NAME: str = "" PATH_DEPENDENCIES: list[Path] = [] OUTPUT_TYPE: str = "" - def __init__(self, data: dict, specification: dict, path: Path): + def __init__(self, data: dict, specification: dict, path: Path, warning_func: Callable, error_func: Callable): + self.data = data + self.specification = specification + self.path = path + self.spec_path = path.spec_path() + self.warning_func = warning_func + self.error_func = error_func + if not self.NAME: - raise NotImplementedError("Subclasses of BaseParser need to set the name of the parsing rule") + raise NotImplementedError("Subclasses of Parser need to set the name of the parsing rule") if not self.OUTPUT_TYPE: - raise NotImplementedError("Subclasses of BaseParser need to set the output type of the parsing rule") - required_type = path.spec_path().index(data)["type"] + raise NotImplementedError("Subclasses of Parser need to set the output type of the parsing rule") + + required_type = self.spec_path.index(specification)["type"] if required_type != self.OUTPUT_TYPE: raise SpecificationError(f"Parsing rule for {path} outputs {self.OUTPUT_TYPE}, but the output should be of type {required_type}") + + if self.OUTPUT_TYPE in ("string", "int", "float", "bool"): + alternatives = Path.combine(self.spec_path, "alternatives").index(self.specification) + if alternatives is None: + self.alternatives = None + else: + self.alternatives = [(AlternativeMatch.get_matcher(self.OUTPUT_TYPE, key), val) for key, val in alternatives.items()] + else: + self.alternatives = None - def parse(self, data: dict, path: Path): - pass + def parse(self): + val = self.path.index(self.data) + out = self._parse(val) + + flags = Path.combine(self.spec_path, "flags").index(self.specification) or [] + if 'deprecated' in flags and out is not None: + self.warning_func(f'deprecated property was provided ({self.path})') + + if self.alternatives is not None: + found_match = False + for matcher, checks in self.alternatives: + if matcher.check(out): + found_match = True + if "warn" in checks: + self.warning_func(checks["warn"]) + if not found_match: + alts = ', '.join(f'"{matcher}"' for matcher in self.alternatives.keys()) + self.error_func(f"Property {self.path} did not match any of the specified alternatives ({alts})") + out = None + + if out is None: + fallback = Path.combine(self.spec_path, "default").index(self.specification) + if fallback is not None: + if isinstance(fallback, str) and fallback.startswith('copy-from:'): + fallback = ('copy-from', fallback.split(':')[1]) + return fallback + return type_mapping[self.OUTPUT_TYPE]() + + if not (isinstance(out, tuple) or isinstance(out, type_mapping[self.OUTPUT_TYPE])): + raise SpecificationError(f'Parsing rule "{self.NAME}" did not output the correct type. Output was: {out}') + return out + + def _parse(self, val): + raise NotImplementedError("Subclasses of Parse need to implement _parse()") + + @staticmethod + def get_parser_type(specification: dict) -> type: + parsing_rule = specification.get('parsing') + if parsing_rule is None: + typ = specification.get("type") + parsing_rule = f'default-{typ}-parser' + if parsing_rule not in parsers: + raise SpecificationError(f'Parser "{parsing_rule}" is not implemented') + return parsers[parsing_rule] + class DefaultStringParser(Parser): NAME = "default-string-parser" OUTPUT_TYPE = "string" - pass + def _parse(self, val): + if val is None: + return None + if not isinstance(val, str): + self.warning_func(f'Expected value of type string but got {val}, casting to string ({self.path})') + val = str(val) + return val + +class DefaultObjectParser(Parser): + NAME = "default-object-parser" + OUTPUT_TYPE = "object" + + def _parse(self, val): + if val is None: + return None + + if not isinstance(val, dict): + self.error_func(f'Expected an object, got {val} ({self.path})') + return None + + required = Path.combine(self.spec_path, 'required').index(self.specification) or [] + for req in required: + req_path = Path.combine(self.path, req) + if req_path.index(self.data) is None: + self.error_func(f'Missing required property: {req_path}') + + remove = [] + known_props = Path.combine(self.spec_path, 'properties').index(self.specification) + for prop in val.keys(): + if prop not in known_props: + self.warning_func(f'Unknown property: {Path.combine(self.path, prop)}') + remove.append(prop) + for r in remove: + del val[r] + + return val + +class DefaultListParser(Parser): + NAME = "default-list-parser" + OUTPUT_TYPE = "list" + + def _parse(self, val): + if val is None: + return None + + if not isinstance(val, list): + self.error_func(f'Expected a list, got {val} ({self.path})') + return None + + return val + +class DefaultIntParser(Parser): + NAME = "default-int-parser" + OUTPUT_TYPE = "int" + + def _parse(self, val): + if val is None: + return None + + if not isinstance(val, int): + try: + cast = int(val) + self.warning_func(f'Expected type int, got {val}. Casting to {cast} ({self.path})') + val = cast + except ValueError: + self.error_func(f'Expected a int, got {val} ({self.path})') + return None + + return val + +class DefaultFloatParser(Parser): + NAME = "default-float-parser" + OUTPUT_TYPE = "float" + + def _parse(self, val): + if val is None: + return None + + if not isinstance(val, float): + try: + cast = float(val) + self.warning_func(f'Expected type float, got {val}. Casting to {cast} ({self.path})') + val = cast + except ValueError: + self.error_func(f'Expected a float, got {val} ({self.path})') + return None + + return val + +class DefaultBoolParser(Parser): + NAME = "default-bool-parser" + OUTPUT_TYPE = "bool" + + def _parse(self, val): + if val is None: + return None + + if isinstance(val, str): + if val.lower() in ("true", "false"): + interpretation = val.lower() == 'true' + self.warning_func(f'Expected type bool but got a string "{val}" which will be interpreted as {interpretation} ({self.path})') + val = interpretation + else: + self.error_func(f'Expected type bool but got "{val}" ({self.path})') + return None + + if not isinstance(val, bool): + self.error_func(f'Expected type bool, got {val} ({self.path})') + return None + + return val + +parsers = {p.NAME: p for p in + [DefaultObjectParser, DefaultListParser, DefaultStringParser, + DefaultIntParser, DefaultFloatParser, DefaultBoolParser] +} class BaseValidator: diff --git a/problemtools/problem_yaml_parser/spec.md b/problemtools/problem_yaml_parser/spec.md index c1c3085b..6eb1c798 100644 --- a/problemtools/problem_yaml_parser/spec.md +++ b/problemtools/problem_yaml_parser/spec.md @@ -102,7 +102,7 @@ The following formats are used to match the different types. ### string For strings, matches will be given as regex strings. ### bool -For bools, the values "true" and "false" will be given and compared like expected. +For bools, the values "true" and "false" will be given and compared like expected. This check is case-insensitive. ### int For ints, matches will be given as a string formated like "A:B", where A and B are integers. This will form an inclusive interval that is matched. A or B may be excluded to indicate that one endpoint is infinite. A single number may also be provided, which will match only that number. All numbers should be given in decimal notation. From 0cb4831d63194050d9cb635f2c2892fa4d8d6ccc Mon Sep 17 00:00:00 2001 From: Zazmuz <44049876+Zazmuz@users.noreply.github.com> Date: Sun, 23 Mar 2025 08:38:57 +0100 Subject: [PATCH 08/32] Update spec.md standardise casing on bools --- problemtools/problem_yaml_parser/spec.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/problemtools/problem_yaml_parser/spec.md b/problemtools/problem_yaml_parser/spec.md index 6eb1c798..55e0999f 100644 --- a/problemtools/problem_yaml_parser/spec.md +++ b/problemtools/problem_yaml_parser/spec.md @@ -102,7 +102,7 @@ The following formats are used to match the different types. ### string For strings, matches will be given as regex strings. ### bool -For bools, the values "true" and "false" will be given and compared like expected. This check is case-insensitive. +For bools, the values "True" and "False" will be given and compared like expected. This check is case-insensitive. ### int For ints, matches will be given as a string formated like "A:B", where A and B are integers. This will form an inclusive interval that is matched. A or B may be excluded to indicate that one endpoint is infinite. A single number may also be provided, which will match only that number. All numbers should be given in decimal notation. @@ -121,4 +121,4 @@ If this check is provided, a warning will be generated with the text in the argu This check ensures certain properties in the config match a certain value (see "matches" for further details about matching). The argument is a list of dictionaries which map a path to a property to the value it should match to. If it does not match, an error will be generated during the check stage. ### forbid -Works the same as require, but instead of requiring the properties to match, it forbids them from matching. \ No newline at end of file +Works the same as require, but instead of requiring the properties to match, it forbids them from matching. From 0e2223ca0fd7ffd4c0db65fb5cbc3feabeface66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20S=C3=B6derbergh?= Date: Sun, 23 Mar 2025 10:23:10 +0100 Subject: [PATCH 09/32] Begin implementing copy-from dependencies --- problemtools/problem_yaml_parser/__init__.py | 76 +++++++++++++------- 1 file changed, 52 insertions(+), 24 deletions(-) diff --git a/problemtools/problem_yaml_parser/__init__.py b/problemtools/problem_yaml_parser/__init__.py index 533e4581..5b403f2d 100644 --- a/problemtools/problem_yaml_parser/__init__.py +++ b/problemtools/problem_yaml_parser/__init__.py @@ -69,6 +69,10 @@ def spec_path(self) -> 'Path': res.append("content") return Path(*res) + def up(self, levels=1) -> 'Path': + assert levels > 0 + return Path(*self.path[:-levels]) + def __str__(self) -> str: strings = [] for part in self.path: @@ -202,16 +206,18 @@ def load_config(self, config: dict, injected_data: dict) -> None: ready: list[tuple[Path, str]] = [] dependencies = {} depends_on = defaultdict(list) - solved = set() - for prop, spec in self.spec["properties"].items(): - parser = Parser.get_parser_type(spec) - deps = parser.PATH_DEPENDENCIES + solved = {Path()} + + def consider_prop(path: Path, key: str|int, deps: set[Path]): if deps: - dependencies[(Path(), prop)] = len(deps) + dependencies[(path, key)] = len(deps) for d in deps: - depends_on[d].append((Path(), prop)) + depends_on[d].append((path, key)) else: - ready.append((Path(), prop)) + ready.append((path, key)) + + for prop, spec in self.spec["properties"].items(): + consider_prop(Path(), prop, set(Parser.get_parser_type(spec).PATH_DEPENDENCIES)) while ready: p, c = ready.pop() @@ -221,33 +227,55 @@ def load_config(self, config: dict, injected_data: dict) -> None: p.index(self.data)[c] = parser.parse() if spec["type"] == "object": for prop, c_spec in self.spec["properties"].items(): - c_parser = Parser.get_parser_type(c_spec) - deps = set(c_parser.PATH_DEPENDENCIES) - solved - if deps: - dependencies[(full_path, prop)] = len(deps) - for d in deps: - depends_on[d].append((full_path, prop)) - else: - ready.append((full_path, prop)) + consider_prop(full_path, prop, set(Parser.get_parser_type(c_spec).PATH_DEPENDENCIES) - solved) elif spec["type"] == "list": - c_spec = Parser.get_parser_type(spec["content"]) - deps = set(c_parser.PATH_DEPENDENCIES) - solved + deps = set(Parser.get_parser_type(spec["content"]).PATH_DEPENDENCIES) - solved for i in range(len(full_path.index(self.data))): - if deps: - dependencies[(full_path, i)] = len(deps) - for d in deps: - depends_on[d].append((full_path, i)) - else: - ready.append((full_path, i)) + consider_prop(full_path, i, deps) for x in depends_on[full_path]: dependencies[x] -= 1 if dependencies[x] == 0: ready.append(x) del dependencies[x] + solved.add(full_path) if any(v > 0 for v in dependencies.items()): raise SpecificationError("Circular dependency in specification by parsing rules") self.data.update(injected_data) - # TODO: copy-from directives + + # TODO: resolve inherited dependencies, e.g. if baz copies foo/bar, but foo is copied from xyz + ready: list[tuple[Path, str]] = [] + dependencies = {} + depends_on = defaultdict(list) + solved = {Path()} + for prop in self.spec["properties"].keys(): + val = Path(prop).index(self.data) + deps = {Path.parse(val[1])} if isinstance(val, tuple) and val[0] == 'copy-from' else set() + consider_prop(Path(), prop, deps) + + while ready: + p, c = ready.pop() + full_path = Path.combine(p, c) + val = full_path.index(self.data) + if isinstance(val, tuple) and val[0] == 'copy-from': + copy_val = val[1].index(self.data) + assert copy_val is not None + p.index(self.data)[c] = copy_val + elif isinstance(val, dict): + for k, v in val.items(): + deps = {Path.parse(v[1])} if isinstance(v, tuple) and v[0] == 'copy-from' else set() + consider_prop(full_path, k, deps) + elif isinstance(val, list): + for k, v in enumerate(val): + deps = {Path.parse(v[1])} if isinstance(v, tuple) and v[0] == 'copy-from' else set() + consider_prop(full_path, k, deps) + for x in depends_on[full_path]: + dependencies[x] -= 1 + if dependencies[x] == 0: + ready.append(x) + del dependencies[x] + solved.add(full_path) + if any(v > 0 for v in dependencies.items()): + raise SpecificationError("Circular dependency in specification by copy-from directives") def check_config(self) -> None: pass From 78c9d364e7481795662f2517b851c88ea6c67bd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20S=C3=B6derbergh?= Date: Sun, 23 Mar 2025 14:29:19 +0100 Subject: [PATCH 10/32] Implement system for loading config Co-authored-by: Zazmuz --- .../__init__.py | 195 +++++++++++------- .../notes.txt | 0 .../spec.md | 0 .../wip_format.yaml | 2 +- 4 files changed, 120 insertions(+), 77 deletions(-) rename problemtools/{problem_yaml_parser => config_parser}/__init__.py (80%) rename problemtools/{problem_yaml_parser => config_parser}/notes.txt (100%) rename problemtools/{problem_yaml_parser => config_parser}/spec.md (100%) rename problemtools/{problem_yaml_parser => config_parser}/wip_format.yaml (99%) diff --git a/problemtools/problem_yaml_parser/__init__.py b/problemtools/config_parser/__init__.py similarity index 80% rename from problemtools/problem_yaml_parser/__init__.py rename to problemtools/config_parser/__init__.py index 5b403f2d..cc66bcdf 100644 --- a/problemtools/problem_yaml_parser/__init__.py +++ b/problemtools/config_parser/__init__.py @@ -1,7 +1,7 @@ import re -from typing import Any, Callable, Literal, Pattern, Match, ParamSpec, TypeVar -from collections import defaultdict - +from typing import Any, Callable, Generator, Literal, Pattern, Match, ParamSpec, TypeVar +from __future__ import annotations +from collections import defaultdict, deque class SpecificationError(Exception): pass @@ -9,14 +9,19 @@ class SpecificationError(Exception): "string": str, "object": dict, "list": list, + "bool": bool, "int": int, "float": float } +def is_copyfrom(val: Any) -> bool: + return isinstance(val, tuple) and val[0] == 'copy-from' + + class Path: INDEXING_REGEX = re.compile(r'^([A-Za-z_0-9\-]+)\[(\d+)\]$') @staticmethod - def parse(path: str) -> 'Path': + def parse(path: str) -> Path: parts = path.split('/') res = [] for part in parts: @@ -29,7 +34,7 @@ def parse(path: str) -> 'Path': return Path(*res) @staticmethod - def combine(*parts: str|int|'Path') -> 'Path': + def combine(*parts: str|int|Path) -> Path: res = [] for part in parts: if isinstance(part, int): @@ -59,7 +64,7 @@ def index(self, data: dict) -> Any|None: rv = rv[part] return rv - def spec_path(self) -> 'Path': + def spec_path(self) -> Path: res = [] for part in self.path: if isinstance(part, str): @@ -69,10 +74,43 @@ def spec_path(self) -> 'Path': res.append("content") return Path(*res) - def up(self, levels=1) -> 'Path': + def data_paths(self, data: dict) -> list[Path]: + """Finds all data paths that a spec_path is pointing towards (meaning it will explore all items in lists)""" + def path_is_not_copyfrom(path: Path) -> bool: + return not is_copyfrom(path.index(data)) + out = [Path()] + state = 'base' + for part in self.path: + if state == 'base': + if part == 'properties': + state = 'object-properties' + elif part == 'content': + state = 'base' + new_out = [] + for path in out: + val = out.index(data) or [] + if is_copyfrom(val): # skip copied + continue + assert isinstance(val, list) + new_out.extend(Path.combine(path, i) for i in range(len(val))) + out = new_out + if len(out) == 0: + return [] + else: + assert False + elif state == 'object-properties': + combined_paths = [Path.combine(path, part) for path in out] + out = [*filter(path_is_not_copyfrom, combined_paths)] + + return out + + def up(self, levels=1) -> Path: assert levels > 0 return Path(*self.path[:-levels]) + def last_name(self) -> int|str: + return self.path[-1] + def __str__(self) -> str: strings = [] for part in self.path: @@ -200,83 +238,88 @@ def set_error_callback(self, fun: Callable): def set_warning_callback(self, fun: Callable): self.warning_func = fun - def load_config(self, config: dict, injected_data: dict) -> None: - self.data: dict = DefaultObjectParser(config, self.spec, Path(), self.warning_func, self.error_func).parse() + def invert_graph(dependency_graph: dict[Path, list[Path]]): + depends_on_graph = {k : [] for k in dependency_graph.keys()} + for dependant, dependencies in dependency_graph.items(): + for dependency in dependencies: + depends_on_graph[dependency].append(dependant) + return depends_on_graph + + def topo_sort(dependency_graph: dict[Path, list[Path]]) -> Generator[Path]: + # Dependancy graph: + # Path : [Depends on Path, ...] - ready: list[tuple[Path, str]] = [] - dependencies = {} - depends_on = defaultdict(list) - solved = {Path()} + in_degree = {p: len(l) for p, l in dependency_graph.items()} - def consider_prop(path: Path, key: str|int, deps: set[Path]): - if deps: - dependencies[(path, key)] = len(deps) - for d in deps: - depends_on[d].append((path, key)) - else: - ready.append((path, key)) - - for prop, spec in self.spec["properties"].items(): - consider_prop(Path(), prop, set(Parser.get_parser_type(spec).PATH_DEPENDENCIES)) - - while ready: - p, c = ready.pop() - full_path = Path.combine(p, c) - spec = full_path.spec_path().index(self.spec) - parser = Parser.get_parser_type(spec)(self.data, self.spec, full_path, self.warning_func, self.error_func) - p.index(self.data)[c] = parser.parse() + q = deque(p for p in dependency_graph.keys() if in_degree[p] == 0) + + depends_on_graph = Metadata.invert_graph(dependency_graph) + + while q: + current = q.popleft() + yield current + + for dependency in depends_on_graph[current]: + in_degree[dependency] -= 1 + + if in_degree[dependency] == 0: + q.append(dependency) + + if any(x > 0 for x in in_degree.values()): + raise SpecificationError("Unresolvable, cyclic dependencies") + + def get_path_dependencies(self) -> dict[Path, list[Path]]: + stack = [(Path(), Path("properties", prop)) for prop in self.spec['properties']] + graph = {} + while stack: + parent, p = stack.pop() + spec = p.index(self.spec) + deps = Parser.get_parser_type(spec).PATH_DEPENDENCIES + if len(parent.path) > 0: + deps.append(parent) + graph[p] = deps if spec["type"] == "object": - for prop, c_spec in self.spec["properties"].items(): - consider_prop(full_path, prop, set(Parser.get_parser_type(c_spec).PATH_DEPENDENCIES) - solved) + stack.extend((p, Path.combine(p, "properties", prop)) for prop in spec["properties"]) elif spec["type"] == "list": - deps = set(Parser.get_parser_type(spec["content"]).PATH_DEPENDENCIES) - solved - for i in range(len(full_path.index(self.data))): - consider_prop(full_path, i, deps) - for x in depends_on[full_path]: - dependencies[x] -= 1 - if dependencies[x] == 0: - ready.append(x) - del dependencies[x] - solved.add(full_path) - if any(v > 0 for v in dependencies.items()): - raise SpecificationError("Circular dependency in specification by parsing rules") - self.data.update(injected_data) + stack.append((p, Path.combine(p, 'content'))) + return graph + + def get_copy_dependencies(self) -> dict[Path, list[Path]]: + stack = [(Path(), Path(child)) for child in self.data.keys()] + graph = {Path():[]} + + while stack: + parent, path = stack.pop() + val = path.index(self.data) + graph[parent].append(path) + deps = [] + if is_copyfrom(val): + deps.append(val[1]) + graph[path] = deps + if isinstance(val, dict): + stack.extend((path, Path.combine(path, child)) for child in val.keys()) + elif isinstance(val, list): + stack.extend((path, Path.combine(path, i)) for i in range(len(val))) + + return graph + + def load_config(self, config: dict, injected_data: dict) -> None: + self.data: dict = DefaultObjectParser(config, self.spec, Path(), self.warning_func, self.error_func).parse() - # TODO: resolve inherited dependencies, e.g. if baz copies foo/bar, but foo is copied from xyz - ready: list[tuple[Path, str]] = [] - dependencies = {} - depends_on = defaultdict(list) - solved = {Path()} - for prop in self.spec["properties"].keys(): - val = Path(prop).index(self.data) - deps = {Path.parse(val[1])} if isinstance(val, tuple) and val[0] == 'copy-from' else set() - consider_prop(Path(), prop, deps) + for cfg_path in Metadata.topo_sort(self.get_path_dependencies()): + spec = cfg_path.index(self.spec) + for full_path in cfg_path.data_paths(self.data): + parser = Parser.get_parser_type(spec)(self.data, self.spec, full_path, self.warning_func, self.error_func) + full_path.up().index(self.data)[full_path.last_name()] = parser.parse() - while ready: - p, c = ready.pop() - full_path = Path.combine(p, c) + self.data.update(injected_data) + + for full_path in Metadata.topo_sort(self.get_copy_dependencies()): val = full_path.index(self.data) - if isinstance(val, tuple) and val[0] == 'copy-from': + if is_copyfrom(val): copy_val = val[1].index(self.data) - assert copy_val is not None - p.index(self.data)[c] = copy_val - elif isinstance(val, dict): - for k, v in val.items(): - deps = {Path.parse(v[1])} if isinstance(v, tuple) and v[0] == 'copy-from' else set() - consider_prop(full_path, k, deps) - elif isinstance(val, list): - for k, v in enumerate(val): - deps = {Path.parse(v[1])} if isinstance(v, tuple) and v[0] == 'copy-from' else set() - consider_prop(full_path, k, deps) - for x in depends_on[full_path]: - dependencies[x] -= 1 - if dependencies[x] == 0: - ready.append(x) - del dependencies[x] - solved.add(full_path) - if any(v > 0 for v in dependencies.items()): - raise SpecificationError("Circular dependency in specification by copy-from directives") - + full_path.up().index(self.data)[full_path.last_name()] = copy_val + def check_config(self) -> None: pass diff --git a/problemtools/problem_yaml_parser/notes.txt b/problemtools/config_parser/notes.txt similarity index 100% rename from problemtools/problem_yaml_parser/notes.txt rename to problemtools/config_parser/notes.txt diff --git a/problemtools/problem_yaml_parser/spec.md b/problemtools/config_parser/spec.md similarity index 100% rename from problemtools/problem_yaml_parser/spec.md rename to problemtools/config_parser/spec.md diff --git a/problemtools/problem_yaml_parser/wip_format.yaml b/problemtools/config_parser/wip_format.yaml similarity index 99% rename from problemtools/problem_yaml_parser/wip_format.yaml rename to problemtools/config_parser/wip_format.yaml index b967a450..6296d0fd 100644 --- a/problemtools/problem_yaml_parser/wip_format.yaml +++ b/problemtools/config_parser/wip_format.yaml @@ -32,7 +32,7 @@ properties: type: string default: unknown alternatives: - unknown: + unknown: warn: License is unknown require: - rights_owner: ".+" From f9eaa785a83c917fc0f88d7e7f6d9dee26fb2737 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20S=C3=B6derbergh?= Date: Sun, 23 Mar 2025 18:04:27 +0100 Subject: [PATCH 11/32] Make tests and fix bugs Co-authored-by: Zazmuz --- problemtools/config_parser/__init__.py | 519 ++++++++++++------ .../config/basic_config.yaml | 38 ++ .../config/complex_copies.yaml | 43 ++ .../config_alternatives_misspelled.yaml | 21 + .../config/config_field_misspelled.yaml | 21 + .../config/config_type_misspelled.yaml | 21 + .../config/config_type_value_misspelled.yaml | 21 + .../config/empty_config.yaml | 0 .../config/follows_basic_config.yaml | 3 + .../config/follows_config_misspelled.yaml | 2 + .../tests/config_parser_tests/helper.py | 28 + .../test_basic_config_valid.py | 20 + .../test_complex_copy_from.py | 33 ++ .../config_parser_tests/test_config_path.py | 47 ++ .../test_invalid_config.py | 47 ++ 15 files changed, 705 insertions(+), 159 deletions(-) create mode 100644 problemtools/tests/config_parser_tests/config/basic_config.yaml create mode 100644 problemtools/tests/config_parser_tests/config/complex_copies.yaml create mode 100644 problemtools/tests/config_parser_tests/config/config_alternatives_misspelled.yaml create mode 100644 problemtools/tests/config_parser_tests/config/config_field_misspelled.yaml create mode 100644 problemtools/tests/config_parser_tests/config/config_type_misspelled.yaml create mode 100644 problemtools/tests/config_parser_tests/config/config_type_value_misspelled.yaml create mode 100644 problemtools/tests/config_parser_tests/config/empty_config.yaml create mode 100644 problemtools/tests/config_parser_tests/config/follows_basic_config.yaml create mode 100644 problemtools/tests/config_parser_tests/config/follows_config_misspelled.yaml create mode 100644 problemtools/tests/config_parser_tests/helper.py create mode 100644 problemtools/tests/config_parser_tests/test_basic_config_valid.py create mode 100644 problemtools/tests/config_parser_tests/test_complex_copy_from.py create mode 100644 problemtools/tests/config_parser_tests/test_config_path.py create mode 100644 problemtools/tests/config_parser_tests/test_invalid_config.py diff --git a/problemtools/config_parser/__init__.py b/problemtools/config_parser/__init__.py index cc66bcdf..48ad4540 100644 --- a/problemtools/config_parser/__init__.py +++ b/problemtools/config_parser/__init__.py @@ -1,31 +1,47 @@ +from __future__ import annotations import re from typing import Any, Callable, Generator, Literal, Pattern, Match, ParamSpec, TypeVar -from __future__ import annotations from collections import defaultdict, deque +from copy import deepcopy + + class SpecificationError(Exception): pass + type_mapping = { "string": str, "object": dict, "list": list, "bool": bool, "int": int, - "float": float + "float": float, +} + +type_field_mapping = { + "*": ["default", "type", "flags", "parsing"], + "string": ["alternatives"], + "bool": ["alternatives"], + "int": ["alternatives"], + "float": ["alternatives"], + "object": ["required", "properties"], + "list": ["content"], } + def is_copyfrom(val: Any) -> bool: - return isinstance(val, tuple) and val[0] == 'copy-from' + return isinstance(val, tuple) and val[0] == "copy-from" class Path: - INDEXING_REGEX = re.compile(r'^([A-Za-z_0-9\-]+)\[(\d+)\]$') + INDEXING_REGEX = re.compile(r"^([A-Za-z_0-9\-]+)\[(\d+)\]$") + @staticmethod def parse(path: str) -> Path: - parts = path.split('/') + parts = path.split("/") res = [] for part in parts: - m = Path.INDEXING_REGEX.match(part) + m = Path.INDEXING_REGEX.match(part) if m: res.append(m.group(1)) res.append(int(m.group(2))) @@ -34,7 +50,7 @@ def parse(path: str) -> Path: return Path(*res) @staticmethod - def combine(*parts: str|int|Path) -> Path: + def combine(*parts: str | int | Path) -> Path: res = [] for part in parts: if isinstance(part, int): @@ -42,13 +58,13 @@ def combine(*parts: str|int|Path) -> Path: continue if isinstance(part, str): part = Path.parse(part) - res.extend(list(part.path)) #type: ignore + res.extend(list(part.path)) # type: ignore return Path(*res) - def __init__(self, *path: str|int) -> None: + def __init__(self, *path: str | int) -> None: self.path = path - def index(self, data: dict) -> Any|None: + def index(self, data: dict) -> Any | None: rv = data for part in self.path: if isinstance(part, int): @@ -69,27 +85,29 @@ def spec_path(self) -> Path: for part in self.path: if isinstance(part, str): res.append("properties") - res.append("part") + res.append(part) elif isinstance(part, int): res.append("content") return Path(*res) def data_paths(self, data: dict) -> list[Path]: """Finds all data paths that a spec_path is pointing towards (meaning it will explore all items in lists)""" + def path_is_not_copyfrom(path: Path) -> bool: return not is_copyfrom(path.index(data)) + out = [Path()] - state = 'base' + state = "base" for part in self.path: - if state == 'base': - if part == 'properties': - state = 'object-properties' - elif part == 'content': - state = 'base' + if state == "base": + if part == "properties": + state = "object-properties" + elif part == "content": + state = "base" new_out = [] for path in out: - val = out.index(data) or [] - if is_copyfrom(val): # skip copied + val = path.index(data) or [] + if is_copyfrom(val): # skip copied continue assert isinstance(val, list) new_out.extend(Path.combine(path, i) for i in range(len(val))) @@ -98,9 +116,10 @@ def path_is_not_copyfrom(path: Path) -> bool: return [] else: assert False - elif state == 'object-properties': + elif state == "object-properties": combined_paths = [Path.combine(path, part) for path in out] out = [*filter(path_is_not_copyfrom, combined_paths)] + state = 'base' return out @@ -108,67 +127,80 @@ def up(self, levels=1) -> Path: assert levels > 0 return Path(*self.path[:-levels]) - def last_name(self) -> int|str: + def last_name(self) -> int | str: return self.path[-1] def __str__(self) -> str: strings = [] for part in self.path: if isinstance(part, int): - strings[-1] += f'[{part}]' + strings[-1] += f"[{part}]" else: strings.append(part) - return '/'.join(strings) - + return "/".join(strings) + + def __repr__(self): + return f"Path({self})" + def __eq__(self, value): return self.path == value.path - + def __hash__(self): return hash(self.path) + class AlternativeMatch: def __init__(self, matchstr): raise NotImplementedError("Specialize in subclass") def check(self, val) -> bool: raise NotImplementedError("Specialize in subclass") - + @staticmethod - def get_matcher(type, matchstr) -> 'AlternativeMatch': + def get_matcher(type, matchstr) -> "AlternativeMatch": matchers = { - 'string': StringMatch, - 'int': IntMatch, - 'float': FloatMatch, - 'bool': BoolMatch + "string": StringMatch, + "int": IntMatch, + "float": FloatMatch, + "bool": BoolMatch, } assert type in matchers return matchers[type](matchstr) + class StringMatch(AlternativeMatch): def __init__(self, matchstr): self.regex = re.compile(matchstr) - + def check(self, val) -> bool: return self.regex.match(val) - + def __str__(self) -> str: self.regex.pattern + class IntMatch(AlternativeMatch): - def __init__(self, matchstr: str): + def __init__(self, matchstr: str | int): + if isinstance(matchstr, int): + self.start = self.end = matchstr + return try: - if matchstr.count(':') > 1: + if matchstr.count(":") > 1: raise ValueError - if ':' in matchstr: - self.start, self.end = [int(p) if p else None for p in map(str.strip, matchstr.split())] + if ":" in matchstr: + self.start, self.end = [ + int(p) if p else None for p in map(str.strip, matchstr.split()) + ] else: matchstr = matchstr.strip() if not matchstr: - raise SpecificationError('Match string for integer was left empty') + raise SpecificationError("Match string for integer was left empty") self.start = self.end = int(matchstr) except ValueError: - raise SpecificationError('Int match string should be of the form "A:B" where A and B can be parsed as ints or left empty, or a single integer') - + raise SpecificationError( + 'Int match string should be of the form "A:B" where A and B can be parsed as ints or left empty, or a single integer' + ) + def check(self, val) -> bool: if not isinstance(val, int): return False @@ -179,45 +211,56 @@ def check(self, val) -> bool: if val > self.start: return False return True - + def __str__(self): if A == B: return str(A) A = str(self.start) if self.start is not None else "" B = str(self.end) if self.end is not None else "" - return f'{A}:{B}' + return f"{A}:{B}" + class FloatMatch(AlternativeMatch): def __init__(self, matchstr: str): try: - if matchstr.count(':') != 1: + if matchstr.count(":") != 1: raise ValueError - first, second = [p.strip() for p in matchstr.split()] - self.start = float(first) if first else float('-inf') - self.end = float(second) if second else float('inf') + first, second = [p.strip() for p in matchstr.split(":")] + self.start = float(first) if first else float("-inf") + self.end = float(second) if second else float("inf") except ValueError: - raise SpecificationError('Float match string should be of the form "A:B" where A and B can be parsed as floats or left empty') - + raise SpecificationError( + 'Float match string should be of the form "A:B" where A and B can be parsed as floats or left empty' + ) + def check(self, val) -> bool: return isinstance(val, float) and self.start <= val <= self.end - + def __str__(self): - A = str(self.start) if self.start != float('-inf') else "" - B = str(self.end) if self.end != float('inf') else "" - return f'{A}:{B}' + A = str(self.start) if self.start != float("-inf") else "" + B = str(self.end) if self.end != float("inf") else "" + return f"{A}:{B}" + class BoolMatch(AlternativeMatch): - def __init__(self, matchstr: str): + def __init__(self, matchstr: str | bool): + if isinstance(matchstr, bool): + self.val = matchstr + return matchstr = matchstr.strip().lower() - assert matchstr in ('true', 'false') - self.val = {'true':True,'false':False}[matchstr] - + if matchstr not in {"true", "false"}: + raise SpecificationError( + 'Bool match string should be either "true" or "false"' + ) + self.val = {"true": True, "false": False}[matchstr] + def check(self, val) -> bool: return isinstance(val, bool) and val == self.val - + def __str__(self): return str(self.val) + class Metadata: def __init__(self, specification: dict) -> None: self.spec = specification @@ -225,9 +268,9 @@ def __init__(self, specification: dict) -> None: self.warning_func = lambda s: print(f"WARNING: {s}") self.data = None - def __getitem__(self, key: str|Path) -> Any: + def __getitem__(self, key: str | Path) -> Any: if self.data is None: - raise Exception('data has not been loaded yet') + raise Exception("data has not been loaded yet") if isinstance(key, str): return Path.parse(key).index(self.data) return key.index(self.data) @@ -239,7 +282,7 @@ def set_warning_callback(self, fun: Callable): self.warning_func = fun def invert_graph(dependency_graph: dict[Path, list[Path]]): - depends_on_graph = {k : [] for k in dependency_graph.keys()} + depends_on_graph = {k: [] for k in dependency_graph.keys()} for dependant, dependencies in dependency_graph.items(): for dependency in dependencies: depends_on_graph[dependency].append(dependant) @@ -248,9 +291,9 @@ def invert_graph(dependency_graph: dict[Path, list[Path]]): def topo_sort(dependency_graph: dict[Path, list[Path]]) -> Generator[Path]: # Dependancy graph: # Path : [Depends on Path, ...] - + in_degree = {p: len(l) for p, l in dependency_graph.items()} - + q = deque(p for p in dependency_graph.keys() if in_degree[p] == 0) depends_on_graph = Metadata.invert_graph(dependency_graph) @@ -266,28 +309,38 @@ def topo_sort(dependency_graph: dict[Path, list[Path]]) -> Generator[Path]: q.append(dependency) if any(x > 0 for x in in_degree.values()): - raise SpecificationError("Unresolvable, cyclic dependencies") + nodes = ", ".join( + str(path) for path, count in in_degree.items() if count > 0 + ) + raise SpecificationError( + f"Unresolvable, cyclic dependencies involving ({nodes})" + ) def get_path_dependencies(self) -> dict[Path, list[Path]]: - stack = [(Path(), Path("properties", prop)) for prop in self.spec['properties']] + stack = [(Path(), Path("properties", prop)) for prop in self.spec["properties"]] graph = {} while stack: parent, p = stack.pop() spec = p.index(self.spec) - deps = Parser.get_parser_type(spec).PATH_DEPENDENCIES + deps = Parser.get_parser_type(spec).get_dependencies() if len(parent.path) > 0: deps.append(parent) graph[p] = deps + for dep in deps: + assert dep != p if spec["type"] == "object": - stack.extend((p, Path.combine(p, "properties", prop)) for prop in spec["properties"]) + stack.extend( + (p, Path.combine(p, "properties", prop)) + for prop in spec["properties"] + ) elif spec["type"] == "list": - stack.append((p, Path.combine(p, 'content'))) + stack.append((p, Path.combine(p, "content"))) return graph def get_copy_dependencies(self) -> dict[Path, list[Path]]: stack = [(Path(), Path(child)) for child in self.data.keys()] - graph = {Path():[]} - + graph = {Path(): []} + while stack: parent, path = stack.pop() val = path.index(self.data) @@ -304,64 +357,103 @@ def get_copy_dependencies(self) -> dict[Path, list[Path]]: return graph def load_config(self, config: dict, injected_data: dict) -> None: - self.data: dict = DefaultObjectParser(config, self.spec, Path(), self.warning_func, self.error_func).parse() - + self.data: dict = DefaultObjectParser( + config, self.spec, Path(), self.warning_func, self.error_func + ).parse() for cfg_path in Metadata.topo_sort(self.get_path_dependencies()): spec = cfg_path.index(self.spec) for full_path in cfg_path.data_paths(self.data): - parser = Parser.get_parser_type(spec)(self.data, self.spec, full_path, self.warning_func, self.error_func) + parser = Parser.get_parser_type(spec)( + self.data, self.spec, full_path, self.warning_func, self.error_func + ) full_path.up().index(self.data)[full_path.last_name()] = parser.parse() - self.data.update(injected_data) for full_path in Metadata.topo_sort(self.get_copy_dependencies()): val = full_path.index(self.data) if is_copyfrom(val): - copy_val = val[1].index(self.data) + if any(isinstance(part, int) for part in val[1].path): + raise SpecificationError( + f"copy-from directives may not copy from lists (property: {full_path}, copy-property: {val[1]})" + ) + copy_val = deepcopy(val[1].index(self.data)) + if copy_val is None: + raise SpecificationError( + f"copy-from directive returned None (property: {full_path}, copy-property: {val[1]})" + ) + if not isinstance( + copy_val, + type_mapping[full_path.spec_path().index(self.spec)["type"]], + ): + raise SpecificationError( + f"copy-from directive provided the wrong type (property: {full_path}, copy-property: {val[1]})" + ) full_path.up().index(self.data)[full_path.last_name()] = copy_val - + def check_config(self) -> None: pass + class Parser: NAME: str = "" - PATH_DEPENDENCIES: list[Path] = [] OUTPUT_TYPE: str = "" - def __init__(self, data: dict, specification: dict, path: Path, warning_func: Callable, error_func: Callable): + @staticmethod + def get_dependencies() -> list[Path]: + return [] + + def __init__( + self, + data: dict, + specification: dict, + path: Path, + warning_func: Callable, + error_func: Callable, + ): self.data = data self.specification = specification self.path = path self.spec_path = path.spec_path() self.warning_func = warning_func self.error_func = error_func - + if not self.NAME: - raise NotImplementedError("Subclasses of Parser need to set the name of the parsing rule") + raise NotImplementedError( + "Subclasses of Parser need to set the name of the parsing rule" + ) if not self.OUTPUT_TYPE: - raise NotImplementedError("Subclasses of Parser need to set the output type of the parsing rule") - + raise NotImplementedError( + "Subclasses of Parser need to set the output type of the parsing rule" + ) + required_type = self.spec_path.index(specification)["type"] if required_type != self.OUTPUT_TYPE: - raise SpecificationError(f"Parsing rule for {path} outputs {self.OUTPUT_TYPE}, but the output should be of type {required_type}") - + raise SpecificationError( + f"Parsing rule for {path} outputs {self.OUTPUT_TYPE}, but the output should be of type {required_type}" + ) + if self.OUTPUT_TYPE in ("string", "int", "float", "bool"): - alternatives = Path.combine(self.spec_path, "alternatives").index(self.specification) + alternatives = Path.combine(self.spec_path, "alternatives").index( + self.specification + ) if alternatives is None: self.alternatives = None else: - self.alternatives = [(AlternativeMatch.get_matcher(self.OUTPUT_TYPE, key), val) for key, val in alternatives.items()] + self.alternatives = [ + (AlternativeMatch.get_matcher(self.OUTPUT_TYPE, key), val) + for key, val in alternatives.items() + ] else: self.alternatives = None - + def parse(self): val = self.path.index(self.data) out = self._parse(val) - + flags = Path.combine(self.spec_path, "flags").index(self.specification) or [] - if 'deprecated' in flags and out is not None: - self.warning_func(f'deprecated property was provided ({self.path})') - + if "deprecated" in flags and out is not None: + self.warning_func(f"deprecated property was provided ({self.path})") + if self.alternatives is not None: found_match = False for matcher, checks in self.alternatives: @@ -370,155 +462,234 @@ def parse(self): if "warn" in checks: self.warning_func(checks["warn"]) if not found_match: - alts = ', '.join(f'"{matcher}"' for matcher in self.alternatives.keys()) - self.error_func(f"Property {self.path} did not match any of the specified alternatives ({alts})") + alts = ", ".join(f'"{matcher}"' for matcher in self.alternatives.keys()) + self.error_func( + f"Property {self.path} did not match any of the specified alternatives ({alts})" + ) out = None if out is None: fallback = Path.combine(self.spec_path, "default").index(self.specification) if fallback is not None: - if isinstance(fallback, str) and fallback.startswith('copy-from:'): - fallback = ('copy-from', fallback.split(':')[1]) + if isinstance(fallback, str) and fallback.startswith("copy-from:"): + fallback = ("copy-from", Path.parse(fallback.split(":")[1])) return fallback return type_mapping[self.OUTPUT_TYPE]() - - if not (isinstance(out, tuple) or isinstance(out, type_mapping[self.OUTPUT_TYPE])): - raise SpecificationError(f'Parsing rule "{self.NAME}" did not output the correct type. Output was: {out}') + + if not ( + isinstance(out, tuple) or isinstance(out, type_mapping[self.OUTPUT_TYPE]) + ): + raise SpecificationError( + f'Parsing rule "{self.NAME}" did not output the correct type. Output was: {out}' + ) return out - + def _parse(self, val): raise NotImplementedError("Subclasses of Parse need to implement _parse()") + def smallest_edit_dist(a: str, b: list[str]) -> str: + def edit_dist(a: str, b: str) -> int: + n = len(a) + m = len(b) + dp = [[0] * (m + 1) for _ in range(n + 1)] + for i in range(n + 1): + dp[i][0] = i + for j in range(m + 1): + dp[0][j] = j + for i in range(1, n + 1): + for j in range(1, m + 1): + if a[i - 1] == b[j - 1]: + dp[i][j] = dp[i - 1][j - 1] + else: + dp[i][j] = 1 + min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1]) + return dp[n][m] + best = b[0] + best_dist = edit_dist(a, best) + for s in b[1:]: + dist = edit_dist(a, s) + if dist < best_dist: + best = s + best_dist = dist + return best + @staticmethod def get_parser_type(specification: dict) -> type: - parsing_rule = specification.get('parsing') + parsing_rule = specification.get("parsing") if parsing_rule is None: typ = specification.get("type") - parsing_rule = f'default-{typ}-parser' + + if typ is None: + had = "', '".join(specification.keys()) + raise SpecificationError( + f"Specification did not have a MUST HAVE field 'type', the provided fields were: ('{had}')" + ) + + if typ not in type_mapping: + valid = "', '".join(type_mapping.keys()) + closest = Parser.smallest_edit_dist(typ, [*type_mapping.keys()]) + raise SpecificationError( + f"Type '{typ}' is not a valid type. Did you mean: '{closest}'? Otherwise valid types are: ('{valid}')" + ) + + + fields = specification.keys() + allowed_fields = type_field_mapping.get(typ, []) + type_field_mapping["*"] + for field in fields: + if field not in allowed_fields: + raise SpecificationError( + f"Field '{field}' is not allowed for type '{typ}', did you mean '{Parser.smallest_edit_dist(field, allowed_fields)}'?" + ) + + parsing_rule = f"default-{typ}-parser" if parsing_rule not in parsers: raise SpecificationError(f'Parser "{parsing_rule}" is not implemented') return parsers[parsing_rule] - + class DefaultStringParser(Parser): NAME = "default-string-parser" OUTPUT_TYPE = "string" - + def _parse(self, val): if val is None: return None if not isinstance(val, str): - self.warning_func(f'Expected value of type string but got {val}, casting to string ({self.path})') + self.warning_func( + f"Expected value of type string but got {val}, casting to string ({self.path})" + ) val = str(val) return val + class DefaultObjectParser(Parser): NAME = "default-object-parser" OUTPUT_TYPE = "object" - + def _parse(self, val): if val is None: return None - + if not isinstance(val, dict): - self.error_func(f'Expected an object, got {val} ({self.path})') + self.error_func(f"Expected an object, got {val} ({self.path})") return None - - required = Path.combine(self.spec_path, 'required').index(self.specification) or [] + + required = ( + Path.combine(self.spec_path, "required").index(self.specification) or [] + ) for req in required: req_path = Path.combine(self.path, req) if req_path.index(self.data) is None: - self.error_func(f'Missing required property: {req_path}') - + self.error_func(f"Missing required property: {req_path}") + remove = [] - known_props = Path.combine(self.spec_path, 'properties').index(self.specification) + known_props = Path.combine(self.spec_path, "properties").index( + self.specification + ) for prop in val.keys(): if prop not in known_props: - self.warning_func(f'Unknown property: {Path.combine(self.path, prop)}') + self.warning_func(f"Unknown property: {Path.combine(self.path, prop)}") remove.append(prop) for r in remove: del val[r] return val + class DefaultListParser(Parser): NAME = "default-list-parser" OUTPUT_TYPE = "list" - + def _parse(self, val): if val is None: return None - + if not isinstance(val, list): - self.error_func(f'Expected a list, got {val} ({self.path})') + self.error_func(f"Expected a list, got {val} ({self.path})") return None - - return val - + + return val + + class DefaultIntParser(Parser): NAME = "default-int-parser" OUTPUT_TYPE = "int" - + def _parse(self, val): if val is None: return None - + if not isinstance(val, int): try: cast = int(val) - self.warning_func(f'Expected type int, got {val}. Casting to {cast} ({self.path})') + self.warning_func( + f"Expected type int, got {val}. Casting to {cast} ({self.path})" + ) val = cast except ValueError: - self.error_func(f'Expected a int, got {val} ({self.path})') + self.error_func(f"Expected a int, got {val} ({self.path})") return None - + return val + class DefaultFloatParser(Parser): NAME = "default-float-parser" OUTPUT_TYPE = "float" - + def _parse(self, val): if val is None: return None - + if not isinstance(val, float): try: cast = float(val) - self.warning_func(f'Expected type float, got {val}. Casting to {cast} ({self.path})') + self.warning_func( + f"Expected type float, got {val}. Casting to {cast} ({self.path})" + ) val = cast except ValueError: - self.error_func(f'Expected a float, got {val} ({self.path})') + self.error_func(f"Expected a float, got {val} ({self.path})") return None - + return val - + + class DefaultBoolParser(Parser): NAME = "default-bool-parser" OUTPUT_TYPE = "bool" - + def _parse(self, val): if val is None: return None - + if isinstance(val, str): if val.lower() in ("true", "false"): - interpretation = val.lower() == 'true' - self.warning_func(f'Expected type bool but got a string "{val}" which will be interpreted as {interpretation} ({self.path})') + interpretation = val.lower() == "true" + self.warning_func( + f'Expected type bool but got a string "{val}" which will be interpreted as {interpretation} ({self.path})' + ) val = interpretation else: self.error_func(f'Expected type bool but got "{val}" ({self.path})') return None - + if not isinstance(val, bool): - self.error_func(f'Expected type bool, got {val} ({self.path})') + self.error_func(f"Expected type bool, got {val} ({self.path})") return None - + return val -parsers = {p.NAME: p for p in - [DefaultObjectParser, DefaultListParser, DefaultStringParser, - DefaultIntParser, DefaultFloatParser, DefaultBoolParser] + +parsers = { + p.NAME: p + for p in [ + DefaultObjectParser, + DefaultListParser, + DefaultStringParser, + DefaultIntParser, + DefaultFloatParser, + DefaultBoolParser, + ] } @@ -550,7 +721,7 @@ def __init__(self, layout: dict, metadata: Metadata, path: str = ""): super().__init__(layout, metadata, path) alternatives = self.layout.get("alternatives") if alternatives: - self.patterns = {alt: re.compile('^' + alt + '$') for alt in alternatives} + self.patterns = {alt: re.compile("^" + alt + "$") for alt in alternatives} else: self.patterns = None @@ -558,74 +729,98 @@ def verify(self, value): if value is None: value = self.layout.get("default", "") if not isinstance(value, str): - self.metadata.warning_func(f'Property {self.path} was expected to be of type string') + self.metadata.warning_func( + f"Property {self.path} was expected to be of type string" + ) value = str(value) if self.patterns: if not any(pattern.match(value) for pattern in self.patterns.values()): - self.metadata.error_func(f"Property {self.path} is {value} but must match one of {list(self.patterns.keys())}") + self.metadata.error_func( + f"Property {self.path} is {value} but must match one of {list(self.patterns.keys())}" + ) value = self.layout.get("default", "") return value def check(self, value): if not self.patterns: return - match = next((key for key, pattern in self.patterns.items() if pattern.match(value)), None) + match = next( + (key for key, pattern in self.patterns.items() if pattern.match(value)), + None, + ) checks = self.layout["alternatives"][match] for forbidden in checks.get("forbid", []): - other_path, expected = forbidden.split(':') + other_path, expected = forbidden.split(":") if self.metadata[other_path] == expected: - self.metadata.error_func(f"Property {self.path} has value {value} which forbids property {other_path} to have value {expected}") + self.metadata.error_func( + f"Property {self.path} has value {value} which forbids property {other_path} to have value {expected}" + ) for required in checks.get("required", []): - if not self.metadata[required]: #TODO: This is not a good way to handle this check I think - self.metadata.error_func(f"Property {self.path} has value {value} which requires property {required}") + if not self.metadata[ + required + ]: # TODO: This is not a good way to handle this check I think + self.metadata.error_func( + f"Property {self.path} has value {value} which requires property {required}" + ) if "warn" in checks: self.metadata.warning_func(checks["warn"]) + class ObjectValidator(BaseValidator): def verify(self, value): if value is None: return self.layout.get("default", {}) if self.layout.get("parsing") == "legacy-validation": if not isinstance(value, str): - self.metadata.error_func(f"Property {self.path} was expected to be a string") + self.metadata.error_func( + f"Property {self.path} was expected to be a string" + ) return {} elements = value.split() value = { "type": elements[0], "interactive": "interactive" in elements[1:], - "score": "score" in elements[1:] + "score": "score" in elements[1:], } if not isinstance(value, dict): - self.metadata.error_func(f"property {self.path} was expected to be a dictionary") + self.metadata.error_func( + f"property {self.path} was expected to be a dictionary" + ) return {} for prop in self.layout.get("required", []): if prop not in value: - self.metadata.error_func(f"Missing required property: {self.path}/{prop}") + self.metadata.error_func( + f"Missing required property: {self.path}/{prop}" + ) for prop in value.keys(): if prop not in self.layout["properties"]: self.metadata.warning_func(f"Unknown property: {self.path}/{prop}") return value - + def check(self, value): pass + class ListValidator(BaseValidator): def verify(self, value): if value is None: return self.layout.get("default", []) if self.layout.get("parsing") == "space-separated-strings": if not isinstance(value, str): - self.metadata.error_func(f"Property {self.path} was expected to be a string") + self.metadata.error_func( + f"Property {self.path} was expected to be a string" + ) return [] value = value.split() if not isinstance(value, list): self.metadata.error_func(f"property {self.path} was expected to be a list") return [] return value - + def check(self, value): pass + class FloatValidator(BaseValidator): def verify(self, value): if value is None: @@ -634,13 +829,16 @@ def verify(self, value): try: value = float(value) except Exception: - self.metadata.error_func(f"Property {self.path} was expected to be a float") + self.metadata.error_func( + f"Property {self.path} was expected to be a float" + ) value = self.layout.get("default", 0.0) return value - + def check(self, value): pass + class IntValidator(BaseValidator): def verify(self, value): if value is None: @@ -648,12 +846,15 @@ def verify(self, value): if not isinstance(value, int): try: value = int(value) - self.metadata.warning_func(f"Property {self.path} should be of type integer, interpreting as {value}") + self.metadata.warning_func( + f"Property {self.path} should be of type integer, interpreting as {value}" + ) except Exception: - self.metadata.error_func(f"Property {self.path} was expected to be an integer") + self.metadata.error_func( + f"Property {self.path} was expected to be an integer" + ) value = self.layout.get("default", 0) return value def check(self, value): pass - diff --git a/problemtools/tests/config_parser_tests/config/basic_config.yaml b/problemtools/tests/config_parser_tests/config/basic_config.yaml new file mode 100644 index 00000000..f2b870b7 --- /dev/null +++ b/problemtools/tests/config_parser_tests/config/basic_config.yaml @@ -0,0 +1,38 @@ +type: object +require: + - bar + - boz +properties: + foo: + flags: + - deprecated + type: int + default: 1337 + bar: + type: string + alternatives: + x: {} + y: + warn: watch out you are using y + z: + require: + - baz: true + baz: + type: bool + default: True + alternatives: + True: + warn: "true is now deprecated" + False: {} + boz: + type: float + default: 3.5 + alternatives: + "3.1414:3.1416": + require: + - foo: 1337 + warn: this is pie not real pi 7/2 + ":": {} # Allows all values + copied: + type: string + default: copy-from:external/cool-string diff --git a/problemtools/tests/config_parser_tests/config/complex_copies.yaml b/problemtools/tests/config_parser_tests/config/complex_copies.yaml new file mode 100644 index 00000000..fcd96f2f --- /dev/null +++ b/problemtools/tests/config_parser_tests/config/complex_copies.yaml @@ -0,0 +1,43 @@ +type: object +properties: + a: + type: object + default: "copy-from:h/j" + properties: + k: + type: int + default: 1 + l: + type: int + default: 1 + b: + type: object + properties: + c: + type: int + default: 123 + d: + type: object + properties: + k: + type: int + default: 2 + l: + type: int + default: "copy-from:h/i" + h: + type: object + properties: + i: + type: int + default: 1000 + j: + type: object + default: "copy-from:b/d" + properties: + k: + type: int + default: 13 + l: + type: int + default: "copy-from:b/c" diff --git a/problemtools/tests/config_parser_tests/config/config_alternatives_misspelled.yaml b/problemtools/tests/config_parser_tests/config/config_alternatives_misspelled.yaml new file mode 100644 index 00000000..339036c9 --- /dev/null +++ b/problemtools/tests/config_parser_tests/config/config_alternatives_misspelled.yaml @@ -0,0 +1,21 @@ +type: object +require: + - bar + - boz +properties: + foo: + flags: + - deprecated + type: int + default: 69 + bar: + type: string + alternatives: + x: {} + baz: + type: bool + default: True + alternatives: + Sann: + warn: "true is now deprecated" + Falsk: {} \ No newline at end of file diff --git a/problemtools/tests/config_parser_tests/config/config_field_misspelled.yaml b/problemtools/tests/config_parser_tests/config/config_field_misspelled.yaml new file mode 100644 index 00000000..246bbfc7 --- /dev/null +++ b/problemtools/tests/config_parser_tests/config/config_field_misspelled.yaml @@ -0,0 +1,21 @@ +type: object +require: + - bar + - boz +properties: + foo: + flags: + - deprecated + type: int + classic: 69 + bar: + type: string + alternativ: + x: {} + baz: + type: bool + defalt: True + alteratives: + True: + warn: "true is now deprecated" + False: {} \ No newline at end of file diff --git a/problemtools/tests/config_parser_tests/config/config_type_misspelled.yaml b/problemtools/tests/config_parser_tests/config/config_type_misspelled.yaml new file mode 100644 index 00000000..e3ac337c --- /dev/null +++ b/problemtools/tests/config_parser_tests/config/config_type_misspelled.yaml @@ -0,0 +1,21 @@ +type: object +require: + - bar + - boz +properties: + foo: + flags: + - deprecated + typ: int + classic: 69 + bar: + typ: string + alternative: + x: {} + baz: + typer: bool + default: True + alteratives: + True: + warn: "true is now deprecated" + False: {} \ No newline at end of file diff --git a/problemtools/tests/config_parser_tests/config/config_type_value_misspelled.yaml b/problemtools/tests/config_parser_tests/config/config_type_value_misspelled.yaml new file mode 100644 index 00000000..920e6bdb --- /dev/null +++ b/problemtools/tests/config_parser_tests/config/config_type_value_misspelled.yaml @@ -0,0 +1,21 @@ +type: object +require: + - bar + - boz +properties: + foo: + flags: + - deprecated + type: Integer + classic: 69 + bar: + type: Sträng + alternatives: + x: {} + baz: + type: boolean + default: True + alteratives: + True: + warn: "true is now deprecated" + False: {} \ No newline at end of file diff --git a/problemtools/tests/config_parser_tests/config/empty_config.yaml b/problemtools/tests/config_parser_tests/config/empty_config.yaml new file mode 100644 index 00000000..e69de29b diff --git a/problemtools/tests/config_parser_tests/config/follows_basic_config.yaml b/problemtools/tests/config_parser_tests/config/follows_basic_config.yaml new file mode 100644 index 00000000..7bcfd34f --- /dev/null +++ b/problemtools/tests/config_parser_tests/config/follows_basic_config.yaml @@ -0,0 +1,3 @@ +bar: z +baz: True +boz: 3.5 \ No newline at end of file diff --git a/problemtools/tests/config_parser_tests/config/follows_config_misspelled.yaml b/problemtools/tests/config_parser_tests/config/follows_config_misspelled.yaml new file mode 100644 index 00000000..5584ca76 --- /dev/null +++ b/problemtools/tests/config_parser_tests/config/follows_config_misspelled.yaml @@ -0,0 +1,2 @@ +bar: x +baz: True \ No newline at end of file diff --git a/problemtools/tests/config_parser_tests/helper.py b/problemtools/tests/config_parser_tests/helper.py new file mode 100644 index 00000000..507bd4ae --- /dev/null +++ b/problemtools/tests/config_parser_tests/helper.py @@ -0,0 +1,28 @@ +from problemtools.config_parser import Metadata +from yaml import safe_load +import os + +base_dir = os.path.join(os.path.dirname(__file__), 'config') + +def load_yaml(filename) -> dict: + with open(os.path.join(base_dir, filename), 'r') as f: + content = safe_load(f) + return content + +warnings = [] +errors = [] + +def warnings_add(text: str): + warnings.append(text) + +def errors_add(text: str): + errors.append(text) + +def construct_metadata(spec_file, config_file, injected_data) -> Metadata: + spec = load_yaml(spec_file) + config = load_yaml(config_file) + md = Metadata(spec) + md.set_error_callback(errors_add) + md.set_warning_callback(warnings_add) + md.load_config(config, injected_data) + return md \ No newline at end of file diff --git a/problemtools/tests/config_parser_tests/test_basic_config_valid.py b/problemtools/tests/config_parser_tests/test_basic_config_valid.py new file mode 100644 index 00000000..a24c170d --- /dev/null +++ b/problemtools/tests/config_parser_tests/test_basic_config_valid.py @@ -0,0 +1,20 @@ +from problemtools.config_parser import Metadata +from helper import * + +injected = { + 'external': { + "cool-string": "yo this string is ballin" + } +} + +data = construct_metadata('basic_config.yaml', 'follows_basic_config.yaml', injected) + +print(f"warnings: {warnings}") +print(f"errors: {errors}") + +assert data["foo"] == 1337 +assert data["bar"] == "z" +assert data["baz"] == True +assert abs(data["boz"] - 3.5) < 0.01 +assert data["copied"] == "yo this string is ballin" +assert len(warnings) > 0 \ No newline at end of file diff --git a/problemtools/tests/config_parser_tests/test_complex_copy_from.py b/problemtools/tests/config_parser_tests/test_complex_copy_from.py new file mode 100644 index 00000000..0037d68b --- /dev/null +++ b/problemtools/tests/config_parser_tests/test_complex_copy_from.py @@ -0,0 +1,33 @@ +from helper import * +import yaml + +config = construct_metadata('complex_copies.yaml', 'empty_config.yaml', {}) + +expected = { + "a": { + "k": 2, + "l": 1000 + }, + "b": { + "c": 123, + "d": { + "k": 2, + "l": 1000 + } + }, + "h": { + "i": 1000, + "j": { + "k": 2, + "l": 1000 + } + } +} + +if config.data != expected: + print("Data did not match expected result:") + print("expected:") + print(yaml.dump(expected)) + print("got:") + print(yaml.dump(config.data)) + assert False diff --git a/problemtools/tests/config_parser_tests/test_config_path.py b/problemtools/tests/config_parser_tests/test_config_path.py new file mode 100644 index 00000000..0d5bc382 --- /dev/null +++ b/problemtools/tests/config_parser_tests/test_config_path.py @@ -0,0 +1,47 @@ +from problemtools.config_parser import Path + +def test_parse(): + assert Path.parse("a/b/c") == Path("a", "b", "c") + assert Path.parse("array[0]/key") == Path("array", 0, "key") + assert Path.parse("nested/list[2]/item") == Path("nested", "list", 2, "item") + +def test_combine(): + assert Path.combine("a", "b", "c") == Path("a", "b", "c") + assert Path.combine("list[1]", "key") == Path("list", 1, "key") + assert Path.combine(Path("x", "y"), "z") == Path("x", "y", "z") + +def test_index(): + data = {"a": {"b": [1, 2, 3]}} + assert Path("a", "b", 1).index(data) == 2 + assert Path("a", "c").index(data) is None + assert Path("a", "b", 10).index(data) is None + +def test_spec_path(): + assert Path("a", "b", 2).spec_path() == Path("properties", "a", "properties", "b", "content") + assert Path("x", 3, "y").spec_path() == Path("properties", "x", "content", "properties", "y") + +def test_data_paths(): + data = {"list": ["a", "b", "c"]} + path = Path("properties", "list", "content") + assert path.data_paths(data) == [Path("list", 0), Path("list", 1), Path("list", 2)] + +def test_up(): + assert Path("a", "b", "c").up() == Path("a", "b") + assert Path("x", "y", "z").up(2) == Path("x") + +def test_last_name(): + assert Path("a", "b", "c").last_name() == "c" + assert Path("list", 3).last_name() == 3 + +def test_str_repr(): + assert str(Path("a", "b", 2)) == "a/b[2]" + assert repr(Path("x", "y")) == "Path(x/y)" + +def test_equality_hash(): + p1 = Path("a", "b", 1) + p2 = Path("a", "b", 1) + p3 = Path("a", "b", 2) + assert p1 == p2 + assert p1 != p3 + assert hash(p1) == hash(p2) + assert hash(p1) != hash(p3) diff --git a/problemtools/tests/config_parser_tests/test_invalid_config.py b/problemtools/tests/config_parser_tests/test_invalid_config.py new file mode 100644 index 00000000..26a71dc0 --- /dev/null +++ b/problemtools/tests/config_parser_tests/test_invalid_config.py @@ -0,0 +1,47 @@ +from problemtools.config_parser import Metadata +from problemtools.config_parser import SpecificationError +from helper import construct_metadata, warnings, errors + +def test_misspelled_type(): + try: + construct_metadata('config_type_misspelled.yaml', 'follows_config_misspelled.yaml', {}) + assert False, 'Should have raised SpecificationError' + + except SpecificationError as e: + e = str(e) + assert e.startswith("Specification did not have a MUST HAVE field 'type',") + print(f'warnings: {warnings}') + print(f'errors: {errors}') + +def test_misspelled_typevalue(): + try: + construct_metadata('config_type_value_misspelled.yaml', 'follows_config_misspelled.yaml', {}) + assert False, 'Should have raised SpecificationError' + except SpecificationError as e: + e = str(e) + assert e.startswith('Type ') + assert 'is not a valid type. Did you mean:' in e + print(f'warnings: {warnings}') + print(f'errors: {errors}') + +def test_misspelled_field(): + try: + construct_metadata('config_field_misspelled.yaml', 'follows_config_misspelled.yaml', {}) + assert False, 'Should have raised SpecificationError' + except SpecificationError as e: + e = str(e) + assert e.startswith('Field ') + assert ' is not allowed for type ' in e + print(f'warnings: {warnings}') + print(f'errors: {errors}') + +def test_misspelled_alternatives(): + try: + construct_metadata('config_alternatives_misspelled.yaml', 'follows_config_misspelled.yaml', {}) + assert False, 'Should have raised SpecificationError' + except SpecificationError as e: + e = str(e) + assert e == 'Bool match string should be either "true" or "false"' + + print(f'warnings: {warnings}') + print(f'errors: {errors}') \ No newline at end of file From 2123163b9414c608a1b6f7a34c2113756908c123 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20S=C3=B6derbergh?= Date: Sun, 23 Mar 2025 18:18:45 +0100 Subject: [PATCH 12/32] Split config parsing into multiple files --- problemtools/config_parser/__init__.py | 724 +--------------------- problemtools/config_parser/config_path.py | 120 ++++ problemtools/config_parser/general.py | 26 + problemtools/config_parser/matcher.py | 113 ++++ problemtools/config_parser/parser.py | 301 +++++++++ 5 files changed, 564 insertions(+), 720 deletions(-) create mode 100644 problemtools/config_parser/config_path.py create mode 100644 problemtools/config_parser/general.py create mode 100644 problemtools/config_parser/matcher.py create mode 100644 problemtools/config_parser/parser.py diff --git a/problemtools/config_parser/__init__.py b/problemtools/config_parser/__init__.py index 48ad4540..cb086975 100644 --- a/problemtools/config_parser/__init__.py +++ b/problemtools/config_parser/__init__.py @@ -3,262 +3,10 @@ from typing import Any, Callable, Generator, Literal, Pattern, Match, ParamSpec, TypeVar from collections import defaultdict, deque from copy import deepcopy - - -class SpecificationError(Exception): - pass - - -type_mapping = { - "string": str, - "object": dict, - "list": list, - "bool": bool, - "int": int, - "float": float, -} - -type_field_mapping = { - "*": ["default", "type", "flags", "parsing"], - "string": ["alternatives"], - "bool": ["alternatives"], - "int": ["alternatives"], - "float": ["alternatives"], - "object": ["required", "properties"], - "list": ["content"], -} - - -def is_copyfrom(val: Any) -> bool: - return isinstance(val, tuple) and val[0] == "copy-from" - - -class Path: - INDEXING_REGEX = re.compile(r"^([A-Za-z_0-9\-]+)\[(\d+)\]$") - - @staticmethod - def parse(path: str) -> Path: - parts = path.split("/") - res = [] - for part in parts: - m = Path.INDEXING_REGEX.match(part) - if m: - res.append(m.group(1)) - res.append(int(m.group(2))) - else: - res.append(part) - return Path(*res) - - @staticmethod - def combine(*parts: str | int | Path) -> Path: - res = [] - for part in parts: - if isinstance(part, int): - res.append(part) - continue - if isinstance(part, str): - part = Path.parse(part) - res.extend(list(part.path)) # type: ignore - return Path(*res) - - def __init__(self, *path: str | int) -> None: - self.path = path - - def index(self, data: dict) -> Any | None: - rv = data - for part in self.path: - if isinstance(part, int): - if not isinstance(rv, list): - return None - try: - rv = rv[part] - except IndexError: - return None - else: - if part not in rv: - return None - rv = rv[part] - return rv - - def spec_path(self) -> Path: - res = [] - for part in self.path: - if isinstance(part, str): - res.append("properties") - res.append(part) - elif isinstance(part, int): - res.append("content") - return Path(*res) - - def data_paths(self, data: dict) -> list[Path]: - """Finds all data paths that a spec_path is pointing towards (meaning it will explore all items in lists)""" - - def path_is_not_copyfrom(path: Path) -> bool: - return not is_copyfrom(path.index(data)) - - out = [Path()] - state = "base" - for part in self.path: - if state == "base": - if part == "properties": - state = "object-properties" - elif part == "content": - state = "base" - new_out = [] - for path in out: - val = path.index(data) or [] - if is_copyfrom(val): # skip copied - continue - assert isinstance(val, list) - new_out.extend(Path.combine(path, i) for i in range(len(val))) - out = new_out - if len(out) == 0: - return [] - else: - assert False - elif state == "object-properties": - combined_paths = [Path.combine(path, part) for path in out] - out = [*filter(path_is_not_copyfrom, combined_paths)] - state = 'base' - - return out - - def up(self, levels=1) -> Path: - assert levels > 0 - return Path(*self.path[:-levels]) - - def last_name(self) -> int | str: - return self.path[-1] - - def __str__(self) -> str: - strings = [] - for part in self.path: - if isinstance(part, int): - strings[-1] += f"[{part}]" - else: - strings.append(part) - return "/".join(strings) - - def __repr__(self): - return f"Path({self})" - - def __eq__(self, value): - return self.path == value.path - - def __hash__(self): - return hash(self.path) - - -class AlternativeMatch: - def __init__(self, matchstr): - raise NotImplementedError("Specialize in subclass") - - def check(self, val) -> bool: - raise NotImplementedError("Specialize in subclass") - - @staticmethod - def get_matcher(type, matchstr) -> "AlternativeMatch": - matchers = { - "string": StringMatch, - "int": IntMatch, - "float": FloatMatch, - "bool": BoolMatch, - } - assert type in matchers - return matchers[type](matchstr) - - -class StringMatch(AlternativeMatch): - def __init__(self, matchstr): - self.regex = re.compile(matchstr) - - def check(self, val) -> bool: - return self.regex.match(val) - - def __str__(self) -> str: - self.regex.pattern - - -class IntMatch(AlternativeMatch): - def __init__(self, matchstr: str | int): - if isinstance(matchstr, int): - self.start = self.end = matchstr - return - try: - if matchstr.count(":") > 1: - raise ValueError - if ":" in matchstr: - self.start, self.end = [ - int(p) if p else None for p in map(str.strip, matchstr.split()) - ] - else: - matchstr = matchstr.strip() - if not matchstr: - raise SpecificationError("Match string for integer was left empty") - self.start = self.end = int(matchstr) - except ValueError: - raise SpecificationError( - 'Int match string should be of the form "A:B" where A and B can be parsed as ints or left empty, or a single integer' - ) - - def check(self, val) -> bool: - if not isinstance(val, int): - return False - if self.start is not None: - if val < self.start: - return False - if self.end is not None: - if val > self.start: - return False - return True - - def __str__(self): - if A == B: - return str(A) - A = str(self.start) if self.start is not None else "" - B = str(self.end) if self.end is not None else "" - return f"{A}:{B}" - - -class FloatMatch(AlternativeMatch): - def __init__(self, matchstr: str): - try: - if matchstr.count(":") != 1: - raise ValueError - first, second = [p.strip() for p in matchstr.split(":")] - self.start = float(first) if first else float("-inf") - self.end = float(second) if second else float("inf") - except ValueError: - raise SpecificationError( - 'Float match string should be of the form "A:B" where A and B can be parsed as floats or left empty' - ) - - def check(self, val) -> bool: - return isinstance(val, float) and self.start <= val <= self.end - - def __str__(self): - A = str(self.start) if self.start != float("-inf") else "" - B = str(self.end) if self.end != float("inf") else "" - return f"{A}:{B}" - - -class BoolMatch(AlternativeMatch): - def __init__(self, matchstr: str | bool): - if isinstance(matchstr, bool): - self.val = matchstr - return - matchstr = matchstr.strip().lower() - if matchstr not in {"true", "false"}: - raise SpecificationError( - 'Bool match string should be either "true" or "false"' - ) - self.val = {"true": True, "false": False}[matchstr] - - def check(self, val) -> bool: - return isinstance(val, bool) and val == self.val - - def __str__(self): - return str(self.val) +from .general import SpecificationError, is_copyfrom, type_field_mapping, type_mapping +from .matcher import AlternativeMatch +from .config_path import Path +from .parser import Parser, DefaultObjectParser class Metadata: @@ -394,467 +142,3 @@ def check_config(self) -> None: pass -class Parser: - NAME: str = "" - OUTPUT_TYPE: str = "" - - @staticmethod - def get_dependencies() -> list[Path]: - return [] - - def __init__( - self, - data: dict, - specification: dict, - path: Path, - warning_func: Callable, - error_func: Callable, - ): - self.data = data - self.specification = specification - self.path = path - self.spec_path = path.spec_path() - self.warning_func = warning_func - self.error_func = error_func - - if not self.NAME: - raise NotImplementedError( - "Subclasses of Parser need to set the name of the parsing rule" - ) - if not self.OUTPUT_TYPE: - raise NotImplementedError( - "Subclasses of Parser need to set the output type of the parsing rule" - ) - - required_type = self.spec_path.index(specification)["type"] - if required_type != self.OUTPUT_TYPE: - raise SpecificationError( - f"Parsing rule for {path} outputs {self.OUTPUT_TYPE}, but the output should be of type {required_type}" - ) - - if self.OUTPUT_TYPE in ("string", "int", "float", "bool"): - alternatives = Path.combine(self.spec_path, "alternatives").index( - self.specification - ) - if alternatives is None: - self.alternatives = None - else: - self.alternatives = [ - (AlternativeMatch.get_matcher(self.OUTPUT_TYPE, key), val) - for key, val in alternatives.items() - ] - else: - self.alternatives = None - - def parse(self): - val = self.path.index(self.data) - out = self._parse(val) - - flags = Path.combine(self.spec_path, "flags").index(self.specification) or [] - if "deprecated" in flags and out is not None: - self.warning_func(f"deprecated property was provided ({self.path})") - - if self.alternatives is not None: - found_match = False - for matcher, checks in self.alternatives: - if matcher.check(out): - found_match = True - if "warn" in checks: - self.warning_func(checks["warn"]) - if not found_match: - alts = ", ".join(f'"{matcher}"' for matcher in self.alternatives.keys()) - self.error_func( - f"Property {self.path} did not match any of the specified alternatives ({alts})" - ) - out = None - - if out is None: - fallback = Path.combine(self.spec_path, "default").index(self.specification) - if fallback is not None: - if isinstance(fallback, str) and fallback.startswith("copy-from:"): - fallback = ("copy-from", Path.parse(fallback.split(":")[1])) - return fallback - return type_mapping[self.OUTPUT_TYPE]() - - if not ( - isinstance(out, tuple) or isinstance(out, type_mapping[self.OUTPUT_TYPE]) - ): - raise SpecificationError( - f'Parsing rule "{self.NAME}" did not output the correct type. Output was: {out}' - ) - return out - - def _parse(self, val): - raise NotImplementedError("Subclasses of Parse need to implement _parse()") - - def smallest_edit_dist(a: str, b: list[str]) -> str: - def edit_dist(a: str, b: str) -> int: - n = len(a) - m = len(b) - dp = [[0] * (m + 1) for _ in range(n + 1)] - for i in range(n + 1): - dp[i][0] = i - for j in range(m + 1): - dp[0][j] = j - for i in range(1, n + 1): - for j in range(1, m + 1): - if a[i - 1] == b[j - 1]: - dp[i][j] = dp[i - 1][j - 1] - else: - dp[i][j] = 1 + min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1]) - return dp[n][m] - best = b[0] - best_dist = edit_dist(a, best) - for s in b[1:]: - dist = edit_dist(a, s) - if dist < best_dist: - best = s - best_dist = dist - return best - - @staticmethod - def get_parser_type(specification: dict) -> type: - parsing_rule = specification.get("parsing") - if parsing_rule is None: - typ = specification.get("type") - - if typ is None: - had = "', '".join(specification.keys()) - raise SpecificationError( - f"Specification did not have a MUST HAVE field 'type', the provided fields were: ('{had}')" - ) - - if typ not in type_mapping: - valid = "', '".join(type_mapping.keys()) - closest = Parser.smallest_edit_dist(typ, [*type_mapping.keys()]) - raise SpecificationError( - f"Type '{typ}' is not a valid type. Did you mean: '{closest}'? Otherwise valid types are: ('{valid}')" - ) - - - fields = specification.keys() - allowed_fields = type_field_mapping.get(typ, []) + type_field_mapping["*"] - for field in fields: - if field not in allowed_fields: - raise SpecificationError( - f"Field '{field}' is not allowed for type '{typ}', did you mean '{Parser.smallest_edit_dist(field, allowed_fields)}'?" - ) - - parsing_rule = f"default-{typ}-parser" - if parsing_rule not in parsers: - raise SpecificationError(f'Parser "{parsing_rule}" is not implemented') - return parsers[parsing_rule] - - -class DefaultStringParser(Parser): - NAME = "default-string-parser" - OUTPUT_TYPE = "string" - - def _parse(self, val): - if val is None: - return None - if not isinstance(val, str): - self.warning_func( - f"Expected value of type string but got {val}, casting to string ({self.path})" - ) - val = str(val) - return val - - -class DefaultObjectParser(Parser): - NAME = "default-object-parser" - OUTPUT_TYPE = "object" - - def _parse(self, val): - if val is None: - return None - - if not isinstance(val, dict): - self.error_func(f"Expected an object, got {val} ({self.path})") - return None - - required = ( - Path.combine(self.spec_path, "required").index(self.specification) or [] - ) - for req in required: - req_path = Path.combine(self.path, req) - if req_path.index(self.data) is None: - self.error_func(f"Missing required property: {req_path}") - - remove = [] - known_props = Path.combine(self.spec_path, "properties").index( - self.specification - ) - for prop in val.keys(): - if prop not in known_props: - self.warning_func(f"Unknown property: {Path.combine(self.path, prop)}") - remove.append(prop) - for r in remove: - del val[r] - - return val - - -class DefaultListParser(Parser): - NAME = "default-list-parser" - OUTPUT_TYPE = "list" - - def _parse(self, val): - if val is None: - return None - - if not isinstance(val, list): - self.error_func(f"Expected a list, got {val} ({self.path})") - return None - - return val - - -class DefaultIntParser(Parser): - NAME = "default-int-parser" - OUTPUT_TYPE = "int" - - def _parse(self, val): - if val is None: - return None - - if not isinstance(val, int): - try: - cast = int(val) - self.warning_func( - f"Expected type int, got {val}. Casting to {cast} ({self.path})" - ) - val = cast - except ValueError: - self.error_func(f"Expected a int, got {val} ({self.path})") - return None - - return val - - -class DefaultFloatParser(Parser): - NAME = "default-float-parser" - OUTPUT_TYPE = "float" - - def _parse(self, val): - if val is None: - return None - - if not isinstance(val, float): - try: - cast = float(val) - self.warning_func( - f"Expected type float, got {val}. Casting to {cast} ({self.path})" - ) - val = cast - except ValueError: - self.error_func(f"Expected a float, got {val} ({self.path})") - return None - - return val - - -class DefaultBoolParser(Parser): - NAME = "default-bool-parser" - OUTPUT_TYPE = "bool" - - def _parse(self, val): - if val is None: - return None - - if isinstance(val, str): - if val.lower() in ("true", "false"): - interpretation = val.lower() == "true" - self.warning_func( - f'Expected type bool but got a string "{val}" which will be interpreted as {interpretation} ({self.path})' - ) - val = interpretation - else: - self.error_func(f'Expected type bool but got "{val}" ({self.path})') - return None - - if not isinstance(val, bool): - self.error_func(f"Expected type bool, got {val} ({self.path})") - return None - - return val - - -parsers = { - p.NAME: p - for p in [ - DefaultObjectParser, - DefaultListParser, - DefaultStringParser, - DefaultIntParser, - DefaultFloatParser, - DefaultBoolParser, - ] -} - - -class BaseValidator: - def __init__(self, layout: dict, metadata: Metadata, path: str = ""): - self.layout = layout - self.metadata = metadata - self.path = path - - def verify(self, value): - """ - Verifies the value: - - Applies defaults - - Converts types - - Logs warnings/errors if needed - """ - raise NotImplementedError("Subclasses must implement verify") - - def check(self, value): - """ - Performs extra-checks (like forbid/require logic) - get_path_func can be used to fetch other values by path. - """ - raise NotImplementedError("Subclasses must implement check") - - -class StringValidator(BaseValidator): - def __init__(self, layout: dict, metadata: Metadata, path: str = ""): - super().__init__(layout, metadata, path) - alternatives = self.layout.get("alternatives") - if alternatives: - self.patterns = {alt: re.compile("^" + alt + "$") for alt in alternatives} - else: - self.patterns = None - - def verify(self, value): - if value is None: - value = self.layout.get("default", "") - if not isinstance(value, str): - self.metadata.warning_func( - f"Property {self.path} was expected to be of type string" - ) - value = str(value) - if self.patterns: - if not any(pattern.match(value) for pattern in self.patterns.values()): - self.metadata.error_func( - f"Property {self.path} is {value} but must match one of {list(self.patterns.keys())}" - ) - value = self.layout.get("default", "") - return value - - def check(self, value): - if not self.patterns: - return - match = next( - (key for key, pattern in self.patterns.items() if pattern.match(value)), - None, - ) - checks = self.layout["alternatives"][match] - for forbidden in checks.get("forbid", []): - other_path, expected = forbidden.split(":") - if self.metadata[other_path] == expected: - self.metadata.error_func( - f"Property {self.path} has value {value} which forbids property {other_path} to have value {expected}" - ) - for required in checks.get("required", []): - if not self.metadata[ - required - ]: # TODO: This is not a good way to handle this check I think - self.metadata.error_func( - f"Property {self.path} has value {value} which requires property {required}" - ) - if "warn" in checks: - self.metadata.warning_func(checks["warn"]) - - -class ObjectValidator(BaseValidator): - def verify(self, value): - if value is None: - return self.layout.get("default", {}) - if self.layout.get("parsing") == "legacy-validation": - if not isinstance(value, str): - self.metadata.error_func( - f"Property {self.path} was expected to be a string" - ) - return {} - elements = value.split() - value = { - "type": elements[0], - "interactive": "interactive" in elements[1:], - "score": "score" in elements[1:], - } - if not isinstance(value, dict): - self.metadata.error_func( - f"property {self.path} was expected to be a dictionary" - ) - return {} - for prop in self.layout.get("required", []): - if prop not in value: - self.metadata.error_func( - f"Missing required property: {self.path}/{prop}" - ) - for prop in value.keys(): - if prop not in self.layout["properties"]: - self.metadata.warning_func(f"Unknown property: {self.path}/{prop}") - return value - - def check(self, value): - pass - - -class ListValidator(BaseValidator): - def verify(self, value): - if value is None: - return self.layout.get("default", []) - if self.layout.get("parsing") == "space-separated-strings": - if not isinstance(value, str): - self.metadata.error_func( - f"Property {self.path} was expected to be a string" - ) - return [] - value = value.split() - if not isinstance(value, list): - self.metadata.error_func(f"property {self.path} was expected to be a list") - return [] - return value - - def check(self, value): - pass - - -class FloatValidator(BaseValidator): - def verify(self, value): - if value is None: - return self.layout.get("default", 0.0) - if not isinstance(value, float): - try: - value = float(value) - except Exception: - self.metadata.error_func( - f"Property {self.path} was expected to be a float" - ) - value = self.layout.get("default", 0.0) - return value - - def check(self, value): - pass - - -class IntValidator(BaseValidator): - def verify(self, value): - if value is None: - return self.layout.get("default", 0) - if not isinstance(value, int): - try: - value = int(value) - self.metadata.warning_func( - f"Property {self.path} should be of type integer, interpreting as {value}" - ) - except Exception: - self.metadata.error_func( - f"Property {self.path} was expected to be an integer" - ) - value = self.layout.get("default", 0) - return value - - def check(self, value): - pass diff --git a/problemtools/config_parser/config_path.py b/problemtools/config_parser/config_path.py new file mode 100644 index 00000000..4921c23c --- /dev/null +++ b/problemtools/config_parser/config_path.py @@ -0,0 +1,120 @@ +from __future__ import annotations +import re +from .general import is_copyfrom +from typing import Any + +class Path: + INDEXING_REGEX = re.compile(r"^([A-Za-z_0-9\-]+)\[(\d+)\]$") + + @staticmethod + def parse(path: str) -> Path: + parts = path.split("/") + res = [] + for part in parts: + m = Path.INDEXING_REGEX.match(part) + if m: + res.append(m.group(1)) + res.append(int(m.group(2))) + else: + res.append(part) + return Path(*res) + + @staticmethod + def combine(*parts: str | int | Path) -> Path: + res = [] + for part in parts: + if isinstance(part, int): + res.append(part) + continue + if isinstance(part, str): + part = Path.parse(part) + res.extend(list(part.path)) # type: ignore + return Path(*res) + + def __init__(self, *path: str | int) -> None: + self.path = path + + def index(self, data: dict) -> Any | None: + rv = data + for part in self.path: + if isinstance(part, int): + if not isinstance(rv, list): + return None + try: + rv = rv[part] + except IndexError: + return None + else: + if part not in rv: + return None + rv = rv[part] + return rv + + def spec_path(self) -> Path: + res = [] + for part in self.path: + if isinstance(part, str): + res.append("properties") + res.append(part) + elif isinstance(part, int): + res.append("content") + return Path(*res) + + def data_paths(self, data: dict) -> list[Path]: + """Finds all data paths that a spec_path is pointing towards (meaning it will explore all items in lists)""" + + def path_is_not_copyfrom(path: Path) -> bool: + return not is_copyfrom(path.index(data)) + + out = [Path()] + state = "base" + for part in self.path: + if state == "base": + if part == "properties": + state = "object-properties" + elif part == "content": + state = "base" + new_out = [] + for path in out: + val = path.index(data) or [] + if is_copyfrom(val): # skip copied + continue + assert isinstance(val, list) + new_out.extend(Path.combine(path, i) for i in range(len(val))) + out = new_out + if len(out) == 0: + return [] + else: + assert False + elif state == "object-properties": + combined_paths = [Path.combine(path, part) for path in out] + out = [*filter(path_is_not_copyfrom, combined_paths)] + state = 'base' + + return out + + def up(self, levels=1) -> Path: + assert levels > 0 + return Path(*self.path[:-levels]) + + def last_name(self) -> int | str: + return self.path[-1] + + def __str__(self) -> str: + strings = [] + for part in self.path: + if isinstance(part, int): + strings[-1] += f"[{part}]" + else: + strings.append(part) + return "/".join(strings) + + def __repr__(self): + return f"Path({self})" + + def __eq__(self, value): + return self.path == value.path + + def __hash__(self): + return hash(self.path) + diff --git a/problemtools/config_parser/general.py b/problemtools/config_parser/general.py new file mode 100644 index 00000000..f1c8eb07 --- /dev/null +++ b/problemtools/config_parser/general.py @@ -0,0 +1,26 @@ +from typing import Any + +class SpecificationError(Exception): + pass + +def is_copyfrom(val: Any) -> bool: + return isinstance(val, tuple) and val[0] == "copy-from" + +type_field_mapping = { + "*": ["default", "type", "flags", "parsing"], + "string": ["alternatives"], + "bool": ["alternatives"], + "int": ["alternatives"], + "float": ["alternatives"], + "object": ["required", "properties"], + "list": ["content"], +} + +type_mapping = { + "string": str, + "object": dict, + "list": list, + "bool": bool, + "int": int, + "float": float, +} \ No newline at end of file diff --git a/problemtools/config_parser/matcher.py b/problemtools/config_parser/matcher.py new file mode 100644 index 00000000..4e895b9d --- /dev/null +++ b/problemtools/config_parser/matcher.py @@ -0,0 +1,113 @@ +from .general import SpecificationError +import re + +class AlternativeMatch: + def __init__(self, matchstr): + raise NotImplementedError("Specialize in subclass") + + def check(self, val) -> bool: + raise NotImplementedError("Specialize in subclass") + + @staticmethod + def get_matcher(type, matchstr) -> "AlternativeMatch": + matchers = { + "string": StringMatch, + "int": IntMatch, + "float": FloatMatch, + "bool": BoolMatch, + } + assert type in matchers + return matchers[type](matchstr) + + +class StringMatch(AlternativeMatch): + def __init__(self, matchstr): + self.regex = re.compile(matchstr) + + def check(self, val) -> bool: + return self.regex.match(val) + + def __str__(self) -> str: + self.regex.pattern + + +class IntMatch(AlternativeMatch): + def __init__(self, matchstr: str | int): + if isinstance(matchstr, int): + self.start = self.end = matchstr + return + try: + if matchstr.count(":") > 1: + raise ValueError + if ":" in matchstr: + self.start, self.end = [ + int(p) if p else None for p in map(str.strip, matchstr.split()) + ] + else: + matchstr = matchstr.strip() + if not matchstr: + raise SpecificationError("Match string for integer was left empty") + self.start = self.end = int(matchstr) + except ValueError: + raise SpecificationError( + 'Int match string should be of the form "A:B" where A and B can be parsed as ints or left empty, or a single integer' + ) + + def check(self, val) -> bool: + if not isinstance(val, int): + return False + if self.start is not None: + if val < self.start: + return False + if self.end is not None: + if val > self.start: + return False + return True + + def __str__(self): + if A == B: + return str(A) + A = str(self.start) if self.start is not None else "" + B = str(self.end) if self.end is not None else "" + return f"{A}:{B}" + + +class FloatMatch(AlternativeMatch): + def __init__(self, matchstr: str): + try: + if matchstr.count(":") != 1: + raise ValueError + first, second = [p.strip() for p in matchstr.split(":")] + self.start = float(first) if first else float("-inf") + self.end = float(second) if second else float("inf") + except ValueError: + raise SpecificationError( + 'Float match string should be of the form "A:B" where A and B can be parsed as floats or left empty' + ) + + def check(self, val) -> bool: + return isinstance(val, float) and self.start <= val <= self.end + + def __str__(self): + A = str(self.start) if self.start != float("-inf") else "" + B = str(self.end) if self.end != float("inf") else "" + return f"{A}:{B}" + + +class BoolMatch(AlternativeMatch): + def __init__(self, matchstr: str | bool): + if isinstance(matchstr, bool): + self.val = matchstr + return + matchstr = matchstr.strip().lower() + if matchstr not in {"true", "false"}: + raise SpecificationError( + 'Bool match string should be either "true" or "false"' + ) + self.val = {"true": True, "false": False}[matchstr] + + def check(self, val) -> bool: + return isinstance(val, bool) and val == self.val + + def __str__(self): + return str(self.val) diff --git a/problemtools/config_parser/parser.py b/problemtools/config_parser/parser.py new file mode 100644 index 00000000..f51eec9c --- /dev/null +++ b/problemtools/config_parser/parser.py @@ -0,0 +1,301 @@ +from typing import Callable +from .config_path import Path +from .general import SpecificationError, type_field_mapping, type_mapping +from .matcher import AlternativeMatch + +class Parser: + NAME: str = "" + OUTPUT_TYPE: str = "" + + @staticmethod + def get_dependencies() -> list[Path]: + return [] + + def __init__( + self, + data: dict, + specification: dict, + path: Path, + warning_func: Callable, + error_func: Callable, + ): + self.data = data + self.specification = specification + self.path = path + self.spec_path = path.spec_path() + self.warning_func = warning_func + self.error_func = error_func + + if not self.NAME: + raise NotImplementedError( + "Subclasses of Parser need to set the name of the parsing rule" + ) + if not self.OUTPUT_TYPE: + raise NotImplementedError( + "Subclasses of Parser need to set the output type of the parsing rule" + ) + + required_type = self.spec_path.index(specification)["type"] + if required_type != self.OUTPUT_TYPE: + raise SpecificationError( + f"Parsing rule for {path} outputs {self.OUTPUT_TYPE}, but the output should be of type {required_type}" + ) + + if self.OUTPUT_TYPE in ("string", "int", "float", "bool"): + alternatives = Path.combine(self.spec_path, "alternatives").index( + self.specification + ) + if alternatives is None: + self.alternatives = None + else: + self.alternatives = [ + (AlternativeMatch.get_matcher(self.OUTPUT_TYPE, key), val) + for key, val in alternatives.items() + ] + else: + self.alternatives = None + + def parse(self): + val = self.path.index(self.data) + out = self._parse(val) + + flags = Path.combine(self.spec_path, "flags").index(self.specification) or [] + if "deprecated" in flags and out is not None: + self.warning_func(f"deprecated property was provided ({self.path})") + + if self.alternatives is not None: + found_match = False + for matcher, checks in self.alternatives: + if matcher.check(out): + found_match = True + if "warn" in checks: + self.warning_func(checks["warn"]) + if not found_match: + alts = ", ".join(f'"{matcher}"' for matcher in self.alternatives.keys()) + self.error_func( + f"Property {self.path} did not match any of the specified alternatives ({alts})" + ) + out = None + + if out is None: + fallback = Path.combine(self.spec_path, "default").index(self.specification) + if fallback is not None: + if isinstance(fallback, str) and fallback.startswith("copy-from:"): + fallback = ("copy-from", Path.parse(fallback.split(":")[1])) + return fallback + return type_mapping[self.OUTPUT_TYPE]() + + if not ( + isinstance(out, tuple) or isinstance(out, type_mapping[self.OUTPUT_TYPE]) + ): + raise SpecificationError( + f'Parsing rule "{self.NAME}" did not output the correct type. Output was: {out}' + ) + return out + + def _parse(self, val): + raise NotImplementedError("Subclasses of Parse need to implement _parse()") + + def smallest_edit_dist(a: str, b: list[str]) -> str: + def edit_dist(a: str, b: str) -> int: + n = len(a) + m = len(b) + dp = [[0] * (m + 1) for _ in range(n + 1)] + for i in range(n + 1): + dp[i][0] = i + for j in range(m + 1): + dp[0][j] = j + for i in range(1, n + 1): + for j in range(1, m + 1): + if a[i - 1] == b[j - 1]: + dp[i][j] = dp[i - 1][j - 1] + else: + dp[i][j] = 1 + min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1]) + return dp[n][m] + best = b[0] + best_dist = edit_dist(a, best) + for s in b[1:]: + dist = edit_dist(a, s) + if dist < best_dist: + best = s + best_dist = dist + return best + + @staticmethod + def get_parser_type(specification: dict) -> type: + parsing_rule = specification.get("parsing") + if parsing_rule is None: + typ = specification.get("type") + + if typ is None: + had = "', '".join(specification.keys()) + raise SpecificationError( + f"Specification did not have a MUST HAVE field 'type', the provided fields were: ('{had}')" + ) + + if typ not in type_mapping: + valid = "', '".join(type_mapping.keys()) + closest = Parser.smallest_edit_dist(typ, [*type_mapping.keys()]) + raise SpecificationError( + f"Type '{typ}' is not a valid type. Did you mean: '{closest}'? Otherwise valid types are: ('{valid}')" + ) + + + fields = specification.keys() + allowed_fields = type_field_mapping.get(typ, []) + type_field_mapping["*"] + for field in fields: + if field not in allowed_fields: + raise SpecificationError( + f"Field '{field}' is not allowed for type '{typ}', did you mean '{Parser.smallest_edit_dist(field, allowed_fields)}'?" + ) + + parsing_rule = f"default-{typ}-parser" + if parsing_rule not in parsers: + raise SpecificationError(f'Parser "{parsing_rule}" is not implemented') + return parsers[parsing_rule] + + +class DefaultStringParser(Parser): + NAME = "default-string-parser" + OUTPUT_TYPE = "string" + + def _parse(self, val): + if val is None: + return None + if not isinstance(val, str): + self.warning_func( + f"Expected value of type string but got {val}, casting to string ({self.path})" + ) + val = str(val) + return val + + +class DefaultObjectParser(Parser): + NAME = "default-object-parser" + OUTPUT_TYPE = "object" + + def _parse(self, val): + if val is None: + return None + + if not isinstance(val, dict): + self.error_func(f"Expected an object, got {val} ({self.path})") + return None + + required = ( + Path.combine(self.spec_path, "required").index(self.specification) or [] + ) + for req in required: + req_path = Path.combine(self.path, req) + if req_path.index(self.data) is None: + self.error_func(f"Missing required property: {req_path}") + + remove = [] + known_props = Path.combine(self.spec_path, "properties").index( + self.specification + ) + for prop in val.keys(): + if prop not in known_props: + self.warning_func(f"Unknown property: {Path.combine(self.path, prop)}") + remove.append(prop) + for r in remove: + del val[r] + + return val + + +class DefaultListParser(Parser): + NAME = "default-list-parser" + OUTPUT_TYPE = "list" + + def _parse(self, val): + if val is None: + return None + + if not isinstance(val, list): + self.error_func(f"Expected a list, got {val} ({self.path})") + return None + + return val + + +class DefaultIntParser(Parser): + NAME = "default-int-parser" + OUTPUT_TYPE = "int" + + def _parse(self, val): + if val is None: + return None + + if not isinstance(val, int): + try: + cast = int(val) + self.warning_func( + f"Expected type int, got {val}. Casting to {cast} ({self.path})" + ) + val = cast + except ValueError: + self.error_func(f"Expected a int, got {val} ({self.path})") + return None + + return val + + +class DefaultFloatParser(Parser): + NAME = "default-float-parser" + OUTPUT_TYPE = "float" + + def _parse(self, val): + if val is None: + return None + + if not isinstance(val, float): + try: + cast = float(val) + self.warning_func( + f"Expected type float, got {val}. Casting to {cast} ({self.path})" + ) + val = cast + except ValueError: + self.error_func(f"Expected a float, got {val} ({self.path})") + return None + + return val + + +class DefaultBoolParser(Parser): + NAME = "default-bool-parser" + OUTPUT_TYPE = "bool" + + def _parse(self, val): + if val is None: + return None + + if isinstance(val, str): + if val.lower() in ("true", "false"): + interpretation = val.lower() == "true" + self.warning_func( + f'Expected type bool but got a string "{val}" which will be interpreted as {interpretation} ({self.path})' + ) + val = interpretation + else: + self.error_func(f'Expected type bool but got "{val}" ({self.path})') + return None + + if not isinstance(val, bool): + self.error_func(f"Expected type bool, got {val} ({self.path})") + return None + + return val + +parsers = { + p.NAME: p + for p in [ + DefaultObjectParser, + DefaultListParser, + DefaultStringParser, + DefaultIntParser, + DefaultFloatParser, + DefaultBoolParser, + ] +} From c67a96f00fcd859eec76f17506cf9e9a812100d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20S=C3=B6derbergh?= Date: Sun, 23 Mar 2025 22:55:41 +0100 Subject: [PATCH 13/32] Make config_path more robust, split some things Co-authored-by: Zazmuz --- problemtools/config_parser/__init__.py | 15 ++-- problemtools/config_parser/config_path.py | 78 +++++++++++++------ problemtools/config_parser/general.py | 22 ------ problemtools/config_parser/parser.py | 44 +++++++---- .../config_parser_tests/test_config_path.py | 19 +++-- 5 files changed, 108 insertions(+), 70 deletions(-) diff --git a/problemtools/config_parser/__init__.py b/problemtools/config_parser/__init__.py index cb086975..d8827624 100644 --- a/problemtools/config_parser/__init__.py +++ b/problemtools/config_parser/__init__.py @@ -1,11 +1,12 @@ from __future__ import annotations import re -from typing import Any, Callable, Generator, Literal, Pattern, Match, ParamSpec, TypeVar -from collections import defaultdict, deque +from typing import Any, Callable, Generator +from collections import deque from copy import deepcopy -from .general import SpecificationError, is_copyfrom, type_field_mapping, type_mapping -from .matcher import AlternativeMatch -from .config_path import Path +from .general import SpecificationError +from .config_path import is_copyfrom +from .parser import type_mapping +from .config_path import Path, PathError from .parser import Parser, DefaultObjectParser @@ -114,7 +115,7 @@ def load_config(self, config: dict, injected_data: dict) -> None: parser = Parser.get_parser_type(spec)( self.data, self.spec, full_path, self.warning_func, self.error_func ) - full_path.up().index(self.data)[full_path.last_name()] = parser.parse() + full_path.set(self.data, parser.parse()) self.data.update(injected_data) for full_path in Metadata.topo_sort(self.get_copy_dependencies()): @@ -136,7 +137,7 @@ def load_config(self, config: dict, injected_data: dict) -> None: raise SpecificationError( f"copy-from directive provided the wrong type (property: {full_path}, copy-property: {val[1]})" ) - full_path.up().index(self.data)[full_path.last_name()] = copy_val + full_path.set(self.data, copy_val) def check_config(self) -> None: pass diff --git a/problemtools/config_parser/config_path.py b/problemtools/config_parser/config_path.py index 4921c23c..8db39453 100644 --- a/problemtools/config_parser/config_path.py +++ b/problemtools/config_parser/config_path.py @@ -1,56 +1,88 @@ from __future__ import annotations import re -from .general import is_copyfrom from typing import Any -class Path: - INDEXING_REGEX = re.compile(r"^([A-Za-z_0-9\-]+)\[(\d+)\]$") +class PathError(Exception): + pass + +def is_copyfrom(val: Any) -> bool: + return isinstance(val, tuple) and val[0] == "copy-from" +class Path: + """Class for indexing nested dictionaries, that may also contain lists. + The text version separates keys by "/". + To index a list, use list[index]. An example of a string path is "/foo/bar[3]/baz", + which means indexing as dict["foo"]["bar"][3]["baz"]. + """ + DICT_MATCH = re.compile(r"[A-Za-z_\-][A-Za-z_\d\-]*") + LIST_MATCH = re.compile(r"\d+") + @staticmethod def parse(path: str) -> Path: - parts = path.split("/") - res = [] - for part in parts: - m = Path.INDEXING_REGEX.match(part) - if m: - res.append(m.group(1)) - res.append(int(m.group(2))) - else: - res.append(part) - return Path(*res) + def parse_part(part: str): + if Path.DICT_MATCH.match(part): + return part + elif Path.LIST_MATCH.match(part): + return int(part) + raise PathError(f'Could not parse path: "{path}"') + return Path(*(parse_part(p) for p in re.sub(r'\[(\d+)\]', r'/\1', path).split("/"))) @staticmethod def combine(*parts: str | int | Path) -> Path: + """Fuse multiple paths together into one path""" res = [] for part in parts: if isinstance(part, int): res.append(part) - continue - if isinstance(part, str): - part = Path.parse(part) - res.extend(list(part.path)) # type: ignore + elif isinstance(part, str): + res.extend(Path.parse(part).path) + elif isinstance(part, Path): + res.extend(list(part.path)) + else: + raise PathError(f'Unknown type in parts: {type(part)}') return Path(*res) def __init__(self, *path: str | int) -> None: + for p in path: + if isinstance(p, int): + if p < 0: + raise PathError('Indexes should be positive') + elif isinstance(p, str): + if not Path.DICT_MATCH.match(p): + raise PathError(f'Invalid dictionary-key: "{p}"') + else: + raise PathError(f'Invalid type for path: "{type(p)}"') self.path = path - def index(self, data: dict) -> Any | None: + def index(self, data: dict, fallback: Any = ...) -> Any: rv = data for part in self.path: if isinstance(part, int): if not isinstance(rv, list): - return None + if fallback == ...: + raise PathError(f'Tried to index non-list type with an integer ({self.path})') + return fallback try: rv = rv[part] except IndexError: - return None + if fallback == ...: + raise PathError(f'Tried to index list out of range ({self.path})') + return fallback else: if part not in rv: - return None + if fallback == ...: + raise PathError(f'Tried to access invalid key "{part}" ({self.path})') + return fallback rv = rv[part] return rv + def set(self, data: dict, value): + if self == Path(): + raise PathError('Can not set root of dictionary with Path') + self.up(1).index(data)[self.last_name()] = value + def spec_path(self) -> Path: + """Get corresponding specification-path to property""" res = [] for part in self.path: if isinstance(part, str): @@ -64,7 +96,7 @@ def data_paths(self, data: dict) -> list[Path]: """Finds all data paths that a spec_path is pointing towards (meaning it will explore all items in lists)""" def path_is_not_copyfrom(path: Path) -> bool: - return not is_copyfrom(path.index(data)) + return not is_copyfrom(path.index(data, None)) out = [Path()] state = "base" @@ -107,7 +139,7 @@ def __str__(self) -> str: strings[-1] += f"[{part}]" else: strings.append(part) - return "/".join(strings) + return "/" + "/".join(strings) def __repr__(self): return f"Path({self})" diff --git a/problemtools/config_parser/general.py b/problemtools/config_parser/general.py index f1c8eb07..08e66563 100644 --- a/problemtools/config_parser/general.py +++ b/problemtools/config_parser/general.py @@ -2,25 +2,3 @@ class SpecificationError(Exception): pass - -def is_copyfrom(val: Any) -> bool: - return isinstance(val, tuple) and val[0] == "copy-from" - -type_field_mapping = { - "*": ["default", "type", "flags", "parsing"], - "string": ["alternatives"], - "bool": ["alternatives"], - "int": ["alternatives"], - "float": ["alternatives"], - "object": ["required", "properties"], - "list": ["content"], -} - -type_mapping = { - "string": str, - "object": dict, - "list": list, - "bool": bool, - "int": int, - "float": float, -} \ No newline at end of file diff --git a/problemtools/config_parser/parser.py b/problemtools/config_parser/parser.py index f51eec9c..4640d924 100644 --- a/problemtools/config_parser/parser.py +++ b/problemtools/config_parser/parser.py @@ -1,8 +1,28 @@ from typing import Callable from .config_path import Path -from .general import SpecificationError, type_field_mapping, type_mapping +from .general import SpecificationError from .matcher import AlternativeMatch +type_mapping = { + "string": str, + "object": dict, + "list": list, + "bool": bool, + "int": int, + "float": float, +} + +type_field_mapping = { + "*": ["default", "type", "flags", "parsing"], + "string": ["alternatives"], + "bool": ["alternatives"], + "int": ["alternatives"], + "float": ["alternatives"], + "object": ["required", "properties"], + "list": ["content"], +} + + class Parser: NAME: str = "" OUTPUT_TYPE: str = "" @@ -38,12 +58,13 @@ def __init__( required_type = self.spec_path.index(specification)["type"] if required_type != self.OUTPUT_TYPE: raise SpecificationError( - f"Parsing rule for {path} outputs {self.OUTPUT_TYPE}, but the output should be of type {required_type}" + f"Parsing rule ({self.NAME}) for {path} outputs {self.OUTPUT_TYPE}, but the output should be of type {required_type}" ) if self.OUTPUT_TYPE in ("string", "int", "float", "bool"): alternatives = Path.combine(self.spec_path, "alternatives").index( - self.specification + self.specification, + None ) if alternatives is None: self.alternatives = None @@ -56,10 +77,9 @@ def __init__( self.alternatives = None def parse(self): - val = self.path.index(self.data) - out = self._parse(val) + out = self._parse(self.path.index(self.data, None)) - flags = Path.combine(self.spec_path, "flags").index(self.specification) or [] + flags = Path.combine(self.spec_path, "flags").index(self.specification, []) if "deprecated" in flags and out is not None: self.warning_func(f"deprecated property was provided ({self.path})") @@ -78,12 +98,10 @@ def parse(self): out = None if out is None: - fallback = Path.combine(self.spec_path, "default").index(self.specification) - if fallback is not None: - if isinstance(fallback, str) and fallback.startswith("copy-from:"): - fallback = ("copy-from", Path.parse(fallback.split(":")[1])) - return fallback - return type_mapping[self.OUTPUT_TYPE]() + fallback = Path.combine(self.spec_path, "default").index(self.specification, type_mapping[self.OUTPUT_TYPE]()) + if isinstance(fallback, str) and fallback.startswith("copy-from:"): + fallback = ("copy-from", Path.parse(fallback.split(":")[1])) + return fallback if not ( isinstance(out, tuple) or isinstance(out, type_mapping[self.OUTPUT_TYPE]) @@ -183,7 +201,7 @@ def _parse(self, val): return None required = ( - Path.combine(self.spec_path, "required").index(self.specification) or [] + Path.combine(self.spec_path, "required").index(self.specification, []) ) for req in required: req_path = Path.combine(self.path, req) diff --git a/problemtools/tests/config_parser_tests/test_config_path.py b/problemtools/tests/config_parser_tests/test_config_path.py index 0d5bc382..02eebd50 100644 --- a/problemtools/tests/config_parser_tests/test_config_path.py +++ b/problemtools/tests/config_parser_tests/test_config_path.py @@ -1,9 +1,10 @@ -from problemtools.config_parser import Path +from problemtools.config_parser import Path, PathError def test_parse(): assert Path.parse("a/b/c") == Path("a", "b", "c") assert Path.parse("array[0]/key") == Path("array", 0, "key") assert Path.parse("nested/list[2]/item") == Path("nested", "list", 2, "item") + assert Path.parse("multiple_lists[0][0][0][0]") == Path("multiple_lists", 0, 0, 0, 0) def test_combine(): assert Path.combine("a", "b", "c") == Path("a", "b", "c") @@ -13,8 +14,16 @@ def test_combine(): def test_index(): data = {"a": {"b": [1, 2, 3]}} assert Path("a", "b", 1).index(data) == 2 - assert Path("a", "c").index(data) is None - assert Path("a", "b", 10).index(data) is None + try: + Path("a", "c").index(data) + assert False and "Should crash on invalid indexing" + except PathError: + pass + try: + Path("a", "b", 10).index(data) is None + assert False and "Should crash on invalid indexing" + except PathError: + pass def test_spec_path(): assert Path("a", "b", 2).spec_path() == Path("properties", "a", "properties", "b", "content") @@ -34,8 +43,8 @@ def test_last_name(): assert Path("list", 3).last_name() == 3 def test_str_repr(): - assert str(Path("a", "b", 2)) == "a/b[2]" - assert repr(Path("x", "y")) == "Path(x/y)" + assert str(Path("a", "b", 2)) == "/a/b[2]" + assert repr(Path("x", "y")) == "Path(/x/y)" def test_equality_hash(): p1 = Path("a", "b", 1) From 5a4bd24746a63dfd3485f1fd99fac4621bfe634f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20S=C3=B6derbergh?= Date: Mon, 24 Mar 2025 11:31:10 +0100 Subject: [PATCH 14/32] More parsing rules, more tests Co-authored-by: Zazmuz --- problemtools/config_parser/__init__.py | 3 +- problemtools/config_parser/matcher.py | 7 +- problemtools/config_parser/notes.txt | 2 +- problemtools/config_parser/parser.py | 100 ++++++++++- problemtools/config_parser/wip_format.yaml | 4 +- ...config_alternatives_bool_misspelled_1.yaml | 21 +++ ...onfig_alternatives_bool_misspelled_2.yaml} | 2 +- .../config/legacy_specification.yaml | 169 ++++++++++++++++++ .../tests/config_parser_tests/helper.py | 2 + .../test_basic_config_valid.py | 20 --- .../test_invalid_config.py | 71 ++++---- .../config_parser_tests/test_valid_config.py | 38 ++++ problemtools/verifyproblem.py | 2 +- 13 files changed, 379 insertions(+), 62 deletions(-) create mode 100644 problemtools/tests/config_parser_tests/config/config_alternatives_bool_misspelled_1.yaml rename problemtools/tests/config_parser_tests/config/{config_alternatives_misspelled.yaml => config_alternatives_bool_misspelled_2.yaml} (89%) create mode 100644 problemtools/tests/config_parser_tests/config/legacy_specification.yaml delete mode 100644 problemtools/tests/config_parser_tests/test_basic_config_valid.py create mode 100644 problemtools/tests/config_parser_tests/test_valid_config.py diff --git a/problemtools/config_parser/__init__.py b/problemtools/config_parser/__init__.py index d8827624..01938680 100644 --- a/problemtools/config_parser/__init__.py +++ b/problemtools/config_parser/__init__.py @@ -31,6 +31,7 @@ def set_warning_callback(self, fun: Callable): self.warning_func = fun def invert_graph(dependency_graph: dict[Path, list[Path]]): + # TODO: catch key errors better depends_on_graph = {k: [] for k in dependency_graph.keys()} for dependant, dependencies in dependency_graph.items(): for dependency in dependencies: @@ -71,7 +72,7 @@ def get_path_dependencies(self) -> dict[Path, list[Path]]: while stack: parent, p = stack.pop() spec = p.index(self.spec) - deps = Parser.get_parser_type(spec).get_dependencies() + deps = [d.spec_path() for d in Parser.get_parser_type(spec).get_dependencies()] if len(parent.path) > 0: deps.append(parent) graph[p] = deps diff --git a/problemtools/config_parser/matcher.py b/problemtools/config_parser/matcher.py index 4e895b9d..324b1d95 100644 --- a/problemtools/config_parser/matcher.py +++ b/problemtools/config_parser/matcher.py @@ -25,10 +25,11 @@ def __init__(self, matchstr): self.regex = re.compile(matchstr) def check(self, val) -> bool: - return self.regex.match(val) + return isinstance(val, str) and self.regex.match(val) def __str__(self) -> str: - self.regex.pattern + return self.regex.pattern + class IntMatch(AlternativeMatch): @@ -50,7 +51,7 @@ def __init__(self, matchstr: str | int): self.start = self.end = int(matchstr) except ValueError: raise SpecificationError( - 'Int match string should be of the form "A:B" where A and B can be parsed as ints or left empty, or a single integer' + f'Int match string should be of the form "A:B" where A and B can be parsed as ints or left empty, or a single integer, not "{matchstr}"' ) def check(self, val) -> bool: diff --git a/problemtools/config_parser/notes.txt b/problemtools/config_parser/notes.txt index 4d9a6027..425f4bac 100644 --- a/problemtools/config_parser/notes.txt +++ b/problemtools/config_parser/notes.txt @@ -84,7 +84,7 @@ Is being checked: } }, }, - "licence": { + "license": { "type": "string", "default": "unknown", "alternatives": { diff --git a/problemtools/config_parser/parser.py b/problemtools/config_parser/parser.py index 4640d924..e82d31f1 100644 --- a/problemtools/config_parser/parser.py +++ b/problemtools/config_parser/parser.py @@ -91,7 +91,7 @@ def parse(self): if "warn" in checks: self.warning_func(checks["warn"]) if not found_match: - alts = ", ".join(f'"{matcher}"' for matcher in self.alternatives.keys()) + alts = ", ".join(f'"{matcher}"' for matcher, _ in self.alternatives) self.error_func( f"Property {self.path} did not match any of the specified alternatives ({alts})" ) @@ -293,11 +293,11 @@ def _parse(self, val): if val.lower() in ("true", "false"): interpretation = val.lower() == "true" self.warning_func( - f'Expected type bool but got a string "{val}" which will be interpreted as {interpretation} ({self.path})' + f'Expected type bool but got stringified bool: "{val}" which will be interpreted as {interpretation} ({self.path})' ) val = interpretation else: - self.error_func(f'Expected type bool but got "{val}" ({self.path})') + self.error_func(f'Expected type bool, but got "{val}" ({self.path})') return None if not isinstance(val, bool): @@ -306,6 +306,96 @@ def _parse(self, val): return val +class RightsOwnerLegacy(Parser): + NAME = "rights-owner-legacy" + OUTPUT_TYPE = "string" + + @staticmethod + def get_dependencies() -> list[Path]: + return [Path("author"), Path("source"), Path("license")] + + def _parse(self, val): + if isinstance(val, str): + return val + + if val is None and Path("license").index(self.data) != "public domain": + author = Path("author").index(self.data) + if len(author) > 0: + return author + source = Path("source").index(self.data) + if len(source) > 0: + return source + + return None + +class LegacyValidation(Parser): + NAME = "legacy-validation" + OUTPUT_TYPE = "object" + + def _parse(self, val): + if val is None: + return None + + if not isinstance(val, str): + self.error_func(f'Property {self.path} was expected to be given as type string') + return None + + args = val.split() + if args[0] not in ("default", "custom"): + self.error_func(f'First argument of {self.path} was expected to be either "default" or "custom"') + return None + + if len(set(args)) != len(args): + self.warning_func(f'Arguments of {self.path} contains duplicate values') + + for arg in args[1:]: + if arg not in ("score", "interactive"): + self.warning_func(f'Invalid argument "{arg}" in {self.path}') + + return { + "type": args[0], + "interactive": "interactive" in args, + "score": "score" in args + } + +class SpaceSeparatedStrings(Parser): + NAME = "space-separated-strings" + OUTPUT_TYPE = "list" + + def _parse(self, val): + if val is None: + return None + + if not isinstance(val, str): + self.error_func(f'Property {self.path} was expected to be of type string') + return None + + return val.split() + +class MinMaxFloatString(Parser): + NAME = "min-max-float-string" + OUTPUT_TYPE = "object" + + def _parse(self, val): + if val is None: + return None + + if not isinstance(val, str): + self.error_func(f'Property {self.path} was expected to be of type string') + return None + + args = val.split() + if len(args) != 2: + self.error_func(f'Property {self.path} was expected to contain exactly two space-separated floats') + return None + + try: + a, b = map(float, args) + except ValueError: + self.error_func(f'Failed to parse arguments of {self.path} as floats') + + return {"min": a, "max": b} + parsers = { p.NAME: p for p in [ @@ -315,5 +405,9 @@ def _parse(self, val): DefaultIntParser, DefaultFloatParser, DefaultBoolParser, + RightsOwnerLegacy, + LegacyValidation, + SpaceSeparatedStrings, + MinMaxFloatString, ] } diff --git a/problemtools/config_parser/wip_format.yaml b/problemtools/config_parser/wip_format.yaml index 6296d0fd..5feaeadd 100644 --- a/problemtools/config_parser/wip_format.yaml +++ b/problemtools/config_parser/wip_format.yaml @@ -28,7 +28,7 @@ properties: ".+": require: - source: ".+" - licence: + license: type: string default: unknown alternatives: @@ -44,7 +44,7 @@ properties: - rights_owner: ".+" rights_owner: type: string - parsing: rights_owner_legacy + parsing: rights-owner-legacy limits: type: object properties: diff --git a/problemtools/tests/config_parser_tests/config/config_alternatives_bool_misspelled_1.yaml b/problemtools/tests/config_parser_tests/config/config_alternatives_bool_misspelled_1.yaml new file mode 100644 index 00000000..d391b0f6 --- /dev/null +++ b/problemtools/tests/config_parser_tests/config/config_alternatives_bool_misspelled_1.yaml @@ -0,0 +1,21 @@ +type: object +require: + - bar + - boz +properties: + foo: + flags: + - deprecated + type: int + default: 69 + bar: + type: string + alternatives: + x: {} + baz: + type: bool + default: True + alternatives: + Sann: + warn: "true is now deprecated" + False: {} \ No newline at end of file diff --git a/problemtools/tests/config_parser_tests/config/config_alternatives_misspelled.yaml b/problemtools/tests/config_parser_tests/config/config_alternatives_bool_misspelled_2.yaml similarity index 89% rename from problemtools/tests/config_parser_tests/config/config_alternatives_misspelled.yaml rename to problemtools/tests/config_parser_tests/config/config_alternatives_bool_misspelled_2.yaml index 339036c9..3d2be4d8 100644 --- a/problemtools/tests/config_parser_tests/config/config_alternatives_misspelled.yaml +++ b/problemtools/tests/config_parser_tests/config/config_alternatives_bool_misspelled_2.yaml @@ -16,6 +16,6 @@ properties: type: bool default: True alternatives: - Sann: + True: warn: "true is now deprecated" Falsk: {} \ No newline at end of file diff --git a/problemtools/tests/config_parser_tests/config/legacy_specification.yaml b/problemtools/tests/config_parser_tests/config/legacy_specification.yaml new file mode 100644 index 00000000..5feaeadd --- /dev/null +++ b/problemtools/tests/config_parser_tests/config/legacy_specification.yaml @@ -0,0 +1,169 @@ +type: object +required: [] +properties: + problem_format_version: + type: string + default: legacy + alternatives: + legacy: {} + type: + type: string + default: pass-fail + alternatives: + pass-fail: + forbid: + - grading/on_reject: grade + scoring: {} + name: + type: string + uuid: + type: string + author: + type: string + source: + type: string + source_url: + type: string + alternatives: + ".+": + require: + - source: ".+" + license: + type: string + default: unknown + alternatives: + unknown: + warn: License is unknown + require: + - rights_owner: ".+" + cc0|cc by|cc by-sa|educational|permission: + require: + - rights_owner: ".+" + public domain: + forbid: + - rights_owner: ".+" + rights_owner: + type: string + parsing: rights-owner-legacy + limits: + type: object + properties: + time_multiplier: + type: float + default: 5 + alternatives: + "0.0:": {} + time_safety_margin: + type: float + default: 2 + memory: + type: int + default: copy-from:system_default/memory + alternatives: + "0:": {} + output: + type: int + default: copy-from:system_default/output + alternatives: + "0:": {} + code: + type: int + default: copy-from:system_default/code + alternatives: + "0:": {} + compilation_time: + type: float + default: copy-from:system_default/compilation_time + alternatives: + "0.0:": {} + compilation_memory: + type: int + default: copy-from:system_default/compilation_memory + alternatives: + "0:": {} + validation_time: + type: float + default: copy-from:system_default/validation_time + alternatives: + "0.0:": {} + validation_memory: + type: int + default: copy-from:system_default/validation_memory + alternatives: + "0:": {} + validation_output: + type: int + default: copy-from:system_default/validation_output + alternatives: + "0:": {} + validation: + type: object + parsing: legacy-validation + properties: + type: + type: string + alternatives: + default: + forbid: + - validation/interactive: true + - validation/score: true + custom: {} + interactive: + type: bool + score: + type: bool + validator_flags: + type: string + keywords: + type: list + parsing: space-separated-strings + content: + type: string + grading: + type: object + properties: + show_test_data_groups: + type: bool + alternatives: + True: + forbid: + - type: pass-fail + False: {} + on_reject: + type: string + default: worst_error + flags: + - deprecated + alternatives: + first_error: {} + worst_error: {} + grade: {} + objective: + type: string + default: max + alternatives: + min: {} + max: {} + accept_score: + type: float # Should actually be type string, add custom parsing? + default: 1.0 + flags: + - deprecated + reject_score: + type: float # Should actually be type string, add custom parsing? + default: 0.0 + flags: + - deprecated + range: + type: object + parsing: min-max-float-string + flags: + - deprecated + properties: + min: + type: float + default: .inf + max: + type: float + default: -.inf + diff --git a/problemtools/tests/config_parser_tests/helper.py b/problemtools/tests/config_parser_tests/helper.py index 507bd4ae..c02f70a2 100644 --- a/problemtools/tests/config_parser_tests/helper.py +++ b/problemtools/tests/config_parser_tests/helper.py @@ -19,6 +19,8 @@ def errors_add(text: str): errors.append(text) def construct_metadata(spec_file, config_file, injected_data) -> Metadata: + errors.clear() + warnings.clear() spec = load_yaml(spec_file) config = load_yaml(config_file) md = Metadata(spec) diff --git a/problemtools/tests/config_parser_tests/test_basic_config_valid.py b/problemtools/tests/config_parser_tests/test_basic_config_valid.py deleted file mode 100644 index a24c170d..00000000 --- a/problemtools/tests/config_parser_tests/test_basic_config_valid.py +++ /dev/null @@ -1,20 +0,0 @@ -from problemtools.config_parser import Metadata -from helper import * - -injected = { - 'external': { - "cool-string": "yo this string is ballin" - } -} - -data = construct_metadata('basic_config.yaml', 'follows_basic_config.yaml', injected) - -print(f"warnings: {warnings}") -print(f"errors: {errors}") - -assert data["foo"] == 1337 -assert data["bar"] == "z" -assert data["baz"] == True -assert abs(data["boz"] - 3.5) < 0.01 -assert data["copied"] == "yo this string is ballin" -assert len(warnings) > 0 \ No newline at end of file diff --git a/problemtools/tests/config_parser_tests/test_invalid_config.py b/problemtools/tests/config_parser_tests/test_invalid_config.py index 26a71dc0..116db529 100644 --- a/problemtools/tests/config_parser_tests/test_invalid_config.py +++ b/problemtools/tests/config_parser_tests/test_invalid_config.py @@ -2,46 +2,57 @@ from problemtools.config_parser import SpecificationError from helper import construct_metadata, warnings, errors -def test_misspelled_type(): +def run_test(config_yaml, follows_config_yaml, starts_with_error=None, additional_assert_function=None): + """ + Run a basic test and check for raised SpecificationError message. + """ try: - construct_metadata('config_type_misspelled.yaml', 'follows_config_misspelled.yaml', {}) + construct_metadata(config_yaml, follows_config_yaml, {}) assert False, 'Should have raised SpecificationError' except SpecificationError as e: e = str(e) - assert e.startswith("Specification did not have a MUST HAVE field 'type',") + if starts_with_error: + assert e.startswith(starts_with_error) + if additional_assert_function: + assert additional_assert_function(e) + print(f'warnings: {warnings}') print(f'errors: {errors}') +def test_misspelled_type(): + run_test( + 'config_type_misspelled.yaml', + 'follows_config_misspelled.yaml', + starts_with_error="Specification did not have a MUST HAVE field 'type'," + ) + def test_misspelled_typevalue(): - try: - construct_metadata('config_type_value_misspelled.yaml', 'follows_config_misspelled.yaml', {}) - assert False, 'Should have raised SpecificationError' - except SpecificationError as e: - e = str(e) - assert e.startswith('Type ') - assert 'is not a valid type. Did you mean:' in e - print(f'warnings: {warnings}') - print(f'errors: {errors}') + run_test( + 'config_type_value_misspelled.yaml', + 'follows_config_misspelled.yaml', + starts_with_error='Type ', + additional_assert_function=lambda e: 'is not a valid type. Did you mean:' in e + ) def test_misspelled_field(): - try: - construct_metadata('config_field_misspelled.yaml', 'follows_config_misspelled.yaml', {}) - assert False, 'Should have raised SpecificationError' - except SpecificationError as e: - e = str(e) - assert e.startswith('Field ') - assert ' is not allowed for type ' in e - print(f'warnings: {warnings}') - print(f'errors: {errors}') + run_test( + 'config_field_misspelled.yaml', + 'follows_config_misspelled.yaml', + starts_with_error='Field ', + additional_assert_function=lambda e: ' is not allowed for type ' in e + ) -def test_misspelled_alternatives(): - try: - construct_metadata('config_alternatives_misspelled.yaml', 'follows_config_misspelled.yaml', {}) - assert False, 'Should have raised SpecificationError' - except SpecificationError as e: - e = str(e) - assert e == 'Bool match string should be either "true" or "false"' +def test_misspelled_true_alternatives_1(): + run_test( + 'config_alternatives_bool_misspelled_1.yaml', + 'follows_config_misspelled.yaml', + starts_with_error='Bool match string should be either "true" or "false"' + ) - print(f'warnings: {warnings}') - print(f'errors: {errors}') \ No newline at end of file +def test_misspelled_true_alternatives_2(): + run_test( + 'config_alternatives_bool_misspelled_2.yaml', + 'follows_config_misspelled.yaml', + starts_with_error='Bool match string should be either "true" or "false"' + ) \ No newline at end of file diff --git a/problemtools/tests/config_parser_tests/test_valid_config.py b/problemtools/tests/config_parser_tests/test_valid_config.py new file mode 100644 index 00000000..282d2ecd --- /dev/null +++ b/problemtools/tests/config_parser_tests/test_valid_config.py @@ -0,0 +1,38 @@ +from problemtools.config_parser import Metadata +from helper import * + +def test_basic_config(): + injected = { + 'external': { + "cool-string": "yo this string is ballin" + } + } + data = construct_metadata('basic_config.yaml', 'follows_basic_config.yaml', injected) + + print(f"warnings: {warnings}") + print(f"errors: {errors}") + + assert data["foo"] == 1337 + assert data["bar"] == "z" + assert data["baz"] == True + assert abs(data["boz"] - 3.5) < 0.01 + assert data["copied"] == "yo this string is ballin" + assert len(warnings) > 0 + +legacy_injected_data = { + "system_default": { + "memory": 2048, + "output": 8, + "code": 128, + "compilation_time": 60, + "compilation_memory": 2048, + "validation_time": 60, + "validation_memory": 2048, + "validation_output": 8 + } +} + +def test_legacy_config_empty(): + data = construct_metadata('legacy_specification.yaml', 'empty_config.yaml', legacy_injected_data) + print(f"warnings: {warnings}") + print(f"errors: {errors}") \ No newline at end of file diff --git a/problemtools/verifyproblem.py b/problemtools/verifyproblem.py index a17dcf59..31bb6205 100644 --- a/problemtools/verifyproblem.py +++ b/problemtools/verifyproblem.py @@ -826,7 +826,7 @@ class ProblemStatement2023_07(ProblemStatement): } }, }, - "licence": { + "license": { "type": "string", "default": "unknown", "alternatives": { From cbc6d33b4583eb4b55275d27da02115ba7a65a9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20S=C3=B6derbergh?= Date: Mon, 24 Mar 2025 12:53:48 +0100 Subject: [PATCH 15/32] Small fixes Co-authored-by: Zazmuz --- problemtools/config_parser/matcher.py | 7 ++++--- .../tests/config_parser_tests/test_valid_config.py | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/problemtools/config_parser/matcher.py b/problemtools/config_parser/matcher.py index 324b1d95..dbe02468 100644 --- a/problemtools/config_parser/matcher.py +++ b/problemtools/config_parser/matcher.py @@ -41,8 +41,9 @@ def __init__(self, matchstr: str | int): if matchstr.count(":") > 1: raise ValueError if ":" in matchstr: + self.start, self.end = [ - int(p) if p else None for p in map(str.strip, matchstr.split()) + int(p) if p else None for p in map(str.strip, matchstr.split(":")) ] else: matchstr = matchstr.strip() @@ -66,10 +67,10 @@ def check(self, val) -> bool: return True def __str__(self): - if A == B: - return str(A) A = str(self.start) if self.start is not None else "" B = str(self.end) if self.end is not None else "" + if A == B and A != "": + return str(A) return f"{A}:{B}" diff --git a/problemtools/tests/config_parser_tests/test_valid_config.py b/problemtools/tests/config_parser_tests/test_valid_config.py index 282d2ecd..c8d9dcbe 100644 --- a/problemtools/tests/config_parser_tests/test_valid_config.py +++ b/problemtools/tests/config_parser_tests/test_valid_config.py @@ -24,9 +24,9 @@ def test_basic_config(): "memory": 2048, "output": 8, "code": 128, - "compilation_time": 60, + "compilation_time": 60.0, "compilation_memory": 2048, - "validation_time": 60, + "validation_time": 60.0, "validation_memory": 2048, "validation_output": 8 } From 0b82957a39c8aa612be677abaf50ef1e2b8fde7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20S=C3=B6derbergh?= Date: Mon, 24 Mar 2025 17:45:29 +0100 Subject: [PATCH 16/32] Tests for matching and bug fixes --- problemtools/config_parser/matcher.py | 46 ++++--- .../tests/config_parser_tests/test_matcher.py | 114 ++++++++++++++++++ 2 files changed, 142 insertions(+), 18 deletions(-) create mode 100644 problemtools/tests/config_parser_tests/test_matcher.py diff --git a/problemtools/config_parser/matcher.py b/problemtools/config_parser/matcher.py index dbe02468..bf5e7c52 100644 --- a/problemtools/config_parser/matcher.py +++ b/problemtools/config_parser/matcher.py @@ -1,31 +1,35 @@ from .general import SpecificationError import re +from typing import Any class AlternativeMatch: def __init__(self, matchstr): raise NotImplementedError("Specialize in subclass") - def check(self, val) -> bool: + def check(self, val: Any) -> bool: raise NotImplementedError("Specialize in subclass") @staticmethod - def get_matcher(type, matchstr) -> "AlternativeMatch": + def get_matcher(type: str, matchstr: Any) -> "AlternativeMatch": matchers = { "string": StringMatch, "int": IntMatch, "float": FloatMatch, "bool": BoolMatch, } - assert type in matchers + if type not in matchers: + raise SpecificationError(f'There is no matcher for type "{matchstr}"') return matchers[type](matchstr) class StringMatch(AlternativeMatch): - def __init__(self, matchstr): + def __init__(self, matchstr: str): + if type(matchstr) is not str: + raise SpecificationError(f'String match needs argument to be of type string. Got {type(matchstr)}') self.regex = re.compile(matchstr) - def check(self, val) -> bool: - return isinstance(val, str) and self.regex.match(val) + def check(self, val: Any) -> bool: + return type(val) is str and bool(self.regex.fullmatch(val)) def __str__(self) -> str: return self.regex.pattern @@ -34,7 +38,9 @@ def __str__(self) -> str: class IntMatch(AlternativeMatch): def __init__(self, matchstr: str | int): - if isinstance(matchstr, int): + if type(matchstr) not in (str, int): + raise SpecificationError(f"Int match needs argument to be of type string or int. Got {type(matchstr)}") + if type(matchstr) is int: self.start = self.end = matchstr return try: @@ -55,18 +61,18 @@ def __init__(self, matchstr: str | int): f'Int match string should be of the form "A:B" where A and B can be parsed as ints or left empty, or a single integer, not "{matchstr}"' ) - def check(self, val) -> bool: - if not isinstance(val, int): + def check(self, val: Any) -> bool: + if type(val) is not int: return False if self.start is not None: if val < self.start: return False if self.end is not None: - if val > self.start: + if val > self.end: return False return True - def __str__(self): + def __str__(self) -> str: A = str(self.start) if self.start is not None else "" B = str(self.end) if self.end is not None else "" if A == B and A != "": @@ -76,6 +82,8 @@ def __str__(self): class FloatMatch(AlternativeMatch): def __init__(self, matchstr: str): + if type(matchstr) is not str: + raise SpecificationError(f'Float match needs argument to be of type string. Got {type(matchstr)}') try: if matchstr.count(":") != 1: raise ValueError @@ -87,10 +95,10 @@ def __init__(self, matchstr: str): 'Float match string should be of the form "A:B" where A and B can be parsed as floats or left empty' ) - def check(self, val) -> bool: - return isinstance(val, float) and self.start <= val <= self.end + def check(self, val: Any) -> bool: + return type(val) is float and self.start <= val <= self.end - def __str__(self): + def __str__(self) -> str: A = str(self.start) if self.start != float("-inf") else "" B = str(self.end) if self.end != float("inf") else "" return f"{A}:{B}" @@ -98,7 +106,9 @@ def __str__(self): class BoolMatch(AlternativeMatch): def __init__(self, matchstr: str | bool): - if isinstance(matchstr, bool): + if not type(matchstr) in (bool, str): + raise SpecificationError(f'BoolMatch needs to recieve either type string or bool. Got {type(matchstr)}') + if type(matchstr) is bool: self.val = matchstr return matchstr = matchstr.strip().lower() @@ -108,8 +118,8 @@ def __init__(self, matchstr: str | bool): ) self.val = {"true": True, "false": False}[matchstr] - def check(self, val) -> bool: - return isinstance(val, bool) and val == self.val + def check(self, val: Any) -> bool: + return type(val) is bool and val == self.val - def __str__(self): + def __str__(self) -> str: return str(self.val) diff --git a/problemtools/tests/config_parser_tests/test_matcher.py b/problemtools/tests/config_parser_tests/test_matcher.py new file mode 100644 index 00000000..0c601c3f --- /dev/null +++ b/problemtools/tests/config_parser_tests/test_matcher.py @@ -0,0 +1,114 @@ +from problemtools.config_parser.matcher import AlternativeMatch, BoolMatch, StringMatch, IntMatch, FloatMatch +from problemtools.config_parser.general import SpecificationError +from itertools import chain + +import pytest + +def test_bool_match(): + for matcher in (BoolMatch(True), BoolMatch("true"), BoolMatch("tRuE")): + assert matcher.check(True) is True + assert matcher.check(False) is False + assert str(matcher) == "True" + for matcher in (BoolMatch(False), BoolMatch("false"), BoolMatch("FaLsE")): + assert matcher.check(True) is False + assert matcher.check(False) is True + assert str(matcher) == "False" + for junk in ("asdsad", 123, "foobar", 0.0): + with pytest.raises(SpecificationError): + BoolMatch(junk) + +def test_string_match(): + matcher = StringMatch("f[ou][ou]") + for s in ("foo", "fuu", "fou"): + assert matcher.check(s) is True + for s in ("bar", "fooo", "fo", "bfoo"): + assert matcher.check(s) is False + for s in ("xyz", "[aA][bB]", r"#([a-fA-F0-9]{3}|[a-fA-F0-9]{6})\b"): + assert str(StringMatch(s)) == s + for junk in (1.2, 123, True): + assert matcher.check(junk) is False + with pytest.raises(SpecificationError): + StringMatch(junk) + +def test_int_match(): + matcher = IntMatch("0:") + for i in range(-10, 0): + assert matcher.check(i) is False + for i in range(0, 10): + assert matcher.check(i) is True + assert str(matcher) == "0:" + + matcher = IntMatch(":13") + for i in range(3, 14): + assert matcher.check(i) is True + for i in range(14, 24): + assert matcher.check(i) is False + assert str(matcher) == ":13" + + matcher = IntMatch("10:20") + for i in range(10, 21): + assert matcher.check(i) is True + for i in chain(range(0, 10), range(21, 30)): + assert matcher.check(i) is False + assert str(matcher) == "10:20" + + matcher = IntMatch(":") + for i in (-100, 1000000, 23, 101010, 0): + assert matcher.check(i) is True + for i in ("foo", 0.0, True, 3.5): + assert matcher.check(i) is False + assert str(matcher) == ":" + + for v in (13, "13"): + matcher = IntMatch(v) + for i in chain(range(5, 13), range(14, 20)): + assert matcher.check(i) is False + assert matcher.check(13) is True + assert str(matcher) == "13" + + for junk in (1.2, "foo", True, "13:13:13"): + assert matcher.check(junk) is False + with pytest.raises(SpecificationError): + IntMatch(junk) + + +def test_float_match(): + for v in ("0.0:", "0:"): + matcher = FloatMatch(v) + for i in range(-10, 0): + assert matcher.check(i * 0.5) is False + for i in range(1, 10): + assert matcher.check(i * 0.5) is True + + for v in (":13", ":13.0", ":1.3e1"): + matcher = FloatMatch(":13") + for i in range(10, 25): + assert matcher.check(i * 0.5) is True + for i in range(27, 30): + assert matcher.check(i * 0.5) is False + + matcher = FloatMatch("1.0:2.0") + for i in range(11, 20): + assert matcher.check(i * 0.1) is True + for i in chain(range(0, 10), range(21, 30)): + assert matcher.check(i * 0.1) is False + + matcher = FloatMatch(":") + for i in (-100, 1000000, 23, 101010, 0): + assert matcher.check(i * 1.0) is True + for i in ("foo", 0, True, 35): + assert matcher.check(i) is False + + for junk in (1.2, "foo", True, 13, "13", "13:13:13"): + print(junk) + with pytest.raises(SpecificationError): + FloatMatch(junk) + +def test_match_factory(): + for junk in (1.2, "foo", True, 13, "13", "13:13:13"): + with pytest.raises(SpecificationError): + AlternativeMatch.get_matcher(junk, "123") + assert type(AlternativeMatch.get_matcher("string", "abc123")) is StringMatch + assert type(AlternativeMatch.get_matcher("float", "0.0:1.0")) is FloatMatch + assert type(AlternativeMatch.get_matcher("int", "13:25")) is IntMatch + assert type(AlternativeMatch.get_matcher("bool", "True")) is BoolMatch \ No newline at end of file From 7f47cb9e674f6762294d55da7bf33605214bd961 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20S=C3=B6derbergh?= Date: Mon, 24 Mar 2025 17:46:09 +0100 Subject: [PATCH 17/32] Add .vscode/ to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 2f372208..fe44eda5 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ /support/default_validator/default_validator /support/interactive/interactive build/ +.vscode/ \ No newline at end of file From 8e58c6324c9d4e2ebe25bcfefebbb5f1ea0a8914 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20S=C3=B6derbergh?= Date: Mon, 24 Mar 2025 18:55:02 +0100 Subject: [PATCH 18/32] Implement alternative-checks --- problemtools/config_parser/__init__.py | 41 +++++++++++++++++++++----- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/problemtools/config_parser/__init__.py b/problemtools/config_parser/__init__.py index 01938680..ee906e2e 100644 --- a/problemtools/config_parser/__init__.py +++ b/problemtools/config_parser/__init__.py @@ -8,6 +8,7 @@ from .parser import type_mapping from .config_path import Path, PathError from .parser import Parser, DefaultObjectParser +from .matcher import AlternativeMatch class Metadata: @@ -20,9 +21,11 @@ def __init__(self, specification: dict) -> None: def __getitem__(self, key: str | Path) -> Any: if self.data is None: raise Exception("data has not been loaded yet") - if isinstance(key, str): + if type(key) is str: return Path.parse(key).index(self.data) - return key.index(self.data) + elif type(key) is Path: + return key.index(self.data) + raise Exception(f"Invalid type for indexing data ({type(key)}). Type should be string or Path") def set_error_callback(self, fun: Callable): self.error_func = fun @@ -30,7 +33,7 @@ def set_error_callback(self, fun: Callable): def set_warning_callback(self, fun: Callable): self.warning_func = fun - def invert_graph(dependency_graph: dict[Path, list[Path]]): + def invert_graph(dependency_graph: dict[Path, list[Path]]) -> dict[Path, list[Path]]: # TODO: catch key errors better depends_on_graph = {k: [] for k in dependency_graph.keys()} for dependant, dependencies in dependency_graph.items(): @@ -99,9 +102,9 @@ def get_copy_dependencies(self) -> dict[Path, list[Path]]: if is_copyfrom(val): deps.append(val[1]) graph[path] = deps - if isinstance(val, dict): + if type(val) is dict: stack.extend((path, Path.combine(path, child)) for child in val.keys()) - elif isinstance(val, list): + elif type(val) is list: stack.extend((path, Path.combine(path, i)) for i in range(len(val))) return graph @@ -122,7 +125,7 @@ def load_config(self, config: dict, injected_data: dict) -> None: for full_path in Metadata.topo_sort(self.get_copy_dependencies()): val = full_path.index(self.data) if is_copyfrom(val): - if any(isinstance(part, int) for part in val[1].path): + if any(type(part) is int for part in val[1].path): raise SpecificationError( f"copy-from directives may not copy from lists (property: {full_path}, copy-property: {val[1]})" ) @@ -140,7 +143,31 @@ def load_config(self, config: dict, injected_data: dict) -> None: ) full_path.set(self.data, copy_val) + def do_alternative_checks(self, checks: dict, prop_path: Path) -> None: + for check_result, check_name in ((False, "forbid"), (True, "require")): + for path, matchstr in checks.get(check_name, {}): + path = Path.parse(path) + val = path.index(self.data) + typ = Path.combine(path.spec_path(), "type").index(self.spec) + if AlternativeMatch.get_matcher(typ, matchstr).check(val) is not check_result: + self.error_func(f'Property {prop_path} with value {prop_path.index(self.data)} {check_name}s property {path} to match value {matchstr}') + def check_config(self) -> None: - pass + if self.data is None: + raise Exception('Data has not been loaded yet') + def check(spec: dict, data: Any, path: Path): + if spec["type"] == "object": + for p in spec["properties"]: + check(spec["properties"][p], data[p], Path.combine(path, p)) + elif spec["type"] == "list": + for i, cont in enumerate(data): + check(spec["content"], cont, Path.combine(path, i)) + if "alternatives" in spec: + for matchstr, checks in spec["alternatives"].items(): + if AlternativeMatch.get_matcher(spec["type"], matchstr).check(data): + self.do_alternative_checks(checks, path) + check(self.spec, self.data, Path()) + + From 93906c1f79fa789b36a16d27ddca16ba9026a1b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20S=C3=B6derbergh?= Date: Mon, 24 Mar 2025 23:49:38 +0100 Subject: [PATCH 19/32] Implement match_properties among some other things --- problemtools/config_parser/__init__.py | 40 +++- problemtools/config_parser/parser.py | 23 +- problemtools/config_parser/wip_format.yaml | 169 --------------- .../config/2023-07_specification.yaml | 204 ++++++++++++++++++ .../config/basic_config.yaml | 4 +- ...legacy_config_fails_alternative_match.yaml | 2 + .../config/legacy_config_fails_forbid.yaml | 2 + .../config/legacy_config_fails_require.yaml | 2 + .../config/legacy_specification.yaml | 37 ++-- .../config/system_defaults.yaml | 8 + .../test_config_alternative_directives.py | 23 ++ .../config_parser_tests/test_valid_config.py | 14 +- 12 files changed, 312 insertions(+), 216 deletions(-) delete mode 100644 problemtools/config_parser/wip_format.yaml create mode 100644 problemtools/tests/config_parser_tests/config/2023-07_specification.yaml create mode 100644 problemtools/tests/config_parser_tests/config/legacy_config_fails_alternative_match.yaml create mode 100644 problemtools/tests/config_parser_tests/config/legacy_config_fails_forbid.yaml create mode 100644 problemtools/tests/config_parser_tests/config/legacy_config_fails_require.yaml create mode 100644 problemtools/tests/config_parser_tests/config/system_defaults.yaml create mode 100644 problemtools/tests/config_parser_tests/test_config_alternative_directives.py diff --git a/problemtools/config_parser/__init__.py b/problemtools/config_parser/__init__.py index ee906e2e..fabcb2f7 100644 --- a/problemtools/config_parser/__init__.py +++ b/problemtools/config_parser/__init__.py @@ -109,10 +109,46 @@ def get_copy_dependencies(self) -> dict[Path, list[Path]]: return graph + def resolve_match_properties(spec: dict, data: dict): + if spec.get("type") == "list": + if type(data) is list: + for item in data: + Metadata.resolve_match_properties(spec["content"], item) + return + if spec.get("type") != "object" or type(data) is not dict: + return + + cur_props = set(spec.get("properties", {}).keys()) + regex_props = {re.compile(pattern): desc for pattern, desc in spec.get("match_properties", {}).items()} + for prop in data.keys(): + if prop in cur_props: + continue + matching = [desc for regex, desc in regex_props.items() if regex.fullmatch(prop)] + if len(matching) > 1: + raise SpecificationError(f'Multiple match_properties could match property name "{prop}"') + elif len(matching) == 1: + spec["properties"][prop] = deepcopy(matching[0]) + + for prop_name, prop in spec.get("properties", {}).items(): + if prop_name in data: + Metadata.resolve_match_properties(prop, data[prop_name]) + + def remove_match_properties(spec: dict): + if spec.get("type") == "list": + Metadata.remove_match_properties(spec["content"]) + if spec.get("type") != "object": + return + if "match_properties" in spec: + del spec["match_properties"] + for prop in spec.get("properties", {}).values(): + Metadata.remove_match_properties(prop) + def load_config(self, config: dict, injected_data: dict) -> None: self.data: dict = DefaultObjectParser( config, self.spec, Path(), self.warning_func, self.error_func ).parse() + Metadata.resolve_match_properties(self.spec, self.data) + Metadata.remove_match_properties(self.spec) for cfg_path in Metadata.topo_sort(self.get_path_dependencies()): spec = cfg_path.index(self.spec) for full_path in cfg_path.data_paths(self.data): @@ -144,8 +180,10 @@ def load_config(self, config: dict, injected_data: dict) -> None: full_path.set(self.data, copy_val) def do_alternative_checks(self, checks: dict, prop_path: Path) -> None: + if "warn" in checks: + self.warning_func(checks["warn"]) for check_result, check_name in ((False, "forbid"), (True, "require")): - for path, matchstr in checks.get(check_name, {}): + for path, matchstr in checks.get(check_name, {}).items(): path = Path.parse(path) val = path.index(self.data) typ = Path.combine(path.spec_path(), "type").index(self.spec) diff --git a/problemtools/config_parser/parser.py b/problemtools/config_parser/parser.py index e82d31f1..d52c16e1 100644 --- a/problemtools/config_parser/parser.py +++ b/problemtools/config_parser/parser.py @@ -70,36 +70,29 @@ def __init__( self.alternatives = None else: self.alternatives = [ - (AlternativeMatch.get_matcher(self.OUTPUT_TYPE, key), val) - for key, val in alternatives.items() + AlternativeMatch.get_matcher(self.OUTPUT_TYPE, key) for key, _ in alternatives.items() ] else: self.alternatives = None def parse(self): out = self._parse(self.path.index(self.data, None)) - + fallback = Path.combine(self.spec_path, "default").index(self.specification, type_mapping[self.OUTPUT_TYPE]()) + flags = Path.combine(self.spec_path, "flags").index(self.specification, []) if "deprecated" in flags and out is not None: self.warning_func(f"deprecated property was provided ({self.path})") - if self.alternatives is not None: - found_match = False - for matcher, checks in self.alternatives: - if matcher.check(out): - found_match = True - if "warn" in checks: - self.warning_func(checks["warn"]) - if not found_match: - alts = ", ".join(f'"{matcher}"' for matcher, _ in self.alternatives) + if out is not None and self.alternatives is not None: + if not any(matcher.check(out) for matcher in self.alternatives): + alts = ", ".join(f'"{matcher}"' for matcher in self.alternatives) self.error_func( - f"Property {self.path} did not match any of the specified alternatives ({alts})" + f"Property {self.path} with value {out} did not match any of the specified alternatives ({alts})" ) out = None if out is None: - fallback = Path.combine(self.spec_path, "default").index(self.specification, type_mapping[self.OUTPUT_TYPE]()) - if isinstance(fallback, str) and fallback.startswith("copy-from:"): + if type(fallback) is str and fallback.startswith("copy-from:"): fallback = ("copy-from", Path.parse(fallback.split(":")[1])) return fallback diff --git a/problemtools/config_parser/wip_format.yaml b/problemtools/config_parser/wip_format.yaml deleted file mode 100644 index 5feaeadd..00000000 --- a/problemtools/config_parser/wip_format.yaml +++ /dev/null @@ -1,169 +0,0 @@ -type: object -required: [] -properties: - problem_format_version: - type: string - default: legacy - alternatives: - legacy: {} - type: - type: string - default: pass-fail - alternatives: - pass-fail: - forbid: - - grading/on_reject: grade - scoring: {} - name: - type: string - uuid: - type: string - author: - type: string - source: - type: string - source_url: - type: string - alternatives: - ".+": - require: - - source: ".+" - license: - type: string - default: unknown - alternatives: - unknown: - warn: License is unknown - require: - - rights_owner: ".+" - cc0|cc by|cc by-sa|educational|permission: - require: - - rights_owner: ".+" - public domain: - forbid: - - rights_owner: ".+" - rights_owner: - type: string - parsing: rights-owner-legacy - limits: - type: object - properties: - time_multiplier: - type: float - default: 5 - alternatives: - "0.0:": {} - time_safety_margin: - type: float - default: 2 - memory: - type: int - default: copy-from:system_default/memory - alternatives: - "0:": {} - output: - type: int - default: copy-from:system_default/output - alternatives: - "0:": {} - code: - type: int - default: copy-from:system_default/code - alternatives: - "0:": {} - compilation_time: - type: float - default: copy-from:system_default/compilation_time - alternatives: - "0.0:": {} - compilation_memory: - type: int - default: copy-from:system_default/compilation_memory - alternatives: - "0:": {} - validation_time: - type: float - default: copy-from:system_default/validation_time - alternatives: - "0.0:": {} - validation_memory: - type: int - default: copy-from:system_default/validation_memory - alternatives: - "0:": {} - validation_output: - type: int - default: copy-from:system_default/validation_output - alternatives: - "0:": {} - validation: - type: object - parsing: legacy-validation - properties: - type: - type: string - alternatives: - default: - forbid: - - validation/interactive: true - - validation/score: true - custom: {} - interactive: - type: bool - score: - type: bool - validator_flags: - type: string - keywords: - type: list - parsing: space-separated-strings - content: - type: string - grading: - type: object - properties: - show_test_data_groups: - type: bool - alternatives: - True: - forbid: - - type: pass-fail - False: {} - on_reject: - type: string - default: worst_error - flags: - - deprecated - alternatives: - first_error: {} - worst_error: {} - grade: {} - objective: - type: string - default: max - alternatives: - min: {} - max: {} - accept_score: - type: float # Should actually be type string, add custom parsing? - default: 1.0 - flags: - - deprecated - reject_score: - type: float # Should actually be type string, add custom parsing? - default: 0.0 - flags: - - deprecated - range: - type: object - parsing: min-max-float-string - flags: - - deprecated - properties: - min: - type: float - default: .inf - max: - type: float - default: -.inf - diff --git a/problemtools/tests/config_parser_tests/config/2023-07_specification.yaml b/problemtools/tests/config_parser_tests/config/2023-07_specification.yaml new file mode 100644 index 00000000..f6e849c4 --- /dev/null +++ b/problemtools/tests/config_parser_tests/config/2023-07_specification.yaml @@ -0,0 +1,204 @@ +type: object +required: [problem_format_version,name,uuid] +properties: + problem_format_version: + type: string + default: "2023-07" + alternatives: + "2023-07": {} + type: + type: object + parsing: type-2023-07 + properties: + pass-fail: + type: bool + default: True + alternatives: + True: + require: + type/scoring: False + False: {} + scoring: + type: bool + multi-pass: + type: bool + interactive: + type: bool + submit-answer: + type: bool + alternatives: + True: + require: + type/interactive: False + type/multi-pass: False + False: {} + content: + type: string + alternatives: + pass-fail: {} + scoring: {} + multi-pass: {} + interactive: {} + submit-answer: {} + name: + type: object + parsing: name-parsing-2023-07 + match_properties: + "[a-z]{2,3}|[a-z]{2}-[A-Z]{2}": + type: string + uuid: + type: string + version: + type: string + credits: + type: object + parsing: credits-parsing-2023-07 + properties: + authors: + type: list + content: + type: string + contributors: + type: list + content: + type: string + testers: + type: list + content: + type: string + contributors: + type: list + content: + type: string + translators: + type: object + match_properties: + "[a-z]{2,3}|[a-z]{2}-[A-Z]{2}": + type: list + content: + type: string + packagers: + type: list + content: + type: string + acknowledgements: + type: list + content: + type: string + source: + type: list + parsing: source-parsing-2023-07 + content: + type: object + properties: + name: + type: string + url: + type: string + license: + type: string + default: unknown + alternatives: + unknown: + warn: License is unknown + require: + rights_owner: ".+" + cc0|cc by|cc by-sa|educational|permission: + require: + rights_owner: ".+" + public domain: + forbid: + rights_owner: ".+" + rights_owner: + type: string + parsing: rights-owner-2023-07 + embargo_until: + type: string + parsing: date + limits: + type: object + properties: + time_multipliers: + type: object + properties: + ac_to_time_limit: + type: float + default: 2.0 + alternatives: + "1.0:": {} + time_limit_to_tle: + type: float + default: 1.5 + alternatives: + "1.0:": {} + time_limit: + type: float + default: 0.0 # Hmm, time limit needs to be calculated by default + alternatives: + "0.0:": {} + time_resolution: + type: float + default: 1.0 + alternatives: + "0.0:": {} + memory: + type: int + default: copy-from:system_default/memory + alternatives: + "1:": {} + output: + type: int + default: copy-from:system_default/output + alternatives: + "1:": {} + code: + type: int + default: copy-from:system_default/code + alternatives: + "1:": {} + compilation_time: + type: int + default: copy-from:system_default/compilation_time + alternatives: + "1:": {} + compilation_memory: + type: int + default: copy-from:system_default/compilation_memory + alternatives: + "1:": {} + validation_time: + type: int + default: copy-from:system_default/validation_time + alternatives: + "1:": {} + validation_memory: + type: int + default: copy-from:system_default/validation_memory + alternatives: + "1:": {} + validation_output: + type: int + default: copy-from:system_default/validation_output + alternatives: + "1:": {} + validation_passes: + type: int + default: 2 + alternatives: + "2:": {} + keywords: + type: list + content: + type: string + languages: + type: list + parsing: languages-parsing + content: + type: string + allow_file_writing: + type: bool + constants: + type: object + match_properties: + "[a-zA-Z_][a-zA-Z0-9_]*": + type: string # In spec, this should be allowed to be int or float as well... This is not supported for this system diff --git a/problemtools/tests/config_parser_tests/config/basic_config.yaml b/problemtools/tests/config_parser_tests/config/basic_config.yaml index f2b870b7..bf264828 100644 --- a/problemtools/tests/config_parser_tests/config/basic_config.yaml +++ b/problemtools/tests/config_parser_tests/config/basic_config.yaml @@ -16,7 +16,7 @@ properties: warn: watch out you are using y z: require: - - baz: true + baz: true baz: type: bool default: True @@ -30,7 +30,7 @@ properties: alternatives: "3.1414:3.1416": require: - - foo: 1337 + foo: 1337 warn: this is pie not real pi 7/2 ":": {} # Allows all values copied: diff --git a/problemtools/tests/config_parser_tests/config/legacy_config_fails_alternative_match.yaml b/problemtools/tests/config_parser_tests/config/legacy_config_fails_alternative_match.yaml new file mode 100644 index 00000000..fd904729 --- /dev/null +++ b/problemtools/tests/config_parser_tests/config/legacy_config_fails_alternative_match.yaml @@ -0,0 +1,2 @@ +license: Mr Moneys Private License +rights_owner: Mr Money \ No newline at end of file diff --git a/problemtools/tests/config_parser_tests/config/legacy_config_fails_forbid.yaml b/problemtools/tests/config_parser_tests/config/legacy_config_fails_forbid.yaml new file mode 100644 index 00000000..e4bf8c5c --- /dev/null +++ b/problemtools/tests/config_parser_tests/config/legacy_config_fails_forbid.yaml @@ -0,0 +1,2 @@ +license: public domain +rights_owner: Mr Money \ No newline at end of file diff --git a/problemtools/tests/config_parser_tests/config/legacy_config_fails_require.yaml b/problemtools/tests/config_parser_tests/config/legacy_config_fails_require.yaml new file mode 100644 index 00000000..e64218e6 --- /dev/null +++ b/problemtools/tests/config_parser_tests/config/legacy_config_fails_require.yaml @@ -0,0 +1,2 @@ +license: cc by-sa +rights_owner: "" \ No newline at end of file diff --git a/problemtools/tests/config_parser_tests/config/legacy_specification.yaml b/problemtools/tests/config_parser_tests/config/legacy_specification.yaml index 5feaeadd..9a85cb6e 100644 --- a/problemtools/tests/config_parser_tests/config/legacy_specification.yaml +++ b/problemtools/tests/config_parser_tests/config/legacy_specification.yaml @@ -12,7 +12,7 @@ properties: alternatives: pass-fail: forbid: - - grading/on_reject: grade + grading/on_reject: grade scoring: {} name: type: string @@ -27,7 +27,8 @@ properties: alternatives: ".+": require: - - source: ".+" + source: ".+" + "": {} license: type: string default: unknown @@ -35,13 +36,13 @@ properties: unknown: warn: License is unknown require: - - rights_owner: ".+" + rights_owner: ".+" cc0|cc by|cc by-sa|educational|permission: require: - - rights_owner: ".+" + rights_owner: ".+" public domain: forbid: - - rights_owner: ".+" + rights_owner: ".+" rights_owner: type: string parsing: rights-owner-legacy @@ -60,42 +61,42 @@ properties: type: int default: copy-from:system_default/memory alternatives: - "0:": {} + "1:": {} output: type: int default: copy-from:system_default/output alternatives: - "0:": {} + "1:": {} code: type: int default: copy-from:system_default/code alternatives: - "0:": {} + "1:": {} compilation_time: - type: float + type: int default: copy-from:system_default/compilation_time alternatives: - "0.0:": {} + "1:": {} compilation_memory: type: int default: copy-from:system_default/compilation_memory alternatives: - "0:": {} + "1:": {} validation_time: - type: float + type: int default: copy-from:system_default/validation_time alternatives: - "0.0:": {} + "1:": {} validation_memory: type: int default: copy-from:system_default/validation_memory alternatives: - "0:": {} + "1:": {} validation_output: type: int default: copy-from:system_default/validation_output alternatives: - "0:": {} + "1:": {} validation: type: object parsing: legacy-validation @@ -105,8 +106,8 @@ properties: alternatives: default: forbid: - - validation/interactive: true - - validation/score: true + validation/interactive: true + validation/score: true custom: {} interactive: type: bool @@ -127,7 +128,7 @@ properties: alternatives: True: forbid: - - type: pass-fail + type: pass-fail False: {} on_reject: type: string diff --git a/problemtools/tests/config_parser_tests/config/system_defaults.yaml b/problemtools/tests/config_parser_tests/config/system_defaults.yaml new file mode 100644 index 00000000..2083f01e --- /dev/null +++ b/problemtools/tests/config_parser_tests/config/system_defaults.yaml @@ -0,0 +1,8 @@ +memory: 2048 +output: 8 +code: 128 +compilation_time: 60 +compilation_memory: 2048 +validation_time: 60 +validation_memory: 2048 +validation_output: 8 \ No newline at end of file diff --git a/problemtools/tests/config_parser_tests/test_config_alternative_directives.py b/problemtools/tests/config_parser_tests/test_config_alternative_directives.py new file mode 100644 index 00000000..87dce758 --- /dev/null +++ b/problemtools/tests/config_parser_tests/test_config_alternative_directives.py @@ -0,0 +1,23 @@ +from helper import * + +legacy_injected_data = { + "system_default": load_yaml("system_defaults.yaml") +} + +def test_require(): + md = construct_metadata('legacy_specification.yaml', 'legacy_config_fails_require.yaml', legacy_injected_data) + md.check_config() + print(errors) + assert len(errors) == 1 + +def test_forbid(): + md = construct_metadata('legacy_specification.yaml', 'legacy_config_fails_forbid.yaml', legacy_injected_data) + md.check_config() + print(errors) + assert len(errors) == 1 + +def test_alternatives(): + md = construct_metadata('legacy_specification.yaml', 'legacy_config_fails_alternative_match.yaml', legacy_injected_data) + md.check_config() + print(errors) + assert len(errors) == 1 \ No newline at end of file diff --git a/problemtools/tests/config_parser_tests/test_valid_config.py b/problemtools/tests/config_parser_tests/test_valid_config.py index c8d9dcbe..a88d73c0 100644 --- a/problemtools/tests/config_parser_tests/test_valid_config.py +++ b/problemtools/tests/config_parser_tests/test_valid_config.py @@ -8,7 +8,8 @@ def test_basic_config(): } } data = construct_metadata('basic_config.yaml', 'follows_basic_config.yaml', injected) - + data.check_config() + print(f"warnings: {warnings}") print(f"errors: {errors}") @@ -20,16 +21,7 @@ def test_basic_config(): assert len(warnings) > 0 legacy_injected_data = { - "system_default": { - "memory": 2048, - "output": 8, - "code": 128, - "compilation_time": 60.0, - "compilation_memory": 2048, - "validation_time": 60.0, - "validation_memory": 2048, - "validation_output": 8 - } + "system_default": load_yaml("system_defaults.yaml") } def test_legacy_config_empty(): From 6874f13d9d61e19a99a92c2e33a20f03c056e23d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20S=C3=B6derbergh?= Date: Tue, 25 Mar 2025 02:05:47 +0100 Subject: [PATCH 20/32] Make parsers for new format --- problemtools/config_parser/parser.py | 270 +++++++++++++++--- .../config/2023-07_specification.yaml | 18 +- 2 files changed, 240 insertions(+), 48 deletions(-) diff --git a/problemtools/config_parser/parser.py b/problemtools/config_parser/parser.py index d52c16e1..40bbef6a 100644 --- a/problemtools/config_parser/parser.py +++ b/problemtools/config_parser/parser.py @@ -2,6 +2,7 @@ from .config_path import Path from .general import SpecificationError from .matcher import AlternativeMatch +from collections import Counter type_mapping = { "string": str, @@ -77,21 +78,42 @@ def __init__( def parse(self): out = self._parse(self.path.index(self.data, None)) - fallback = Path.combine(self.spec_path, "default").index(self.specification, type_mapping[self.OUTPUT_TYPE]()) - flags = Path.combine(self.spec_path, "flags").index(self.specification, []) - if "deprecated" in flags and out is not None: - self.warning_func(f"deprecated property was provided ({self.path})") - - if out is not None and self.alternatives is not None: - if not any(matcher.check(out) for matcher in self.alternatives): - alts = ", ".join(f'"{matcher}"' for matcher in self.alternatives) - self.error_func( - f"Property {self.path} with value {out} did not match any of the specified alternatives ({alts})" + if out is not None: + flags = Path.combine(self.spec_path, "flags").index(self.specification, []) + if "deprecated" in flags: + self.warning_func(f"deprecated property was provided ({self.path})") + + if self.OUTPUT_TYPE == "object": + required = ( + Path.combine(self.spec_path, "required").index(self.specification, []) + ) + for req in required: + req_path = Path.combine(self.path, req) + if req_path.index(self.data) is None: + self.error_func(f"Missing required property: {req_path}") + + remove = [] + known_props = Path.combine(self.spec_path, "properties").index( + self.specification ) - out = None + for prop in out.keys(): + if prop not in known_props: + self.warning_func(f"Unknown property: {Path.combine(self.path, prop)}") + remove.append(prop) + for r in remove: + del out[r] + + if self.alternatives is not None: + if not any(matcher.check(out) for matcher in self.alternatives): + alts = ", ".join(f'"{matcher}"' for matcher in self.alternatives) + self.error_func( + f"Property {self.path} with value {out} did not match any of the specified alternatives ({alts})" + ) + out = None if out is None: + fallback = Path.combine(self.spec_path, "default").index(self.specification, type_mapping[self.OUTPUT_TYPE]()) if type(fallback) is str and fallback.startswith("copy-from:"): fallback = ("copy-from", Path.parse(fallback.split(":")[1])) return fallback @@ -193,25 +215,6 @@ def _parse(self, val): self.error_func(f"Expected an object, got {val} ({self.path})") return None - required = ( - Path.combine(self.spec_path, "required").index(self.specification, []) - ) - for req in required: - req_path = Path.combine(self.path, req) - if req_path.index(self.data) is None: - self.error_func(f"Missing required property: {req_path}") - - remove = [] - known_props = Path.combine(self.spec_path, "properties").index( - self.specification - ) - for prop in val.keys(): - if prop not in known_props: - self.warning_func(f"Unknown property: {Path.combine(self.path, prop)}") - remove.append(prop) - for r in remove: - del val[r] - return val @@ -389,18 +392,195 @@ def _parse(self, val): return {"min": a, "max": b} -parsers = { - p.NAME: p - for p in [ - DefaultObjectParser, - DefaultListParser, - DefaultStringParser, - DefaultIntParser, - DefaultFloatParser, - DefaultBoolParser, - RightsOwnerLegacy, - LegacyValidation, - SpaceSeparatedStrings, - MinMaxFloatString, - ] -} +class Type2023_07(Parser): + NAME = "type-2023-07" + OUTPUT_TYPE = "object" + + def _parse(self, val): + if val is None: + return None + + if type(val) is str: + val = [val] + + if type(val) is not list: + self.error_func(f'Property {self.path} was expected to be of type list or a single string. Got {type(val)}') + return None + + if len(val) == 0: + self.error_func(f'Property {self.path} was empty list, but it should contain at least one element') + return None + + valid_options = {"pass-fail", "scoring", "multi-pass", "interactive", "submit-answer"} + out = {option: False for option in valid_options} + + for option in val: + if option not in valid_options: + self.error_func(f'Property {self.path} received invalid option "{option}"') + return None + else: + if out[option]: + self.error_func(f'Property {self.path} must not contain duplicate elements. Found duplicate "{option}"') + return None + out[option] = True + + return out + +class Name2023_07(Parser): + NAME = "name-2023-07" + OUTPUT_TYPE = "object" + + def _parse(self, val): + if val is None: + return None + + if type(val) is str: + return {"en": val} + + if type(val) is not dict: + self.error_func(f'Property {self.path} should be of type string or a dictionary of language-codes to strings. Got {type(val)}') + return None + + return val + +class Credits2023_07(Parser): + NAME = "credits-2023-07" + OUTPUT_TYPE = "object" + + def _parse(self, val): + if val is None: + return None + + if type(val) is str: + return {"authors": val} + + if type(val) is not dict: + self.error_func(f'Property {self.path} should be either a single string, or a dictionary') + return None + + return val + +class StringToList(Parser): + NAME = "string-to-list" + OUTPUT_TYPE = "list" + + def _parse(self, val): + if val is None: + return None + + if type(val) is str: + return [val] + + if type(val) is not list: + self.error_func(f'Property {self.path} should be either a single string or a list of strings') + return None + + return val + +class Source2023_07(Parser): + NAME = "source-2023-07" + OUTPUT_TYPE = "list" + + def _parse(self, val): + if val is None: + return None + + if type(val) is str: + return [{"name":val}] + + if type(val) is dict: + return [val] + + if type(val) is not list: + self.error_func(f'Property {self.path} should be of type string, object or a list') + return None + + return val + +class SourceItem2023_07(Parser): + NAME = "source-item-2023-07" + OUTPUT_TYPE = "object" + + def _parse(self, val): + if val is None: + return {"name": "???"} + + if type(val) is str: + return {"name": val} + + if type(val) is dict: + if "name" not in val: + self.error_func(f'Property {self.path} needs key "name"') + val["name"] = "???" + return val + + self.error_func(f'Property {self.path} should be of type string or object, got {type(val)}') + return {"name": "???"} + +class RightsOwner2023_07(Parser): + NAME = "rights-owner-2023-07" + OUTPUT_TYPE = "string" + + @staticmethod + def get_dependencies() -> list[Path]: + return [Path("credits", "authors"), Path("source", 0), Path("license")] + + def _parse(self, val): + if type(val) is str: + return val + + if val is None and Path("license").index(self.data) != "public domain": + authors = Path("credits", "authors").index(self.data) + if len(authors) > 0: + return ' and '.join(authors) + source = Path("source").index(self.data) + if len(source) > 0: + return ' and '.join(s["name"] for s in source) + + return None + +class LanguagesParsing(Parser): + NAME = "languages-parsing" + OUTPUT_TYPE = "list" + + def _parse(self, val): + if type(val) is str: + if val == "all": + return ("copy-from", Path("languages")) + else: + self.error_func(f'Property {self.path} should be a list or the string "all", got "{val}"') + + if type(val) is not list: + self.error_func(f'Property {self.path} should be a list or the string "all", got {val}') + + if len(val) == 0: + self.error_func(f'Property {self.path} needs to contain at least one language') + return ("copy-from", Path("languages")) + + return val + +parser_classes = [ + DefaultObjectParser, + DefaultListParser, + DefaultStringParser, + DefaultIntParser, + DefaultFloatParser, + DefaultBoolParser, + RightsOwnerLegacy, + LegacyValidation, + SpaceSeparatedStrings, + MinMaxFloatString, + Type2023_07, + Name2023_07, + Credits2023_07, + StringToList, + Source2023_07, + SourceItem2023_07, + RightsOwner2023_07, + LanguagesParsing, +] + +parsers = { p.NAME: p for p in parser_classes } +if len(parser_classes) != len(parsers): + duplicates = [f' - {prop}, {cnt} occurences' for prop, cnt in Counter(c.NAME for c in parser_classes).items() if cnt > 1] + raise NotImplementedError(f"Duplicate name(s) detected in parsers:\n{'\n'.join(duplicates)}") diff --git a/problemtools/tests/config_parser_tests/config/2023-07_specification.yaml b/problemtools/tests/config_parser_tests/config/2023-07_specification.yaml index f6e849c4..ee428d84 100644 --- a/problemtools/tests/config_parser_tests/config/2023-07_specification.yaml +++ b/problemtools/tests/config_parser_tests/config/2023-07_specification.yaml @@ -42,7 +42,10 @@ properties: submit-answer: {} name: type: object - parsing: name-parsing-2023-07 + parsing: name-2023-07 + properties: + en: # en will always exist, if english really doesn't exist, it will be an empty string + type: string match_properties: "[a-z]{2,3}|[a-z]{2}-[A-Z]{2}": type: string @@ -52,33 +55,40 @@ properties: type: string credits: type: object - parsing: credits-parsing-2023-07 + parsing: credits-2023-07 properties: authors: type: list + parsing: string-to-list content: type: string contributors: type: list + parsing: string-to-list content: type: string testers: type: list + parsing: string-to-list content: type: string contributors: type: list + parsing: string-to-list content: type: string translators: type: object + properties: {} match_properties: "[a-z]{2,3}|[a-z]{2}-[A-Z]{2}": type: list + parsing: string-to-list content: type: string packagers: type: list + parsing: string-to-list content: type: string acknowledgements: @@ -87,9 +97,10 @@ properties: type: string source: type: list - parsing: source-parsing-2023-07 + parsing: source-2023-07 content: type: object + parsing: source-item-2023-07 properties: name: type: string @@ -199,6 +210,7 @@ properties: type: bool constants: type: object + properties: {} match_properties: "[a-zA-Z_][a-zA-Z0-9_]*": type: string # In spec, this should be allowed to be int or float as well... This is not supported for this system From bd00cb74a1c7898a35e952469788f52a4a4d9dc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20S=C3=B6derbergh?= Date: Tue, 25 Mar 2025 02:20:09 +0100 Subject: [PATCH 21/32] Load a minimal config of the new type in a test --- problemtools/config_parser/parser.py | 4 ++++ .../config/2023-07_specification.yaml | 4 +++- .../config/minimal_2023-07.yaml | 4 ++++ .../config_parser_tests/test_valid_config.py | 15 ++++++++++++++- 4 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 problemtools/tests/config_parser_tests/config/minimal_2023-07.yaml diff --git a/problemtools/config_parser/parser.py b/problemtools/config_parser/parser.py index 40bbef6a..7bad9f68 100644 --- a/problemtools/config_parser/parser.py +++ b/problemtools/config_parser/parser.py @@ -544,6 +544,9 @@ class LanguagesParsing(Parser): OUTPUT_TYPE = "list" def _parse(self, val): + if val is None: + return ("copy-from", Path("languages")) + if type(val) is str: if val == "all": return ("copy-from", Path("languages")) @@ -552,6 +555,7 @@ def _parse(self, val): if type(val) is not list: self.error_func(f'Property {self.path} should be a list or the string "all", got {val}') + return ("copy-from", Path("languages")) if len(val) == 0: self.error_func(f'Property {self.path} needs to contain at least one language') diff --git a/problemtools/tests/config_parser_tests/config/2023-07_specification.yaml b/problemtools/tests/config_parser_tests/config/2023-07_specification.yaml index ee428d84..727f74fa 100644 --- a/problemtools/tests/config_parser_tests/config/2023-07_specification.yaml +++ b/problemtools/tests/config_parser_tests/config/2023-07_specification.yaml @@ -125,7 +125,9 @@ properties: parsing: rights-owner-2023-07 embargo_until: type: string - parsing: date + alternatives: + "": {} + "\\d{4}-\\d{2}-\\d{2}(T\\d{2}:\\d{2}:\\d{2}Z)?": {} limits: type: object properties: diff --git a/problemtools/tests/config_parser_tests/config/minimal_2023-07.yaml b/problemtools/tests/config_parser_tests/config/minimal_2023-07.yaml new file mode 100644 index 00000000..3f7d41dd --- /dev/null +++ b/problemtools/tests/config_parser_tests/config/minimal_2023-07.yaml @@ -0,0 +1,4 @@ +problem_format_version: 2023-07 +name: Problem Name +uuid: cool-id +license: public domain \ No newline at end of file diff --git a/problemtools/tests/config_parser_tests/test_valid_config.py b/problemtools/tests/config_parser_tests/test_valid_config.py index a88d73c0..6a59e32a 100644 --- a/problemtools/tests/config_parser_tests/test_valid_config.py +++ b/problemtools/tests/config_parser_tests/test_valid_config.py @@ -27,4 +27,17 @@ def test_basic_config(): def test_legacy_config_empty(): data = construct_metadata('legacy_specification.yaml', 'empty_config.yaml', legacy_injected_data) print(f"warnings: {warnings}") - print(f"errors: {errors}") \ No newline at end of file + print(f"errors: {errors}") + +injected_data_2023_07 = { + "system_default": load_yaml("system_defaults.yaml"), + "languages": ["python", "rust", "uhhh", "c++"] +} + +def test_2023_07_config_minimal(): + data = construct_metadata('2023-07_specification.yaml', 'minimal_2023-07.yaml', injected_data_2023_07) + data.check_config() + + print(f"warnings: {warnings}") + print(f"errors: {errors}") + assert len(errors) == 0 \ No newline at end of file From 02e766ce251671b368c575efb9fd49eb8da32a3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20S=C3=B6derbergh?= Date: Tue, 25 Mar 2025 13:36:29 +0100 Subject: [PATCH 22/32] Start implementing new config system in verifyproblem --- .../config/2023-07_config_specification.yaml | 218 +++++ .../config/legacy_config_specification.yaml | 170 ++++ problemtools/config/system_default.yaml | 8 + problemtools/verifyproblem.py | 749 +++++------------- 4 files changed, 607 insertions(+), 538 deletions(-) create mode 100644 problemtools/config/2023-07_config_specification.yaml create mode 100644 problemtools/config/legacy_config_specification.yaml create mode 100644 problemtools/config/system_default.yaml diff --git a/problemtools/config/2023-07_config_specification.yaml b/problemtools/config/2023-07_config_specification.yaml new file mode 100644 index 00000000..727f74fa --- /dev/null +++ b/problemtools/config/2023-07_config_specification.yaml @@ -0,0 +1,218 @@ +type: object +required: [problem_format_version,name,uuid] +properties: + problem_format_version: + type: string + default: "2023-07" + alternatives: + "2023-07": {} + type: + type: object + parsing: type-2023-07 + properties: + pass-fail: + type: bool + default: True + alternatives: + True: + require: + type/scoring: False + False: {} + scoring: + type: bool + multi-pass: + type: bool + interactive: + type: bool + submit-answer: + type: bool + alternatives: + True: + require: + type/interactive: False + type/multi-pass: False + False: {} + content: + type: string + alternatives: + pass-fail: {} + scoring: {} + multi-pass: {} + interactive: {} + submit-answer: {} + name: + type: object + parsing: name-2023-07 + properties: + en: # en will always exist, if english really doesn't exist, it will be an empty string + type: string + match_properties: + "[a-z]{2,3}|[a-z]{2}-[A-Z]{2}": + type: string + uuid: + type: string + version: + type: string + credits: + type: object + parsing: credits-2023-07 + properties: + authors: + type: list + parsing: string-to-list + content: + type: string + contributors: + type: list + parsing: string-to-list + content: + type: string + testers: + type: list + parsing: string-to-list + content: + type: string + contributors: + type: list + parsing: string-to-list + content: + type: string + translators: + type: object + properties: {} + match_properties: + "[a-z]{2,3}|[a-z]{2}-[A-Z]{2}": + type: list + parsing: string-to-list + content: + type: string + packagers: + type: list + parsing: string-to-list + content: + type: string + acknowledgements: + type: list + content: + type: string + source: + type: list + parsing: source-2023-07 + content: + type: object + parsing: source-item-2023-07 + properties: + name: + type: string + url: + type: string + license: + type: string + default: unknown + alternatives: + unknown: + warn: License is unknown + require: + rights_owner: ".+" + cc0|cc by|cc by-sa|educational|permission: + require: + rights_owner: ".+" + public domain: + forbid: + rights_owner: ".+" + rights_owner: + type: string + parsing: rights-owner-2023-07 + embargo_until: + type: string + alternatives: + "": {} + "\\d{4}-\\d{2}-\\d{2}(T\\d{2}:\\d{2}:\\d{2}Z)?": {} + limits: + type: object + properties: + time_multipliers: + type: object + properties: + ac_to_time_limit: + type: float + default: 2.0 + alternatives: + "1.0:": {} + time_limit_to_tle: + type: float + default: 1.5 + alternatives: + "1.0:": {} + time_limit: + type: float + default: 0.0 # Hmm, time limit needs to be calculated by default + alternatives: + "0.0:": {} + time_resolution: + type: float + default: 1.0 + alternatives: + "0.0:": {} + memory: + type: int + default: copy-from:system_default/memory + alternatives: + "1:": {} + output: + type: int + default: copy-from:system_default/output + alternatives: + "1:": {} + code: + type: int + default: copy-from:system_default/code + alternatives: + "1:": {} + compilation_time: + type: int + default: copy-from:system_default/compilation_time + alternatives: + "1:": {} + compilation_memory: + type: int + default: copy-from:system_default/compilation_memory + alternatives: + "1:": {} + validation_time: + type: int + default: copy-from:system_default/validation_time + alternatives: + "1:": {} + validation_memory: + type: int + default: copy-from:system_default/validation_memory + alternatives: + "1:": {} + validation_output: + type: int + default: copy-from:system_default/validation_output + alternatives: + "1:": {} + validation_passes: + type: int + default: 2 + alternatives: + "2:": {} + keywords: + type: list + content: + type: string + languages: + type: list + parsing: languages-parsing + content: + type: string + allow_file_writing: + type: bool + constants: + type: object + properties: {} + match_properties: + "[a-zA-Z_][a-zA-Z0-9_]*": + type: string # In spec, this should be allowed to be int or float as well... This is not supported for this system diff --git a/problemtools/config/legacy_config_specification.yaml b/problemtools/config/legacy_config_specification.yaml new file mode 100644 index 00000000..a4432be4 --- /dev/null +++ b/problemtools/config/legacy_config_specification.yaml @@ -0,0 +1,170 @@ +type: object +required: [] +properties: + problem_format_version: + type: string + default: legacy + alternatives: + legacy: {} + type: + type: string + default: pass-fail + alternatives: + pass-fail: + forbid: + grading/on_reject: grade + scoring: {} + name: + type: string + uuid: + type: string + author: + type: string + source: + type: string + source_url: + type: string + alternatives: + ".+": + require: + source: ".+" + "": {} + license: + type: string + default: unknown + alternatives: + unknown: + warn: License is unknown + require: + rights_owner: ".+" + cc0|cc by|cc by-sa|educational|permission: + require: + rights_owner: ".+" + public domain: + forbid: + rights_owner: ".+" + rights_owner: + type: string + parsing: rights-owner-legacy + limits: + type: object + properties: + time_multiplier: + type: float + default: 5 + alternatives: + "0.0:": {} + time_safety_margin: + type: float + default: 2 + memory: + type: int + default: copy-from:system_default/memory + alternatives: + "1:": {} + output: + type: int + default: copy-from:system_default/output + alternatives: + "1:": {} + code: + type: int + default: copy-from:system_default/code + alternatives: + "1:": {} + compilation_time: + type: int + default: copy-from:system_default/compilation_time + alternatives: + "1:": {} + compilation_memory: + type: int + default: copy-from:system_default/compilation_memory + alternatives: + "1:": {} + validation_time: + type: int + default: copy-from:system_default/validation_time + alternatives: + "1:": {} + validation_memory: + type: int + default: copy-from:system_default/validation_memory + alternatives: + "1:": {} + validation_output: + type: int + default: copy-from:system_default/validation_output + alternatives: + "1:": {} + validation: + type: object + parsing: legacy-validation + properties: + type: + type: string + alternatives: + default: + forbid: + validation/interactive: true + validation/score: true + custom: {} + interactive: + type: bool + score: + type: bool + validator_flags: + type: string + keywords: + type: list + parsing: space-separated-strings + content: + type: string + grading: + type: object + properties: + show_test_data_groups: + type: bool + alternatives: + True: + forbid: + type: pass-fail + False: {} + on_reject: + type: string + default: worst_error + flags: + - deprecated + alternatives: + first_error: {} + worst_error: {} + grade: {} + objective: + type: string + default: max + alternatives: + min: {} + max: {} + accept_score: + type: float # Should actually be type string, add custom parsing? + default: 1.0 + flags: + - deprecated + reject_score: + type: float # Should actually be type string, add custom parsing? + default: 0.0 + flags: + - deprecated + range: + type: object + parsing: min-max-float-string + flags: + - deprecated + properties: + min: + type: float + default: -.inf + max: + type: float + default: .inf + diff --git a/problemtools/config/system_default.yaml b/problemtools/config/system_default.yaml new file mode 100644 index 00000000..2083f01e --- /dev/null +++ b/problemtools/config/system_default.yaml @@ -0,0 +1,8 @@ +memory: 2048 +output: 8 +code: 128 +compilation_time: 60 +compilation_memory: 2048 +validation_time: 60 +validation_memory: 2048 +validation_output: 8 \ No newline at end of file diff --git a/problemtools/verifyproblem.py b/problemtools/verifyproblem.py index 31bb6205..64cff13d 100644 --- a/problemtools/verifyproblem.py +++ b/problemtools/verifyproblem.py @@ -32,6 +32,7 @@ from . import config from . import languages from . import run +from . import config_parser from abc import ABC from typing import Any, Callable, ClassVar, Literal, Pattern, Match, ParamSpec, Type, TypeVar @@ -413,13 +414,19 @@ def __init__(self, problem: Problem, aspect_name: str, datadir: str|None=None, p if self.config is None: self.config = {} + if 'range' in self.config: # Convert range in config to reasonable format + try: + a, b = map(float, self.config['range'].split()) + self.config['range'] = {'min': a, 'max': b} + except Exception: + self.error(f"Invalid format '{self.config['range']}' for range: must be exactly two floats") + # For non-root groups, missing properties are inherited from the parent group if parent: for field, parent_value in parent.config.items(): if not field in self.config: self.config[field] = parent_value - # TODO: Decide if these should stay # Some deprecated properties are inherited from problem config during a transition period problem_grading = problem.get(ProblemConfig)['grading'] for key in ['accept_score', 'reject_score', 'range']: @@ -493,10 +500,8 @@ def has_custom_groups(self) -> bool: def get_score_range(self) -> tuple[float, float]: try: - score_range = self.config['range'] - min_score, max_score = list(map(float, score_range.split())) - return (min_score, max_score) - except: + return (self.config['range']['min'], self.config['range']['max']) + except Exception: return (float('-inf'), float('inf')) @@ -535,15 +540,18 @@ def check(self, context: Context) -> bool: if self._problem.get(ProblemTestCases)['is_scoring']: # Check grading - try: - score_range = self.config['range'] - min_score, max_score = list(map(float, score_range.split())) - if min_score > max_score: - self.error(f"Invalid score range '{score_range}': minimum score cannot be greater than maximum score") - except VerifyError: - raise - except Exception: - self.error(f"Invalid format '{score_range}' for range: must be exactly two floats") + min_score, max_score = self.get_score_range() + if min_score > max_score: + self.error(f"Invalid score range '{min_score} {max_score}': minimum score cannot be greater than maximum score") + #try: Should never be a problem anymore + # score_range = self.config['range'] + # min_score, max_score = list(map(float, score_range.split())) + # if min_score > max_score: + # self.error(f"Invalid score range '{score_range}': minimum score cannot be greater than maximum score") + #except VerifyError: + # raise + #except Exception: + # self.error(f"Invalid format '{score_range}' for range: must be exactly two floats") if self._parent is None: seen_secret = False @@ -781,547 +789,212 @@ class ProblemStatementLegacy(ProblemStatement): class ProblemStatement2023_07(ProblemStatement): EXTENSIONS = ['md', 'tex'] -config_format = { - "type":"object", - "required": [], - "config_fields": ["problem_format_version", "type", "name", "uuid", "author", "source", "source_url", "license", "rights_owner", "limits", "validation", "validator_flags", "keywords", "grading"], - "properties": { - "problem_format_version": { - "type": "string", - "default": "legacy", - "alternatives": {"legacy":{}} - }, - "type": { - "type": "string", - "default": "pass-fail", - "alternatives": { - "pass-fail": { - "forbid": ["grading/on_reject:grade"] - }, - "scoring": {}, - }, - }, - "name": { - "type": "string", - "default": "", - }, - "uuid": { - "type": "string", - "default": "", - }, - "author": { - "type": "string", - "default": "", - }, - "source": { - "type": "string", - "default": "", - }, - "source_url": { - "type": "string", - "default": "", - "alternatives": { - ".*": { - "require": ["source"] - } - }, - }, - "license": { - "type": "string", - "default": "unknown", - "alternatives": { - "unknown": { - "warn": "License is unknown", - "require": ["rights_owner"] - }, - "cc0|cc by|cc by-sa|educational|permission": { - "require": ["rights_owner"] - }, - "public domain": { - "forbid": ["rights_owner"] - } - } - }, - "rights_owner": { - "type": "string", - "default": "copy-from:author", - }, - "limits": { - "type": "object", - "required": [], - "properties": { - "time_multiplier": { - "type": "float", - "default": 5.0 - }, - "time_safety_margin": { - "type": "float", - "default": 2.0 - }, - "memory": { - "type": "int", - "default": "copy-from:system_default/memory" - }, - "output": { - "type": "int", - "default": "copy-from:system_default/output" - }, - "code": { - "type": "int", - "default": "copy-from:system_default/code" - }, - "compilation_time": { - "type": "float", - "default": "copy-from:system_default/compilation_time" - }, - "compilation_memory": { - "type": "int", - "default": "copy-from:system_default/compilation_memory" - }, - "validation_time": { - "type": "float", - "default": "copy-from:system_default/validation_time" - }, - "validation_memory": { - "type": "int", - "default": "copy-from:system_default/validation_memory" - }, - "validation_output": { - "type": "int", - "default": "copy-from:system_default/validation_output" - } - } - }, - "validation": { - "type": "object", - "parsing": "legacy-validation", - "required": [], - "properties": { - "type": { - "type": "string", - "default": "default", - "alternatives": { - "default": { - "forbid": ["validation/interactive:true", "validation/score:true"] - }, - "custom": {} - } - }, - "interactive": { - "type": "bool", - "default": False - }, - "score": { - "type": "bool", - "default": False - } - } - }, - "validator_flags": { - "type": "string", - "default": "", - }, - "keywords": { - "type": "list", - "parsing": "space-separated-strings", - "content": { - "type": "string", - }, - "default": [], - } - } -} - -class BaseValidator: - def __init__(self, layout: dict, aspect: ProblemAspect, path: str = ""): - self.layout = layout - self.aspect = aspect - self.path = path - - def verify(self, value): - """ - Verifies the value: - - Applies defaults - - Converts types - - Logs warnings/errors if needed - """ - raise NotImplementedError("Subclasses must implement verify") - - def check(self, value, get_path_func): - """ - Performs extra-checks (like forbid/require logic) - get_path_func can be used to fetch other values by path. - """ - raise NotImplementedError("Subclasses must implement check") - - -class StringValidator(BaseValidator): - def __init__(self, layout: dict, aspect: ProblemAspect, path: str = ""): - super().__init__(layout, aspect, path) - alternatives = self.layout.get("alternatives") - if alternatives: - self.patterns = {alt: re.compile('^' + alt + '$') for alt in alternatives} - else: - self.patterns = None - - def verify(self, value): - if value is None: - value = self.layout.get("default", "") - if not isinstance(value, str): - self.aspect.warning(f'Property {self.path} was expected to be of type string') - value = str(value) - if self.patterns: - if not any(pattern.match(value) for pattern in self.patterns.values()): - self.aspect.error(f"Property {self.path} is {value} but must match one of {list(self.patterns.keys())}") - value = self.layout.get("default", "") - return value - - def check(self, value, get_path_func): - if not self.patterns: - return - match = next((key for key, pattern in self.patterns.items() if pattern.match(value)), None) - checks = self.layout["alternatives"][match] - for forbidden in checks.get("forbid", []): - other_path, expected = forbidden.split(':') - if get_path_func(other_path) == expected: - self.aspect.error(f"Property {self.path} has value {value} which forbids property {other_path} to have value {expected}") - for required in checks.get("required", []): - if not get_path_func(required): #TODO: This is not a good way to handle this check I think - self.aspect.error(f"Property {self.path} has value {value} which requires property {required}") - if "warn" in checks: - self.aspect.warning(checks["warn"]) - -class ObjectValidator(BaseValidator): - def verify(self, value): - if value is None: - return self.layout.get("default", {}) - if self.layout.get("parsing") == "legacy-validation": - if not isinstance(value, str): - self.aspect.error(f"Property {self.path} was expected to be a string") - return {} - elements = value.split() - value = { - "type": elements[0], - "interactive": "interactive" in elements[1:], - "score": "score" in elements[1:] - } - if not isinstance(value, dict): - self.aspect.error(f"property {self.path} was expected to be a dictionary") - return {} - for prop in self.layout.get("required", []): - if prop not in value: - self.aspect.error(f"Missing required property: {self.path}/{prop}") - for prop in value.keys(): - if prop not in self.layout["properties"]: - self.aspect.warning(f"Unknown property: {self.path}/{prop}") - return value - - def check(self, value, get_path_func): - pass - -class ListValidator(BaseValidator): - def verify(self, value): - if value is None: - return self.layout.get("default", []) - if self.layout.get("parsing") == "space-separated-strings": - if not isinstance(value, str): - self.aspect.error(f"Property {self.path} was expected to be a string") - return {} - value = value.split() - if not isinstance(value, list): - self.aspect.error(f"property {self.path} was expected to be a list") - return {} - return value - - def check(self, value, get_path_func): - pass - -class FloatValidator(BaseValidator): - def verify(self, value): - if value is None: - return self.layout.get("default", 0.0) - if not isinstance(value, float): - try: - value = float(value) - except Exception: - self.aspect.error(f"Property {self.path} was expected to be a float") - value = self.layout.get("default", 0.0) - return value - - def check(self, value, get_path_func): - pass - -class IntValidator(BaseValidator): - def verify(self, value): - if value is None: - return self.layout.get("default", 0) - if not isinstance(value, int): - try: - value = int(value) - self.aspect.warning(f"Property {self.path} should be of type integer, interpreting as {value}") - except Exception: - self.aspect.error(f"Property {self.path} was expected to be an integer") - value = self.layout.get("default", 0) - return value - - def check(self, value, get_path_func): - pass - -def get_validator(layout: dict, aspect: ProblemAspect, path: str) -> BaseValidator: - type_map = { - "string": StringValidator, - "int": IntValidator, - "float": FloatValidator, - "object": ObjectValidator, - } - typ = layout.get("type") - if typ not in type_map: - raise NotImplementedError(f"Unrecognized type: {typ}") - return type_map[typ](layout, aspect, path) - -class ConfigLoader: - def __init__(self, config_specification: dict) -> None: - self.spec = config_specification - - -class NewProblemConfig(ProblemPart): - PART_NAME = 'config2' - - LAYOUT = {} - - def setup(self): - self.configfile = os.path.join(self.problem.probdir, 'problem.yaml') - self.data = {} - if os.path.isfile(self.configfile): - try: - with open(self.configfile) as f: - self.data = yaml.safe_load(f) or {} - except Exception as e: - self.error(str(e)) - self.data = self.verify_data(self.LAYOUT, self.data) - self.inject_data() - def resolve_all_copy_from(data, path): - if isinstance(data, dict): - for k, v in data.items(): - resolve_all_copy_from(v, f'{path}/{k}') - else: - self.resolve_copy_from(path) - resolve_all_copy_from(self.data, "") - - def inject_data(self): - pass - - def resolve_copy_from(self, path: str, resolving=set()): - if path in resolving: - raise NotImplementedError(f"Circular copy-from dependency between properties {list(resolving)}") - val = self.get_path(path) - if not isinstance(val, str) or not val.startswith("copy-from:"): - return - copy_path = val.split(':')[1] - resolving.add(path) - self.resolve_copy_from(copy_path) - resolving.remove(path) - cur = self.data - parts = path.split('/') - for d in parts[:-1]: - cur = cur[d] - cur[parts[-1]] = self.get_path(copy_path) - - - def get_path(self, *path: list[str|int]) -> Any: - cur = self.data - for part in path: - if isinstance(part, int): - if not isinstance(cur, list) or len(cur) >= part: - return None - cur = cur[part] - elif isinstance(cur, dict): - cur = cur.get(part) - return None - return cur - - def verify_data(self, layout, data, path="") -> Any: - validator = get_validator(layout, self, path) - verified = validator.verify(data) - if layout["type"] == "object": - for prop, spec in layout.get("properties", {}).items(): - verified[prop] = self.verify_data(spec, verified.get(prop), f"{path}/{prop}") - elif layout["type"] == "list": - for i in range(len(verified)): - verified[i] = self.verify_data(layout["content"], verified[i], f"{path}[{i}]") - return verified - - def check_data(self, layout, data, path=""): - validator = get_validator(layout, self, path) - validator.check(data, self.get_path) - if layout["type"] == "object": - for prop, spec in layout.get("properties", {}).items(): - self.check_data(spec, data.get(prop), f"{path}/{prop}") - elif layout["type"] == "list": - for i in range(len(verified)): - self.verify_check(layout["content"], verified[i], f"{path}[{i}]") - - def check(self, context: Context) -> bool: - self.check_data(self.LAYOUT, self.data) - return True - -class NewProblemConfigLegacy(NewProblemConfig): - LAYOUT = config_format - class ProblemConfig(ProblemPart): PART_NAME = 'config' - - @staticmethod - def setup_dependencies(): - return {ProblemStatement} - - _MANDATORY_CONFIG = ['name'] - _OPTIONAL_CONFIG = config.load_config('problem.yaml') - _VALID_LICENSES = ['unknown', 'public domain', 'cc0', 'cc by', 'cc by-sa', 'educational', 'permission'] - - def setup(self): + SPECIFICATION_FILE_NAME: str|None = None + + def get_injected_data(self) -> dict: + return { + "languages": self.problem.language_config, + "system_default": config.load_config('system_default.yaml') + } + + def setup(self) -> dict: + if self.SPECIFICATION_FILE_NAME is None: + raise NotImplementedError("Subclasses of ProblemConfig need to define SPECIFICATION_FILE_NAME") self.debug(' Loading problem config') + spec = config.load_config(self.SPECIFICATION_FILE_NAME) self.configfile = os.path.join(self.problem.probdir, 'problem.yaml') - self._data = {} - + data = {} if os.path.isfile(self.configfile): try: with open(self.configfile) as f: - self._data = yaml.safe_load(f) - # Loading empty yaml yields None, for no apparent reason... - if self._data is None: - self._data = {} + data = yaml.safe_load(f) or {} except Exception as e: self.error(str(e)) - - # Add config items from problem statement e.g. name - self._data.update(self.problem.get(ProblemStatement)) - - # Populate rights_owner unless license is public domain - if 'rights_owner' not in self._data and self._data.get('license') != 'public domain': - if 'author' in self._data: - self._data['rights_owner'] = self._data['author'] - elif 'source' in self._data: - self._data['rights_owner'] = self._data['source'] - - if 'license' in self._data: - self._data['license'] = self._data['license'].lower() - - # Ugly backwards compatibility hack - if 'name' in self._data and not isinstance(self._data['name'], dict): - self._data['name'] = {'': self._data['name']} - - self._origdata = copy.deepcopy(self._data) - - for field, default in copy.deepcopy(ProblemConfig._OPTIONAL_CONFIG).items(): - if not field in self._data: - self._data[field] = default - elif isinstance(default, dict) and isinstance(self._data[field], dict): - self._data[field] = dict(list(default.items()) + list(self._data[field].items())) - - val = self._data['validation'].split() - self._data['validation-type'] = val[0] - self._data['validation-params'] = val[1:] - - self._data['grading']['custom_scoring'] = False - for param in self._data['validation-params']: - if param == 'score': - self._data['grading']['custom_scoring'] = True - elif param == 'interactive': - pass - - return self._data - - + self.metadata = config_parser.Metadata(spec) + self.metadata.set_error_callback(self.error) + self.metadata.set_warning_callback(self.warning) + self.metadata.load_config(data, self.get_injected_data()) + return self.metadata.data + def __str__(self) -> str: return 'problem configuration' - + def check(self, context: Context) -> bool: if self._check_res is not None: return self._check_res self._check_res = True - + if not os.path.isfile(self.configfile): self.error(f"No config file {self.configfile} found") - - for field in ProblemConfig._MANDATORY_CONFIG: - if not field in self._data: - self.error(f"Mandatory field '{field}' not provided") - - for field, value in self._origdata.items(): - if field not in ProblemConfig._OPTIONAL_CONFIG.keys() and field not in ProblemConfig._MANDATORY_CONFIG: - self.warning(f"Unknown field '{field}' provided in problem.yaml") - - for field, value in self._data.items(): - if value is None: - self.error(f"Field '{field}' provided in problem.yaml but is empty") - self._data[field] = ProblemConfig._OPTIONAL_CONFIG.get(field, '') - - # Check type - if not self._data['type'] in ['pass-fail', 'scoring']: - self.error(f"Invalid value '{self._data['type']}' for type") - - # Check rights_owner - if self._data['license'] == 'public domain': - if self._data['rights_owner'].strip() != '': - self.error('Can not have a rights_owner for a problem in public domain') - elif self._data['license'] != 'unknown': - if self._data['rights_owner'].strip() == '': - self.error('No author, source or rights_owner provided') - - # Check source_url - if (self._data['source_url'].strip() != '' and - self._data['source'].strip() == ''): - self.error('Can not provide source_url without also providing source') - - # Check license - if not self._data['license'] in ProblemConfig._VALID_LICENSES: - self.error(f"Invalid value for license: {self._data['license']}.\n Valid licenses are {ProblemConfig._VALID_LICENSES}") - elif self._data['license'] == 'unknown': - self.warning("License is 'unknown'") - - if self._data['grading']['show_test_data_groups'] not in [True, False]: - self.error(f"Invalid value for grading.show_test_data_groups: {self._data['grading']['show_test_data_groups']}") - elif self._data['grading']['show_test_data_groups'] and self._data['type'] == 'pass-fail': - self.error("Showing test data groups is only supported for scoring problems, this is a pass-fail problem") - if self._data['type'] != 'pass-fail' and self.problem.get(ProblemTestCases)['root_group'].has_custom_groups() and 'show_test_data_groups' not in self._origdata.get('grading', {}): - self.warning("Problem has custom testcase groups, but does not specify a value for grading.show_test_data_groups; defaulting to false") - - if 'on_reject' in self._data['grading']: - if self._data['type'] == 'pass-fail' and self._data['grading']['on_reject'] == 'grade': - self.error(f"Invalid on_reject policy '{self._data['grading']['on_reject']}' for problem type '{self._data['type']}'") - if not self._data['grading']['on_reject'] in ['first_error', 'worst_error', 'grade']: - self.error(f"Invalid value '{self._data['grading']['on_reject']}' for on_reject policy") - - if self._data['grading']['objective'] not in ['min', 'max']: - self.error(f"Invalid value '{self._data['grading']['objective']}' for objective") - - for deprecated_grading_key in ['accept_score', 'reject_score', 'range', 'on_reject']: - if deprecated_grading_key in self._data['grading']: - self.warning(f"Grading key '{deprecated_grading_key}' is deprecated in problem.yaml, use '{deprecated_grading_key}' in testdata.yaml instead") - - if not self._data['validation-type'] in ['default', 'custom']: - self.error(f"Invalid value '{self._data['validation']}' for validation, first word must be 'default' or 'custom'") - - if self._data['validation-type'] == 'default' and len(self._data['validation-params']) > 0: - self.error(f"Invalid value '{self._data['validation']}' for validation") - - if self._data['validation-type'] == 'custom': - for param in self._data['validation-params']: - if param not in['score', 'interactive']: - self.error(f"Invalid parameter '{param}' for custom validation") - - # Check limits - if not isinstance(self._data['limits'], dict): - self.error('Limits key in problem.yaml must specify a dict') - self._data['limits'] = ProblemConfig._OPTIONAL_CONFIG['limits'] - - # Some things not yet implemented - if self._data['libraries'] != '': - self.error("Libraries not yet supported") - + + self.metadata.check_config() + return self._check_res +# TODO: add additional checks +class ProblemConfigLegacy(ProblemConfig): + SPECIFICATION_FILE_NAME = 'legacy_config_specification.yaml' + +# TODO: add additional checks +class ProblemConfig2023_07(ProblemConfig): + SPECIFICATION_FILE_NAME = '2023-07_config_specification.yaml' + +#class ProblemConfig(ProblemPart): +# PART_NAME = 'config' +# +# @staticmethod +# def setup_dependencies(): +# return {ProblemStatement} +# +# _MANDATORY_CONFIG = ['name'] +# _OPTIONAL_CONFIG = config.load_config('problem.yaml') +# _VALID_LICENSES = ['unknown', 'public domain', 'cc0', 'cc by', 'cc by-sa', 'educational', 'permission'] +# +# def setup(self): +# self.debug(' Loading problem config') +# self.configfile = os.path.join(self.problem.probdir, 'problem.yaml') +# self._data = {} +# +# if os.path.isfile(self.configfile): +# try: +# with open(self.configfile) as f: +# self._data = yaml.safe_load(f) +# # Loading empty yaml yields None, for no apparent reason... +# if self._data is None: +# self._data = {} +# except Exception as e: +# self.error(str(e)) +# +# # Add config items from problem statement e.g. name +# self._data.update(self.problem.get(ProblemStatement)) +# +# # Populate rights_owner unless license is public domain +# if 'rights_owner' not in self._data and self._data.get('license') != 'public domain': +# if 'author' in self._data: +# self._data['rights_owner'] = self._data['author'] +# elif 'source' in self._data: +# self._data['rights_owner'] = self._data['source'] +# +# if 'license' in self._data: +# self._data['license'] = self._data['license'].lower() +# +# # Ugly backwards compatibility hack +# if 'name' in self._data and not isinstance(self._data['name'], dict): +# self._data['name'] = {'': self._data['name']} +# +# self._origdata = copy.deepcopy(self._data) +# +# for field, default in copy.deepcopy(ProblemConfig._OPTIONAL_CONFIG).items(): +# if not field in self._data: +# self._data[field] = default +# elif isinstance(default, dict) and isinstance(self._data[field], dict): +# self._data[field] = dict(list(default.items()) + list(self._data[field].items())) +# +# val = self._data['validation'].split() +# self._data['validation-type'] = val[0] +# self._data['validation-params'] = val[1:] +# +# self._data['grading']['custom_scoring'] = False +# for param in self._data['validation-params']: +# if param == 'score': +# self._data['grading']['custom_scoring'] = True +# elif param == 'interactive': +# pass +# +# return self._data +# +# +# def __str__(self) -> str: +# return 'problem configuration' +# +# def check(self, context: Context) -> bool: +# if self._check_res is not None: +# return self._check_res +# self._check_res = True +# +# if not os.path.isfile(self.configfile): +# self.error(f"No config file {self.configfile} found") +# +# for field in ProblemConfig._MANDATORY_CONFIG: +# if not field in self._data: +# self.error(f"Mandatory field '{field}' not provided") +# +# for field, value in self._origdata.items(): +# if field not in ProblemConfig._OPTIONAL_CONFIG.keys() and field not in ProblemConfig._MANDATORY_CONFIG: +# self.warning(f"Unknown field '{field}' provided in problem.yaml") +# +# for field, value in self._data.items(): +# if value is None: +# self.error(f"Field '{field}' provided in problem.yaml but is empty") +# self._data[field] = ProblemConfig._OPTIONAL_CONFIG.get(field, '') +# +# # Check type +# if not self._data['type'] in ['pass-fail', 'scoring']: +# self.error(f"Invalid value '{self._data['type']}' for type") +# +# # Check rights_owner +# if self._data['license'] == 'public domain': +# if self._data['rights_owner'].strip() != '': +# self.error('Can not have a rights_owner for a problem in public domain') +# elif self._data['license'] != 'unknown': +# if self._data['rights_owner'].strip() == '': +# self.error('No author, source or rights_owner provided') +# +# # Check source_url +# if (self._data['source_url'].strip() != '' and +# self._data['source'].strip() == ''): +# self.error('Can not provide source_url without also providing source') +# +# # Check license +# if not self._data['license'] in ProblemConfig._VALID_LICENSES: +# self.error(f"Invalid value for license: {self._data['license']}.\n Valid licenses are {ProblemConfig._VALID_LICENSES}") +# elif self._data['license'] == 'unknown': +# self.warning("License is 'unknown'") +# +# if self._data['grading']['show_test_data_groups'] not in [True, False]: +# self.error(f"Invalid value for grading.show_test_data_groups: {self._data['grading']['show_test_data_groups']}") +# elif self._data['grading']['show_test_data_groups'] and self._data['type'] == 'pass-fail': +# self.error("Showing test data groups is only supported for scoring problems, this is a pass-fail problem") +# if self._data['type'] != 'pass-fail' and self.problem.get(ProblemTestCases)['root_group'].has_custom_groups() and 'show_test_data_groups' not in self._origdata.get('grading', {}): +# self.warning("Problem has custom testcase groups, but does not specify a value for grading.show_test_data_groups; defaulting to false") +# +# if 'on_reject' in self._data['grading']: +# if self._data['type'] == 'pass-fail' and self._data['grading']['on_reject'] == 'grade': +# self.error(f"Invalid on_reject policy '{self._data['grading']['on_reject']}' for problem type '{self._data['type']}'") +# if not self._data['grading']['on_reject'] in ['first_error', 'worst_error', 'grade']: +# self.error(f"Invalid value '{self._data['grading']['on_reject']}' for on_reject policy") +# +# if self._data['grading']['objective'] not in ['min', 'max']: +# self.error(f"Invalid value '{self._data['grading']['objective']}' for objective") +# +# for deprecated_grading_key in ['accept_score', 'reject_score', 'range', 'on_reject']: +# if deprecated_grading_key in self._data['grading']: +# self.warning(f"Grading key '{deprecated_grading_key}' is deprecated in problem.yaml, use '{deprecated_grading_key}' in testdata.yaml instead") +# +# if not self._data['validation-type'] in ['default', 'custom']: +# self.error(f"Invalid value '{self._data['validation']}' for validation, first word must be 'default' or 'custom'") +# +# if self._data['validation-type'] == 'default' and len(self._data['validation-params']) > 0: +# self.error(f"Invalid value '{self._data['validation']}' for validation") +# +# if self._data['validation-type'] == 'custom': +# for param in self._data['validation-params']: +# if param not in['score', 'interactive']: +# self.error(f"Invalid parameter '{param}' for custom validation") +# +# # Check limits +# if not isinstance(self._data['limits'], dict): +# self.error('Limits key in problem.yaml must specify a dict') +# self._data['limits'] = ProblemConfig._OPTIONAL_CONFIG['limits'] +# +# # Some things not yet implemented +# if self._data['libraries'] != '': +# self.error("Libraries not yet supported") +# +# return self._check_res + class ProblemTestCases(ProblemPart): PART_NAME = 'testdata' @@ -1334,7 +1007,7 @@ def setup(self): self.testcase_by_infile = {} return { 'root_group': TestCaseGroup(self.problem, self.PART_NAME), - 'is_interactive': 'interactive' in self.problem.get(ProblemConfig)['validation-params'], + 'is_interactive': self.problem.get(ProblemConfig)['validation']['interactive'], 'is_scoring': self.problem.get(ProblemConfig)['type'] == 'scoring' } @@ -2116,7 +1789,7 @@ def check(self, context: Context) -> bool: PROBLEM_FORMATS: dict[str, dict[str, list[Type[ProblemPart]]]] = { 'legacy': { - 'config': [ProblemConfig], + 'config': [ProblemConfigLegacy], 'statement': [ProblemStatementLegacy, Attachments], 'validators': [InputValidators, OutputValidators], 'graders': [Graders], From 4f628b23a29acf8898aea12b5c76b45c65b608dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20S=C3=B6derbergh?= Date: Tue, 25 Mar 2025 14:41:42 +0100 Subject: [PATCH 23/32] Can now verify (basic) legacy problem again, yippie! --- .../config/legacy_config_specification.yaml | 4 ++ problemtools/verifyproblem.py | 57 +++++++++---------- 2 files changed, 31 insertions(+), 30 deletions(-) diff --git a/problemtools/config/legacy_config_specification.yaml b/problemtools/config/legacy_config_specification.yaml index a4432be4..25024a00 100644 --- a/problemtools/config/legacy_config_specification.yaml +++ b/problemtools/config/legacy_config_specification.yaml @@ -103,6 +103,7 @@ properties: properties: type: type: string + default: default alternatives: default: forbid: @@ -155,6 +156,9 @@ properties: default: 0.0 flags: - deprecated + custom_scoring: + type: bool + default: copy-from:validation/score range: type: object parsing: min-max-float-string diff --git a/problemtools/verifyproblem.py b/problemtools/verifyproblem.py index 64cff13d..56c0ddfd 100644 --- a/problemtools/verifyproblem.py +++ b/problemtools/verifyproblem.py @@ -37,6 +37,8 @@ from abc import ABC from typing import Any, Callable, ClassVar, Literal, Pattern, Match, ParamSpec, Type, TypeVar +from copy import deepcopy + log = logging.getLogger(__name__) Verdict = Literal['AC', 'TLE', 'OLE', 'MLE', 'RTE', 'WA', 'PAC', 'JE'] @@ -414,23 +416,17 @@ def __init__(self, problem: Problem, aspect_name: str, datadir: str|None=None, p if self.config is None: self.config = {} - if 'range' in self.config: # Convert range in config to reasonable format - try: - a, b = map(float, self.config['range'].split()) - self.config['range'] = {'min': a, 'max': b} - except Exception: - self.error(f"Invalid format '{self.config['range']}' for range: must be exactly two floats") - # For non-root groups, missing properties are inherited from the parent group if parent: for field, parent_value in parent.config.items(): if not field in self.config: self.config[field] = parent_value + # TODO: Decide if these should stay # Some deprecated properties are inherited from problem config during a transition period - problem_grading = problem.get(ProblemConfig)['grading'] + problem_grading = problem.get(ProblemConfig)['original_data']['grading'] for key in ['accept_score', 'reject_score', 'range']: - if key in problem.get(ProblemConfig)['grading']: + if key in problem.get(ProblemConfig)['original_data']['grading']: self.config[key] = problem_grading[key] problem_on_reject = problem_grading.get('on_reject') @@ -500,7 +496,9 @@ def has_custom_groups(self) -> bool: def get_score_range(self) -> tuple[float, float]: try: - return (self.config['range']['min'], self.config['range']['max']) + score_range = self.config['range'] + min_score, max_score = list(map(float, score_range.split())) + return (min_score, max_score) except Exception: return (float('-inf'), float('inf')) @@ -540,18 +538,15 @@ def check(self, context: Context) -> bool: if self._problem.get(ProblemTestCases)['is_scoring']: # Check grading - min_score, max_score = self.get_score_range() - if min_score > max_score: - self.error(f"Invalid score range '{min_score} {max_score}': minimum score cannot be greater than maximum score") - #try: Should never be a problem anymore - # score_range = self.config['range'] - # min_score, max_score = list(map(float, score_range.split())) - # if min_score > max_score: - # self.error(f"Invalid score range '{score_range}': minimum score cannot be greater than maximum score") - #except VerifyError: - # raise - #except Exception: - # self.error(f"Invalid format '{score_range}' for range: must be exactly two floats") + try: + score_range = self.config['range'] + min_score, max_score = list(map(float, score_range.split())) + if min_score > max_score: + self.error(f"Invalid score range '{score_range}': minimum score cannot be greater than maximum score") + except VerifyError: + raise + except Exception: + self.error(f"Invalid format '{score_range}' for range: must be exactly two floats") if self._parent is None: seen_secret = False @@ -796,7 +791,8 @@ class ProblemConfig(ProblemPart): def get_injected_data(self) -> dict: return { "languages": self.problem.language_config, - "system_default": config.load_config('system_default.yaml') + "system_default": config.load_config('system_default.yaml'), + "original_data": deepcopy(self.data) # TODO: TestCaseGroup does not like the new system } def setup(self) -> dict: @@ -805,17 +801,18 @@ def setup(self) -> dict: self.debug(' Loading problem config') spec = config.load_config(self.SPECIFICATION_FILE_NAME) self.configfile = os.path.join(self.problem.probdir, 'problem.yaml') - data = {} + self.data = {} if os.path.isfile(self.configfile): try: with open(self.configfile) as f: - data = yaml.safe_load(f) or {} + self.data = yaml.safe_load(f) or {} except Exception as e: self.error(str(e)) self.metadata = config_parser.Metadata(spec) self.metadata.set_error_callback(self.error) self.metadata.set_warning_callback(self.warning) - self.metadata.load_config(data, self.get_injected_data()) + self.metadata.load_config(self.data, self.get_injected_data()) + #print(yaml.dump(self.metadata.data)) return self.metadata.data def __str__(self) -> str: @@ -1328,12 +1325,12 @@ def check(self, context: Context) -> bool: if isinstance(v, run.SourceCode) and v.language.lang_id not in recommended_output_validator_languages: self.warning('output validator language %s is not recommended' % v.language.name) - if self.problem.get(ProblemConfig)['validation'] == 'default' and self._validators: + if self.problem.get(ProblemConfig)['validation']['type'] == 'default' and self._validators: self.error('There are validator programs but problem.yaml has validation = "default"') - elif self.problem.get(ProblemConfig)['validation'] != 'default' and not self._validators: + elif self.problem.get(ProblemConfig)['validation']['type'] != 'default' and not self._validators: self.error('problem.yaml specifies custom validator but no validator programs found') - if self.problem.get(ProblemConfig)['validation'] == 'default' and self._default_validator is None: + if self.problem.get(ProblemConfig)['validation']['type'] == 'default' and self._default_validator is None: self.error('Unable to locate default validator') for val in self._validators[:]: @@ -1423,7 +1420,7 @@ def _parse_validator_results(self, val, status: int, feedbackdir, testcase: Test def _actual_validators(self) -> list: vals = self._validators - if self.problem.get(ProblemConfig)['validation'] == 'default': + if self.problem.get(ProblemConfig)['validation']['type'] == 'default': vals = [self._default_validator] return [val for val in vals if val is not None] From 314a814d56d251e4333c6da008da405cd83ae40f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20S=C3=B6derbergh?= Date: Tue, 25 Mar 2025 17:27:06 +0100 Subject: [PATCH 24/32] Bug fixes Co-authored-by: Zazmuz --- problemtools/verifyproblem.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/problemtools/verifyproblem.py b/problemtools/verifyproblem.py index 56c0ddfd..ee08234d 100644 --- a/problemtools/verifyproblem.py +++ b/problemtools/verifyproblem.py @@ -426,7 +426,7 @@ def __init__(self, problem: Problem, aspect_name: str, datadir: str|None=None, p # Some deprecated properties are inherited from problem config during a transition period problem_grading = problem.get(ProblemConfig)['original_data']['grading'] for key in ['accept_score', 'reject_score', 'range']: - if key in problem.get(ProblemConfig)['original_data']['grading']: + if key in problem_grading: self.config[key] = problem_grading[key] problem_on_reject = problem_grading.get('on_reject') @@ -789,10 +789,13 @@ class ProblemConfig(ProblemPart): SPECIFICATION_FILE_NAME: str|None = None def get_injected_data(self) -> dict: + orig_data = deepcopy(self.data) # Ugly hack to make TestCaseGroup work like before + if 'grading' not in orig_data: + orig_data['grading'] = {'objective': 'max', 'show_test_data_groups': False} return { "languages": self.problem.language_config, "system_default": config.load_config('system_default.yaml'), - "original_data": deepcopy(self.data) # TODO: TestCaseGroup does not like the new system + "original_data": orig_data } def setup(self) -> dict: @@ -1830,6 +1833,8 @@ def getProblemPart(self, part: Type[_ProblemPartT]) -> _ProblemPartT: return self._classes[part.PART_NAME] # type: ignore def __enter__(self) -> Problem: + ProblemAspect.errors = 0 + ProblemAspect.warnings = 0 self.tmpdir = tempfile.mkdtemp(prefix=f'verify-{self.shortname}-') if not os.path.isdir(self.probdir): self.error(f"Problem directory '{self.probdir}' not found") @@ -1874,8 +1879,6 @@ def check(self, args: argparse.Namespace) -> tuple[int, int]: if self.shortname is None: return 1, 0 - ProblemAspect.errors = 0 - ProblemAspect.warnings = 0 ProblemAspect.bail_on_error = args.bail_on_error ProblemAspect.consider_warnings_errors = args.werror From 1a710a0c7b916fbb0d09384cd96c74540fca4f8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20S=C3=B6derbergh?= Date: Tue, 25 Mar 2025 19:40:52 +0100 Subject: [PATCH 25/32] Some additional changes Co-authored-by: Zazmuz --- problemtools/config_parser/notes.txt | 188 --------------------------- problemtools/verifyproblem.py | 164 +---------------------- 2 files changed, 7 insertions(+), 345 deletions(-) delete mode 100644 problemtools/config_parser/notes.txt diff --git a/problemtools/config_parser/notes.txt b/problemtools/config_parser/notes.txt deleted file mode 100644 index 425f4bac..00000000 --- a/problemtools/config_parser/notes.txt +++ /dev/null @@ -1,188 +0,0 @@ -1. Load problem-config from problem.yaml -2. Override config-field 'name' with languages found in statements - -I guess previously the 'name' field did not matter, but now it has to match the problemstatements - -Is being checked: - [x] Config file exists - [c] Problem has at least one name, either through the name-field or through \problemname{X} from statements - [x] All fields are either optional or mandatory, where optional fields are taken from a standard-configuration in some standard-paths - [x] All fields have a value - [x] Field 'type' is either 'pass-fail' or 'scoring' - [x] Check that problem has either a license or a rights-owner, and that problem does not have a rights-owner if it is in public domain - [x] Check that field 'source' exists if 'source_url' exists - [x] Check that the license is valid - [x] Warn if license is unknown - [x] Check that 'grading/show_test_data_groups' is either True or False - [x] If above is true, check that problem is not pass-fail - [c] Warn if problem has custom test data groups, that is subgroups beyond sample and secret, and does not specify 'grading/show_test_data_groups' - [x] Check that 'grading/on_reject' is not 'grade' if problem-type is 'pass-fail' - [x] Check that 'grading/on_reject' is either 'first_error', 'worst_error' or 'grade' if it exists - [x] Check that 'grading/objective' is either 'min' or 'max' - [x] Warn if a deprecated grading-keys are found under 'grading' ('accept_score', 'reject_score', 'range', 'on_reject') - [x] Check that 'validation-type' is either 'default' or 'custom' - [x] Check that there are no 'validation-params' if 'validation-type' is 'default' - [x] Check that 'validation-params' are either 'grade' or 'interactive' if 'validation-type' is 'custom' - [x] Check that 'limits' is a dictionary - [r] Check that key 'libraries' does not exist (???) Remove this - - -"format": { - "argument": { - "alternatives": [dict|value], - "is_mandatory": true|false, - "fallback": Any, - "forbids": { - "field": [values] - }, - "get_from_config": true|false, # Allow stuff to come from other parts of the problem - } -} - -"legacy": { - "type":"object", - "required": [], - "config_fields": ["problem_format_version", "type", "name", "uuid", "author", "source", "source_url", "license", "rights_owner", "limits", "validation", "validator_flags", "keywords", "grading"], - "properties": { - "problem_format_version": { - "type": "string", - "default": "legacy", - "alternatives": {"legacy":{}} - }, - "type": { - "type": "string", - "default": "pass-fail", - "alternatives": { - "pass-fail": { - "forbid": "grading/on_reject:grade" - }, - "scoring": {}, - }, - }, - "name": { - "type": "string", - "default": "", - }, - "uuid": { - "type": "string", - "default": "", - }, - "author": { - "type": "string", - "default": "", - }, - "source": { - "type": "string", - "default": "", - }, - "source_url": { - "type": "string", - "default": "", - "alternatives": { - ".*": { - "require": ["source"] - } - }, - }, - "license": { - "type": "string", - "default": "unknown", - "alternatives": { - "unknown": { - "warn": "License is unknown", - "require": ["rights_owner"] - } - "cc0|cc by|cc by-sa|educational|permission": { - "require": ["rights_owner"] - }, - "public domain": { - "forbid": ["rights_owner"] - } - } - }, - "rights_owner": { - "type": "string", - "default": "copy-from:author", - "alternatives": {".*":[]} - }, - "limits": { - "type": "object", - "required": [], - "properties": { - "time_multiplier": { - "type": "float", - "default": 5.0 - }, - "time_safety_margin": { - "type": "float", - "default": 2.0 - }, - "memory": { - "type": "int", - "default": "copy-from:system_default/memory" - }, - "output": { - "type": "int", - "default": "copy-from:system_default/output" - }, - "code": { - "type": "int", - "default": "copy-from:system_default/code" - }, - "compilation_time": { - "type": "float", - "default": "copy-from:system_default/compilation_time" - }, - "compilation_memory": { - "type": "int", - "default": "copy-from:system_default/compilation_memory" - }, - "validation_time": { - "type": "float", - "default": "copy-from:system_default/validation_time" - }, - "validation_memory": { - "type": "int", - "default": "copy-from:system_default/validation_memory" - }, - "validation_output": { - "type": "int", - "default": "copy-from:system_default/validation_output" - } - } - }, - "validation": { - "type": "specially-parsed-object", - "required": [], - "properties": { - "type": { - "type": "string", - "default": "default", - "alternatives": { - "default": { - "forbid": ["validation/interactive:true", "validation/score:true"] - }, - "custom": {} - } - }, - "interactive": { - "type": "bool", - "default": false - }, - "score": { - "type": "bool", - "default": false - } - } - }, - "validator_flags": { - "type": "string", - "default": "", - "alternatives": { ".*": {} } - }, - "keywords": { - "type": "space-separated-strings", - "default": [], - } - } -} diff --git a/problemtools/verifyproblem.py b/problemtools/verifyproblem.py index ee08234d..05334471 100644 --- a/problemtools/verifyproblem.py +++ b/problemtools/verifyproblem.py @@ -815,7 +815,6 @@ def setup(self) -> dict: self.metadata.set_error_callback(self.error) self.metadata.set_warning_callback(self.warning) self.metadata.load_config(self.data, self.get_injected_data()) - #print(yaml.dump(self.metadata.data)) return self.metadata.data def __str__(self) -> str: @@ -833,168 +832,19 @@ def check(self, context: Context) -> bool: return self._check_res -# TODO: add additional checks class ProblemConfigLegacy(ProblemConfig): SPECIFICATION_FILE_NAME = 'legacy_config_specification.yaml' -# TODO: add additional checks + def check(self, context): + super().check(context) + if (self.problem.get(ProblemConfig)['type'] != 'pass-fail' and + self.problem.get(ProblemTestCases)['root_group'].has_custom_groups() and + 'show_test_data_groups' not in self.get(ProblemConfig)['original_data'].get('grading', {})): + self.warning("Problem has custom testcase groups, but does not specify a value for grading.show_test_data_groups; defaulting to false") + class ProblemConfig2023_07(ProblemConfig): SPECIFICATION_FILE_NAME = '2023-07_config_specification.yaml' -#class ProblemConfig(ProblemPart): -# PART_NAME = 'config' -# -# @staticmethod -# def setup_dependencies(): -# return {ProblemStatement} -# -# _MANDATORY_CONFIG = ['name'] -# _OPTIONAL_CONFIG = config.load_config('problem.yaml') -# _VALID_LICENSES = ['unknown', 'public domain', 'cc0', 'cc by', 'cc by-sa', 'educational', 'permission'] -# -# def setup(self): -# self.debug(' Loading problem config') -# self.configfile = os.path.join(self.problem.probdir, 'problem.yaml') -# self._data = {} -# -# if os.path.isfile(self.configfile): -# try: -# with open(self.configfile) as f: -# self._data = yaml.safe_load(f) -# # Loading empty yaml yields None, for no apparent reason... -# if self._data is None: -# self._data = {} -# except Exception as e: -# self.error(str(e)) -# -# # Add config items from problem statement e.g. name -# self._data.update(self.problem.get(ProblemStatement)) -# -# # Populate rights_owner unless license is public domain -# if 'rights_owner' not in self._data and self._data.get('license') != 'public domain': -# if 'author' in self._data: -# self._data['rights_owner'] = self._data['author'] -# elif 'source' in self._data: -# self._data['rights_owner'] = self._data['source'] -# -# if 'license' in self._data: -# self._data['license'] = self._data['license'].lower() -# -# # Ugly backwards compatibility hack -# if 'name' in self._data and not isinstance(self._data['name'], dict): -# self._data['name'] = {'': self._data['name']} -# -# self._origdata = copy.deepcopy(self._data) -# -# for field, default in copy.deepcopy(ProblemConfig._OPTIONAL_CONFIG).items(): -# if not field in self._data: -# self._data[field] = default -# elif isinstance(default, dict) and isinstance(self._data[field], dict): -# self._data[field] = dict(list(default.items()) + list(self._data[field].items())) -# -# val = self._data['validation'].split() -# self._data['validation-type'] = val[0] -# self._data['validation-params'] = val[1:] -# -# self._data['grading']['custom_scoring'] = False -# for param in self._data['validation-params']: -# if param == 'score': -# self._data['grading']['custom_scoring'] = True -# elif param == 'interactive': -# pass -# -# return self._data -# -# -# def __str__(self) -> str: -# return 'problem configuration' -# -# def check(self, context: Context) -> bool: -# if self._check_res is not None: -# return self._check_res -# self._check_res = True -# -# if not os.path.isfile(self.configfile): -# self.error(f"No config file {self.configfile} found") -# -# for field in ProblemConfig._MANDATORY_CONFIG: -# if not field in self._data: -# self.error(f"Mandatory field '{field}' not provided") -# -# for field, value in self._origdata.items(): -# if field not in ProblemConfig._OPTIONAL_CONFIG.keys() and field not in ProblemConfig._MANDATORY_CONFIG: -# self.warning(f"Unknown field '{field}' provided in problem.yaml") -# -# for field, value in self._data.items(): -# if value is None: -# self.error(f"Field '{field}' provided in problem.yaml but is empty") -# self._data[field] = ProblemConfig._OPTIONAL_CONFIG.get(field, '') -# -# # Check type -# if not self._data['type'] in ['pass-fail', 'scoring']: -# self.error(f"Invalid value '{self._data['type']}' for type") -# -# # Check rights_owner -# if self._data['license'] == 'public domain': -# if self._data['rights_owner'].strip() != '': -# self.error('Can not have a rights_owner for a problem in public domain') -# elif self._data['license'] != 'unknown': -# if self._data['rights_owner'].strip() == '': -# self.error('No author, source or rights_owner provided') -# -# # Check source_url -# if (self._data['source_url'].strip() != '' and -# self._data['source'].strip() == ''): -# self.error('Can not provide source_url without also providing source') -# -# # Check license -# if not self._data['license'] in ProblemConfig._VALID_LICENSES: -# self.error(f"Invalid value for license: {self._data['license']}.\n Valid licenses are {ProblemConfig._VALID_LICENSES}") -# elif self._data['license'] == 'unknown': -# self.warning("License is 'unknown'") -# -# if self._data['grading']['show_test_data_groups'] not in [True, False]: -# self.error(f"Invalid value for grading.show_test_data_groups: {self._data['grading']['show_test_data_groups']}") -# elif self._data['grading']['show_test_data_groups'] and self._data['type'] == 'pass-fail': -# self.error("Showing test data groups is only supported for scoring problems, this is a pass-fail problem") -# if self._data['type'] != 'pass-fail' and self.problem.get(ProblemTestCases)['root_group'].has_custom_groups() and 'show_test_data_groups' not in self._origdata.get('grading', {}): -# self.warning("Problem has custom testcase groups, but does not specify a value for grading.show_test_data_groups; defaulting to false") -# -# if 'on_reject' in self._data['grading']: -# if self._data['type'] == 'pass-fail' and self._data['grading']['on_reject'] == 'grade': -# self.error(f"Invalid on_reject policy '{self._data['grading']['on_reject']}' for problem type '{self._data['type']}'") -# if not self._data['grading']['on_reject'] in ['first_error', 'worst_error', 'grade']: -# self.error(f"Invalid value '{self._data['grading']['on_reject']}' for on_reject policy") -# -# if self._data['grading']['objective'] not in ['min', 'max']: -# self.error(f"Invalid value '{self._data['grading']['objective']}' for objective") -# -# for deprecated_grading_key in ['accept_score', 'reject_score', 'range', 'on_reject']: -# if deprecated_grading_key in self._data['grading']: -# self.warning(f"Grading key '{deprecated_grading_key}' is deprecated in problem.yaml, use '{deprecated_grading_key}' in testdata.yaml instead") -# -# if not self._data['validation-type'] in ['default', 'custom']: -# self.error(f"Invalid value '{self._data['validation']}' for validation, first word must be 'default' or 'custom'") -# -# if self._data['validation-type'] == 'default' and len(self._data['validation-params']) > 0: -# self.error(f"Invalid value '{self._data['validation']}' for validation") -# -# if self._data['validation-type'] == 'custom': -# for param in self._data['validation-params']: -# if param not in['score', 'interactive']: -# self.error(f"Invalid parameter '{param}' for custom validation") -# -# # Check limits -# if not isinstance(self._data['limits'], dict): -# self.error('Limits key in problem.yaml must specify a dict') -# self._data['limits'] = ProblemConfig._OPTIONAL_CONFIG['limits'] -# -# # Some things not yet implemented -# if self._data['libraries'] != '': -# self.error("Libraries not yet supported") -# -# return self._check_res - class ProblemTestCases(ProblemPart): PART_NAME = 'testdata' From a76595e18306319954f3b4f0218858ec4bd65d7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20S=C3=B6derbergh?= Date: Tue, 25 Mar 2025 20:08:54 +0100 Subject: [PATCH 26/32] Fix bugs Co-authored-by: Zazmuz --- problemtools/tests/test_verify_hello.py | 1 - problemtools/verifyproblem.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/problemtools/tests/test_verify_hello.py b/problemtools/tests/test_verify_hello.py index 49ff53aa..acdc638c 100644 --- a/problemtools/tests/test_verify_hello.py +++ b/problemtools/tests/test_verify_hello.py @@ -1,7 +1,6 @@ import pathlib import problemtools.verifyproblem as verify - def test_load_hello(): directory = pathlib.Path(__file__).parent / "hello" string = str(directory.resolve()) diff --git a/problemtools/verifyproblem.py b/problemtools/verifyproblem.py index 05334471..2472b692 100644 --- a/problemtools/verifyproblem.py +++ b/problemtools/verifyproblem.py @@ -841,6 +841,7 @@ def check(self, context): self.problem.get(ProblemTestCases)['root_group'].has_custom_groups() and 'show_test_data_groups' not in self.get(ProblemConfig)['original_data'].get('grading', {})): self.warning("Problem has custom testcase groups, but does not specify a value for grading.show_test_data_groups; defaulting to false") + return self._check_res class ProblemConfig2023_07(ProblemConfig): SPECIFICATION_FILE_NAME = '2023-07_config_specification.yaml' From b22b5a9f6e8911198d552835eb468e909e24993f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20S=C3=B6derbergh?= Date: Tue, 25 Mar 2025 20:36:00 +0100 Subject: [PATCH 27/32] Make ints be interpreted as floats --- problemtools/config_parser/parser.py | 4 ++-- problemtools/verifyproblem.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/problemtools/config_parser/parser.py b/problemtools/config_parser/parser.py index 7bad9f68..f82efbef 100644 --- a/problemtools/config_parser/parser.py +++ b/problemtools/config_parser/parser.py @@ -263,7 +263,7 @@ def _parse(self, val): if val is None: return None - if not isinstance(val, float): + if not isinstance(val, (int, float)): try: cast = float(val) self.warning_func( @@ -274,7 +274,7 @@ def _parse(self, val): self.error_func(f"Expected a float, got {val} ({self.path})") return None - return val + return float(val) class DefaultBoolParser(Parser): diff --git a/problemtools/verifyproblem.py b/problemtools/verifyproblem.py index 2472b692..df5dbddd 100644 --- a/problemtools/verifyproblem.py +++ b/problemtools/verifyproblem.py @@ -839,7 +839,7 @@ def check(self, context): super().check(context) if (self.problem.get(ProblemConfig)['type'] != 'pass-fail' and self.problem.get(ProblemTestCases)['root_group'].has_custom_groups() and - 'show_test_data_groups' not in self.get(ProblemConfig)['original_data'].get('grading', {})): + 'show_test_data_groups' not in self.problem.get(ProblemConfig)['original_data'].get('grading', {})): self.warning("Problem has custom testcase groups, but does not specify a value for grading.show_test_data_groups; defaulting to false") return self._check_res From 57b9dc5b14a53f649aa2ea3bb66e7054b8b8e7e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20S=C3=B6derbergh?= Date: Tue, 25 Mar 2025 21:07:32 +0100 Subject: [PATCH 28/32] Clarify new feature in spec.md --- problemtools/config_parser/spec.md | 1 + 1 file changed, 1 insertion(+) diff --git a/problemtools/config_parser/spec.md b/problemtools/config_parser/spec.md index 55e0999f..4879fd4c 100644 --- a/problemtools/config_parser/spec.md +++ b/problemtools/config_parser/spec.md @@ -36,6 +36,7 @@ Standard default: {} Has the following properties: - "required": List of all strictly required properties, will give an error if one is missing - "properties": Dictionary of property-names to their types +- "match_properties": Dictionary of regex strings to their types. A step will be performed to add all properties that match here to the normal properties. ## list Standard default: [] From ae185ec38cf0208225d8429c5c5d96a104c9f6fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20S=C3=B6derbergh?= Date: Tue, 25 Mar 2025 21:40:58 +0100 Subject: [PATCH 29/32] Add new problem config to new format version --- problemtools/verifyproblem.py | 1 + 1 file changed, 1 insertion(+) diff --git a/problemtools/verifyproblem.py b/problemtools/verifyproblem.py index df5dbddd..4c8fe61b 100644 --- a/problemtools/verifyproblem.py +++ b/problemtools/verifyproblem.py @@ -1648,6 +1648,7 @@ def check(self, context: Context) -> bool: 'submissions': [Submissions], }, '2023-07': { # TODO: Add all the parts + 'config': [ProblemConfig2023_07], 'statement': [ProblemStatement2023_07, Attachments], } } From 481133c56ca7e9ae4df6cbca4547e53f2b2b174f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20S=C3=B6derbergh?= Date: Tue, 25 Mar 2025 23:18:52 +0100 Subject: [PATCH 30/32] small fix to hopefully pass CI tests --- problemtools/config_parser/parser.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/problemtools/config_parser/parser.py b/problemtools/config_parser/parser.py index f82efbef..5dffa7f3 100644 --- a/problemtools/config_parser/parser.py +++ b/problemtools/config_parser/parser.py @@ -586,5 +586,6 @@ def _parse(self, val): parsers = { p.NAME: p for p in parser_classes } if len(parser_classes) != len(parsers): - duplicates = [f' - {prop}, {cnt} occurences' for prop, cnt in Counter(c.NAME for c in parser_classes).items() if cnt > 1] - raise NotImplementedError(f"Duplicate name(s) detected in parsers:\n{'\n'.join(duplicates)}") + duplicates = '\n'.join(f' - {prop}, {cnt} occurences' for prop, cnt in Counter(c.NAME for c in parser_classes).items() if cnt > 1) + + raise NotImplementedError(f"Duplicate name(s) detected in parsers:\n{duplicates}") From 5555449f59c1af797ba4bb24a1995c6c803aab05 Mon Sep 17 00:00:00 2001 From: Rasmus Hulthe Date: Wed, 26 Mar 2025 00:41:38 +0100 Subject: [PATCH 31/32] Fixed mypy static typing errors for CI tests Co-authored-by: square-cylinder --- problemtools/ProblemPlasTeX/__init__.py | 1 + .../config/2023-07_config_specification.yaml | 5 --- problemtools/config_parser/__init__.py | 24 +++++++++--- problemtools/config_parser/config_path.py | 6 +-- problemtools/config_parser/matcher.py | 3 ++ problemtools/config_parser/parser.py | 37 +++++++++---------- .../config/2023-07_specification.yaml | 5 --- problemtools/verifyproblem.py | 6 ++- 8 files changed, 48 insertions(+), 39 deletions(-) diff --git a/problemtools/ProblemPlasTeX/__init__.py b/problemtools/ProblemPlasTeX/__init__.py index f7fb3d53..eb5ef76c 100644 --- a/problemtools/ProblemPlasTeX/__init__.py +++ b/problemtools/ProblemPlasTeX/__init__.py @@ -95,6 +95,7 @@ def render(self, document): if templatepath == None: raise Exception('Could not find templates needed for conversion to HTML') + assert templatepath is not None # Ugly but unfortunately PlasTeX is quite inflexible when it comes to # configuring where to search for template files os.environ['ProblemRendererTEMPLATES'] = templatepath diff --git a/problemtools/config/2023-07_config_specification.yaml b/problemtools/config/2023-07_config_specification.yaml index 727f74fa..9a3b8b60 100644 --- a/problemtools/config/2023-07_config_specification.yaml +++ b/problemtools/config/2023-07_config_specification.yaml @@ -72,11 +72,6 @@ properties: parsing: string-to-list content: type: string - contributors: - type: list - parsing: string-to-list - content: - type: string translators: type: object properties: {} diff --git a/problemtools/config_parser/__init__.py b/problemtools/config_parser/__init__.py index fabcb2f7..0c1ab774 100644 --- a/problemtools/config_parser/__init__.py +++ b/problemtools/config_parser/__init__.py @@ -33,15 +33,16 @@ def set_error_callback(self, fun: Callable): def set_warning_callback(self, fun: Callable): self.warning_func = fun + @staticmethod def invert_graph(dependency_graph: dict[Path, list[Path]]) -> dict[Path, list[Path]]: - # TODO: catch key errors better - depends_on_graph = {k: [] for k in dependency_graph.keys()} + depends_on_graph: dict[Path, list[Path]] = {k: [] for k in dependency_graph.keys()} for dependant, dependencies in dependency_graph.items(): for dependency in dependencies: depends_on_graph[dependency].append(dependant) return depends_on_graph - def topo_sort(dependency_graph: dict[Path, list[Path]]) -> Generator[Path]: + @staticmethod + def topo_sort(dependency_graph: dict[Path, list[Path]]) -> Generator[Path, None, None]: # Dependancy graph: # Path : [Depends on Path, ...] @@ -91,8 +92,11 @@ def get_path_dependencies(self) -> dict[Path, list[Path]]: return graph def get_copy_dependencies(self) -> dict[Path, list[Path]]: + if self.data is None: + raise SpecificationError("Data is not loaded yet") + stack = [(Path(), Path(child)) for child in self.data.keys()] - graph = {Path(): []} + graph: dict[Path, list[Path]] = {Path(): []} while stack: parent, path = stack.pop() @@ -109,6 +113,7 @@ def get_copy_dependencies(self) -> dict[Path, list[Path]]: return graph + @staticmethod def resolve_match_properties(spec: dict, data: dict): if spec.get("type") == "list": if type(data) is list: @@ -132,7 +137,8 @@ def resolve_match_properties(spec: dict, data: dict): for prop_name, prop in spec.get("properties", {}).items(): if prop_name in data: Metadata.resolve_match_properties(prop, data[prop_name]) - + + @staticmethod def remove_match_properties(spec: dict): if spec.get("type") == "list": Metadata.remove_match_properties(spec["content"]) @@ -144,9 +150,13 @@ def remove_match_properties(spec: dict): Metadata.remove_match_properties(prop) def load_config(self, config: dict, injected_data: dict) -> None: - self.data: dict = DefaultObjectParser( + self.data = DefaultObjectParser( config, self.spec, Path(), self.warning_func, self.error_func ).parse() + + if self.data is None: + raise SpecificationError("DefaultObjectParser returned None") + Metadata.resolve_match_properties(self.spec, self.data) Metadata.remove_match_properties(self.spec) for cfg_path in Metadata.topo_sort(self.get_path_dependencies()): @@ -180,6 +190,8 @@ def load_config(self, config: dict, injected_data: dict) -> None: full_path.set(self.data, copy_val) def do_alternative_checks(self, checks: dict, prop_path: Path) -> None: + if self.data is None: + raise SpecificationError("Data is not loaded yet") if "warn" in checks: self.warning_func(checks["warn"]) for check_result, check_name in ((False, "forbid"), (True, "require")): diff --git a/problemtools/config_parser/config_path.py b/problemtools/config_parser/config_path.py index 8db39453..ec098240 100644 --- a/problemtools/config_parser/config_path.py +++ b/problemtools/config_parser/config_path.py @@ -30,7 +30,7 @@ def parse_part(part: str): @staticmethod def combine(*parts: str | int | Path) -> Path: """Fuse multiple paths together into one path""" - res = [] + res: list[str | int] = [] for part in parts: if isinstance(part, int): res.append(part) @@ -106,7 +106,7 @@ def path_is_not_copyfrom(path: Path) -> bool: state = "object-properties" elif part == "content": state = "base" - new_out = [] + new_out: list[Path] = [] for path in out: val = path.index(data) or [] if is_copyfrom(val): # skip copied @@ -133,7 +133,7 @@ def last_name(self) -> int | str: return self.path[-1] def __str__(self) -> str: - strings = [] + strings: list[str] = [] for part in self.path: if isinstance(part, int): strings[-1] += f"[{part}]" diff --git a/problemtools/config_parser/matcher.py b/problemtools/config_parser/matcher.py index bf5e7c52..df62bce7 100644 --- a/problemtools/config_parser/matcher.py +++ b/problemtools/config_parser/matcher.py @@ -38,11 +38,14 @@ def __str__(self) -> str: class IntMatch(AlternativeMatch): def __init__(self, matchstr: str | int): + self.start: int | None = None + self.end: int | None = None if type(matchstr) not in (str, int): raise SpecificationError(f"Int match needs argument to be of type string or int. Got {type(matchstr)}") if type(matchstr) is int: self.start = self.end = matchstr return + assert type(matchstr) is str try: if matchstr.count(":") > 1: raise ValueError diff --git a/problemtools/config_parser/parser.py b/problemtools/config_parser/parser.py index 5dffa7f3..3146d0bd 100644 --- a/problemtools/config_parser/parser.py +++ b/problemtools/config_parser/parser.py @@ -76,6 +76,21 @@ def __init__( else: self.alternatives = None + def check_object_properties(self, val: dict): + spec: dict = self.spec_path.index(self.specification) + required_props = spec.get('required', []) + missing_props = [req for req in required_props if req not in val] + + for req in missing_props: + self.error_func(f"Missing required property: {Path.combine(self.path, req)}") + + known_props = spec.get('properties', []) + unknown_props = [prop for prop in val.keys() if prop not in known_props] + + for prop in unknown_props: + self.warning_func(f"Unknown property: {Path.combine(self.path, prop)}") + val.pop(prop, None) + def parse(self): out = self._parse(self.path.index(self.data, None)) @@ -85,24 +100,7 @@ def parse(self): self.warning_func(f"deprecated property was provided ({self.path})") if self.OUTPUT_TYPE == "object": - required = ( - Path.combine(self.spec_path, "required").index(self.specification, []) - ) - for req in required: - req_path = Path.combine(self.path, req) - if req_path.index(self.data) is None: - self.error_func(f"Missing required property: {req_path}") - - remove = [] - known_props = Path.combine(self.spec_path, "properties").index( - self.specification - ) - for prop in out.keys(): - if prop not in known_props: - self.warning_func(f"Unknown property: {Path.combine(self.path, prop)}") - remove.append(prop) - for r in remove: - del out[r] + self.check_object_properties(out) if self.alternatives is not None: if not any(matcher.check(out) for matcher in self.alternatives): @@ -129,6 +127,7 @@ def parse(self): def _parse(self, val): raise NotImplementedError("Subclasses of Parse need to implement _parse()") + @staticmethod def smallest_edit_dist(a: str, b: list[str]) -> str: def edit_dist(a: str, b: str) -> int: n = len(a) @@ -155,7 +154,7 @@ def edit_dist(a: str, b: str) -> int: return best @staticmethod - def get_parser_type(specification: dict) -> type: + def get_parser_type(specification: dict) -> type["Parser"]: parsing_rule = specification.get("parsing") if parsing_rule is None: typ = specification.get("type") diff --git a/problemtools/tests/config_parser_tests/config/2023-07_specification.yaml b/problemtools/tests/config_parser_tests/config/2023-07_specification.yaml index 727f74fa..9a3b8b60 100644 --- a/problemtools/tests/config_parser_tests/config/2023-07_specification.yaml +++ b/problemtools/tests/config_parser_tests/config/2023-07_specification.yaml @@ -72,11 +72,6 @@ properties: parsing: string-to-list content: type: string - contributors: - type: list - parsing: string-to-list - content: - type: string translators: type: object properties: {} diff --git a/problemtools/verifyproblem.py b/problemtools/verifyproblem.py index 4c8fe61b..6393d51b 100644 --- a/problemtools/verifyproblem.py +++ b/problemtools/verifyproblem.py @@ -804,7 +804,7 @@ def setup(self) -> dict: self.debug(' Loading problem config') spec = config.load_config(self.SPECIFICATION_FILE_NAME) self.configfile = os.path.join(self.problem.probdir, 'problem.yaml') - self.data = {} + self.data: dict[str, Any] = {} if os.path.isfile(self.configfile): try: with open(self.configfile) as f: @@ -815,6 +815,10 @@ def setup(self) -> dict: self.metadata.set_error_callback(self.error) self.metadata.set_warning_callback(self.warning) self.metadata.load_config(self.data, self.get_injected_data()) + + if self.metadata.data is None: + raise VerifyError(f"Failed to load problem config {self.configfile}") + return self.metadata.data def __str__(self) -> str: From ff1003b295f80cb646aaad487959615e6b6928ab Mon Sep 17 00:00:00 2001 From: Rasmus Hulthe Date: Wed, 26 Mar 2025 00:54:06 +0100 Subject: [PATCH 32/32] Made invert_graph more robust Co-authored-by: square-cylinder --- problemtools/config_parser/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/problemtools/config_parser/__init__.py b/problemtools/config_parser/__init__.py index 0c1ab774..841b27de 100644 --- a/problemtools/config_parser/__init__.py +++ b/problemtools/config_parser/__init__.py @@ -9,6 +9,7 @@ from .config_path import Path, PathError from .parser import Parser, DefaultObjectParser from .matcher import AlternativeMatch +from itertools import chain class Metadata: @@ -35,7 +36,8 @@ def set_warning_callback(self, fun: Callable): @staticmethod def invert_graph(dependency_graph: dict[Path, list[Path]]) -> dict[Path, list[Path]]: - depends_on_graph: dict[Path, list[Path]] = {k: [] for k in dependency_graph.keys()} + nodes = set(dependency_graph.keys()).union(chain(*dependency_graph.values())) + depends_on_graph: dict[Path, list[Path]] = {k: [] for k in nodes} for dependant, dependencies in dependency_graph.items(): for dependency in dependencies: depends_on_graph[dependency].append(dependant)