diff --git a/README.md b/README.md index 5f586fa..460c73c 100644 --- a/README.md +++ b/README.md @@ -573,6 +573,13 @@ Arguments: * **Example**: `--disable-str-serializable-types float int BooleanString IsoDatetimeString` * **Optional** +* `--allow-words` - List of words to remove from the keyword blacklist. + By default, field names that clash with Python keywords or built-ins (e.g. `id`, `type`, `hash`, `format`) are + renamed by appending `_` (e.g. `id_`). WARNING: reserved keywords may cause syntax errors if you allow them, so be careful with this option. + * **Format**: `--allow-words WORD [WORD ...]` + * **Example**: `--allow-words id type` — generates `id: int` and `type: str` instead of `id_: int` and `type_: str` + * **Optional** + ### Low level API \- diff --git a/json_to_models/cli.py b/json_to_models/cli.py index 6a4a6e1..391b29a 100644 --- a/json_to_models/cli.py +++ b/json_to_models/cli.py @@ -71,7 +71,8 @@ def __init__(self): self.max_literals: int = -1 # --max-strings-literals self.merge_policy: List[ModelCmp] = [] # --merge self.structure_fn: STRUCTURE_FN_TYPE = None # -s - self.model_generator: Type[GenericModelCodeGenerator] = None # -f & --code-generator + # -f & --code-generator + self.model_generator: Type[GenericModelCodeGenerator] = None self.model_generator_kwargs: Dict[str, Any] = None self.argparser = self._create_argparser() @@ -93,7 +94,8 @@ def parse_args(self, args: List[str] = None): disable_unicode_conversion = namespace.disable_unicode_conversion self.strings_converters = namespace.strings_converters self.max_literals = namespace.max_strings_literals - merge_policy = [m.split("_") if "_" in m else m for m in namespace.merge] + merge_policy = [ + m.split("_") if "_" in m else m for m in namespace.merge] structure = namespace.structure framework = namespace.framework code_generator = namespace.code_generator @@ -101,14 +103,16 @@ def parse_args(self, args: List[str] = None): dict_keys_regex: List[str] = namespace.dict_keys_regex dict_keys_fields: List[str] = namespace.dict_keys_fields preamble: str = namespace.preamble + allow_words: List[str] = namespace.allow_words for name in namespace.disable_str_serializable_types: registry.remove_by_name(name) - self.setup_models_data(namespace.model or (), namespace.list or (), parser) + self.setup_models_data(namespace.model or (), + namespace.list or (), parser) self.validate(merge_policy, framework, code_generator) self.set_args(merge_policy, structure, framework, code_generator, code_generator_kwargs_raw, - dict_keys_regex, dict_keys_fields, disable_unicode_conversion, preamble) + dict_keys_regex, dict_keys_fields, disable_unicode_conversion, preamble, allow_words) def run(self): if self.enable_datetime: @@ -158,14 +162,18 @@ def validate(self, merge_policy, framework, code_generator): for m in merge_policy: if isinstance(m, list): if m[0] not in self.MODEL_CMP_MAPPING: - raise ValueError(f"Invalid merge policy '{m[0]}', choices are {self.MODEL_CMP_MAPPING.keys()}") + raise ValueError( + f"Invalid merge policy '{m[0]}', choices are {self.MODEL_CMP_MAPPING.keys()}") elif m not in self.MODEL_CMP_MAPPING: - raise ValueError(f"Invalid merge policy '{m}', choices are {self.MODEL_CMP_MAPPING.keys()}") + raise ValueError( + f"Invalid merge policy '{m}', choices are {self.MODEL_CMP_MAPPING.keys()}") if framework == 'custom' and code_generator is None: - raise ValueError("You should specify --code-generator to support custom generator") + raise ValueError( + "You should specify --code-generator to support custom generator") elif framework != 'custom' and code_generator is not None: - raise ValueError("--code-generator argument has no effect without '--framework custom' argument") + raise ValueError( + "--code-generator argument has no effect without '--framework custom' argument") def setup_models_data( self, @@ -189,7 +197,8 @@ def setup_models_data( elif len(model_tuple) == 3: model_name, lookup, path_raw = model_tuple else: - raise RuntimeError('`--model` argument should contain exactly 2 or 3 strings') + raise RuntimeError( + '`--model` argument should contain exactly 2 or 3 strings') for real_path in process_path(path_raw): iterator = iter_json_file(parser(real_path), lookup) @@ -208,6 +217,7 @@ def set_args( dict_keys_fields: List[str], disable_unicode_conversion: bool, preamble: str, + allow_words: List[str] = (), ): """ Convert CLI args to python representation and set them to appropriate object attributes @@ -234,7 +244,8 @@ def set_args( self.model_generator_kwargs = dict( post_init_converters=self.strings_converters, convert_unicode=not disable_unicode_conversion, - max_literals=self.max_literals + max_literals=self.max_literals, + allow_words=allow_words, ) if code_generator_kwargs_raw: for item in code_generator_kwargs_raw: @@ -245,7 +256,8 @@ def set_args( name, value = item.split("=", 1) self.model_generator_kwargs[name] = value - self.dict_keys_regex = [re.compile(rf"^{r}$") for r in dict_keys_regex] if dict_keys_regex else () + self.dict_keys_regex = [re.compile( + rf"^{r}$") for r in dict_keys_regex] if dict_keys_regex else () self.dict_keys_fields = dict_keys_fields or () if preamble: preamble = preamble.strip() @@ -392,6 +404,15 @@ def _create_argparser(cls) -> argparse.ArgumentParser: nargs=3, action="append", metavar=("", "", ""), help="DEPRECATED, use --model argument instead" ) + parser.add_argument( + "--allow-words", + metavar="WORD", + default=[], + nargs="+", type=str, + help="List of words to remove from the keyword blacklist.\n" + "Prevents appending '_' to these field names.\n" + "WARNING: reserved keywords may cause syntax errors if you allow them, so be careful with this option." + ) return parser @@ -421,7 +442,8 @@ def json(path: Path) -> Union[dict, list]: @staticmethod def yaml(path: Path) -> Union[dict, list]: if yaml_load is None: - print('Yaml parser is not installed. To parse yaml files ruamel.yaml (or PyYaml) is required.') + print( + 'Yaml parser is not installed. To parse yaml files ruamel.yaml (or PyYaml) is required.') raise ImportError('yaml') with path.open() as fp: return yaml_load(fp) @@ -467,7 +489,8 @@ def iter_json_file(data: Union[dict, list], lookup: str) -> Generator[Union[dict elif isinstance(item, dict): yield item else: - raise TypeError(f'dict or list is expected at {lookup if lookup != "-" else "JSON root"}, not {type(item)}') + raise TypeError( + f'dict or list is expected at {lookup if lookup != "-" else "JSON root"}, not {type(item)}') def process_path(path: str) -> Iterable[Path]: diff --git a/json_to_models/models/base.py b/json_to_models/models/base.py index 6102cf1..bb60e75 100644 --- a/json_to_models/models/base.py +++ b/json_to_models/models/base.py @@ -24,8 +24,10 @@ keywords_set = set(keyword.kwlist) builtins_set = set(__builtins__.keys()) other_common_names_set = {'datetime', 'time', 'date', 'defaultdict', 'schema'} -blacklist_words = frozenset(keywords_set | builtins_set | other_common_names_set) -ones = ['', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine'] +blacklist_words = frozenset( + keywords_set | builtins_set | other_common_names_set) +ones = ['', 'one', 'two', 'three', 'four', + 'five', 'six', 'seven', 'eight', 'nine'] def template(pattern: str, indent: str = INDENT) -> Template: @@ -73,7 +75,8 @@ class {{ name }}{% if bases %}({{ bases }}){% endif %}: STR_CONVERT_DECORATOR = template("convert_strings({{ str_fields }}{%% if kwargs %%}, %s{%% endif %%})" % KWAGRS_TEMPLATE) - FIELD: Template = template("{{name}}: {{type}}{% if body %} = {{ body }}{% endif %}") + FIELD: Template = template( + "{{name}}: {{type}}{% if body %} = {{ body }}{% endif %}") DEFAULT_MAX_LITERALS = 10 default_types_style = { StringLiteral: { @@ -87,29 +90,36 @@ def __init__( max_literals=DEFAULT_MAX_LITERALS, post_init_converters=False, convert_unicode=True, - types_style: Dict[Union['BaseType', Type['BaseType']], dict] = None + types_style: Dict[Union['BaseType', + Type['BaseType']], dict] = None, + allow_words: Iterable[str] = (), ): self.model = model self.post_init_converters = post_init_converters self.convert_unicode = convert_unicode + self.allow_words = frozenset(allow_words) resolved_types_style = copy.deepcopy(self.default_types_style) types_style = types_style or {} for t, style in types_style.items(): resolved_types_style.setdefault(t, {}) resolved_types_style[t].update(style) - resolved_types_style[StringLiteral][StringLiteral.TypeStyle.max_literals] = int(max_literals) + resolved_types_style[StringLiteral][StringLiteral.TypeStyle.max_literals] = int( + max_literals) self.types_style = resolved_types_style - self.model.set_raw_name(self.convert_class_name(self.model.name), generated=self.model.is_name_generated) + self.model.set_raw_name(self.convert_class_name( + self.model.name), generated=self.model.is_name_generated) @cached_method def convert_class_name(self, name): - return prepare_label(name, convert_unicode=self.convert_unicode, to_snake_case=False) + return prepare_label(name, convert_unicode=self.convert_unicode, to_snake_case=False, + allow_words=self.allow_words) @cached_method def convert_field_name(self, name): - return prepare_label(name, convert_unicode=self.convert_unicode, to_snake_case=True) + return prepare_label(name, convert_unicode=self.convert_unicode, to_snake_case=True, + allow_words=self.allow_words) def generate(self, nested_classes: List[str] = None, bases: str = None, extra: str = "") \ -> Tuple[ImportPathList, str]: @@ -142,9 +152,11 @@ def decorators(self) -> Tuple[ImportPathList, List[str]]: if str_fields and decorator_kwargs: imports.extend([ *decorator_imports, - ('json_to_models.models.string_converters', ['convert_strings']), + ('json_to_models.models.string_converters', + ['convert_strings']), ]) - decorators.append(self.STR_CONVERT_DECORATOR.render(str_fields=str_fields, kwargs=decorator_kwargs)) + decorators.append(self.STR_CONVERT_DECORATOR.render( + str_fields=str_fields, kwargs=decorator_kwargs)) return imports, decorators def field_data(self, name: str, meta: MetaData, optional: bool) -> Tuple[ImportPathList, dict]: @@ -156,7 +168,8 @@ def field_data(self, name: str, meta: MetaData, optional: bool) -> Tuple[ImportP :param optional: Is field optional :return: imports, field data """ - imports, typing = metadata_to_typing(meta, types_style=self.types_style) + imports, typing = metadata_to_typing( + meta, types_style=self.types_style) data = { "name": self.convert_field_name(name), @@ -171,13 +184,15 @@ def fields(self) -> Tuple[ImportPathList, List[str]]: :return: imports, list of fields as string """ - required, optional = sort_fields(self.model, unicode_fix=not self.convert_unicode) + required, optional = sort_fields( + self.model, unicode_fix=not self.convert_unicode) imports: ImportPathList = [] strings: List[str] = [] for is_optional, fields in enumerate((required, optional)): fields = self._filter_fields(fields) for field in fields: - field_imports, data = self.field_data(field, self.model.type[field], bool(is_optional)) + field_imports, data = self.field_data( + field, self.model.type[field], bool(is_optional)) imports.extend(field_imports) strings.append(self.FIELD.render(**data)) return imports, strings @@ -256,7 +271,8 @@ def generate_code(structure: ModelsStructureType, class_generator: Type[GenericM """ root, mapping = structure with AbsoluteModelRef.inject(mapping): - imports, classes = _generate_code(root, class_generator, class_generator_kwargs or {}) + imports, classes = _generate_code( + root, class_generator, class_generator_kwargs or {}) imports_str = "" if imports: imports_str = compile_imports(imports) + objects_delimiter @@ -284,7 +300,8 @@ def sort_kwargs(kwargs: dict, ordering: Iterable[Iterable[str]]) -> dict: return sorted_dict -def prepare_label(s: str, convert_unicode: bool, to_snake_case: bool) -> str: +def prepare_label(s: str, convert_unicode: bool, to_snake_case: bool, + allow_words: frozenset = frozenset()) -> str: if convert_unicode: s = unidecode(s) s = re.sub(r"\W", "", s) @@ -293,6 +310,6 @@ def prepare_label(s: str, convert_unicode: bool, to_snake_case: bool) -> str: s = ones[int(s[0])] + "_" + s[1:] if to_snake_case: s = inflection.underscore(s) - if s in blacklist_words: + if s in (blacklist_words - allow_words): s += "_" return s diff --git a/test/test_cli/test_script.py b/test/test_cli/test_script.py index 6e77145..eb6f454 100644 --- a/test/test_cli/test_script.py +++ b/test/test_cli/test_script.py @@ -44,7 +44,8 @@ def tmp_dir_cleanup(): def test_help(): c = f"{executable} -h" - proc = subprocess.Popen(c, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + proc = subprocess.Popen( + c, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) stdout, stderr = proc.communicate() assert not stderr, stderr assert stdout, stdout @@ -53,11 +54,16 @@ def test_help(): test_commands = [ - pytest.param(f"""{executable} -m Photo items "{test_data_path / 'photos.json'}" """, id="list1"), - pytest.param(f"""{executable} -l Photo items "{test_data_path / 'photos.json'}" """, id="list1_legacy"), - pytest.param(f"""{executable} -m User "{test_data_path / 'users.json'}" """, id="list2"), - pytest.param(f"""{executable} -l User - "{test_data_path / 'users.json'}" """, id="list2_legacy"), - pytest.param(f"""{executable} -m Photos "{test_data_path / 'photos.json'}" """, id="model1"), + pytest.param( + f"""{executable} -m Photo items "{test_data_path / 'photos.json'}" """, id="list1"), + pytest.param( + f"""{executable} -l Photo items "{test_data_path / 'photos.json'}" """, id="list1_legacy"), + pytest.param( + f"""{executable} -m User "{test_data_path / 'users.json'}" """, id="list2"), + pytest.param( + f"""{executable} -l User - "{test_data_path / 'users.json'}" """, id="list2_legacy"), + pytest.param( + f"""{executable} -m Photos "{test_data_path / 'photos.json'}" """, id="model1"), pytest.param(f"""{executable} -m Model items "{test_data_path / 'photos.json'}" \ -m Model - "{test_data_path / 'users.json'}" """, id="duplicate_name"), @@ -104,7 +110,8 @@ def load_model(code, module_name=''): def execute_test(command, output_file: Path = None, output=None) -> str: - proc = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + proc = subprocess.Popen(command, shell=True, + stdout=subprocess.PIPE, stderr=subprocess.PIPE) stdout, stderr = map(bytes.decode, proc.communicate()) if output_file: assert output is None @@ -267,4 +274,78 @@ def test_disable_some_string_types(command): stdout = execute_test(command) assert 'lat: str' in stdout assert 'lng: str' in stdout - assert not any(re.match(r'\s+zipcode:.+int.+', line) for line in stdout.split('\n')), "zipcode should not be parsed as int" + assert not any(re.match(r'\s+zipcode:.+int.+', line) + for line in stdout.split('\n')), "zipcode should not be parsed as int" + + +# -- allow-words tests ------------------------------------------------------- + +allow_words_commands = [ + pytest.param( + f"""{executable} -m User "{test_data_path / 'users.json'}" --allow-words id""", + id="users_allow_id", + ), + pytest.param( + f"""{executable} -m Photo items "{test_data_path / 'photos.json'}" --allow-words id""", + id="photos_allow_id", + ), +] + + +@pytest.mark.parametrize("command", allow_words_commands) +def test_allow_words_prevents_underscore(command): + """--allow-words should remove the trailing '_' from the listed field names.""" + stdout = execute_test(command) + assert "id:" in stdout, "field 'id' should appear unmodified" + assert "id_:" not in stdout, "field 'id_' should not appear when 'id' is allowed" + + +@pytest.mark.parametrize("command", allow_words_commands) +def test_allow_words_pydantic(command): + """--allow-words should work with the pydantic framework.""" + command += " -f pydantic" + stdout = execute_test(command) + assert "(BaseModel):" in stdout + assert "id:" in stdout + assert "id_:" not in stdout + + +@pytest.mark.parametrize("command", allow_words_commands) +def test_allow_words_attrs(command): + """--allow-words should work with the attrs framework.""" + command += " -f attrs" + stdout = execute_test(command) + assert "@attr.s" in stdout + assert "id:" in stdout + assert "id_:" not in stdout + + +@pytest.mark.parametrize("command", allow_words_commands) +def test_allow_words_dataclasses(command): + """--allow-words should work with the dataclasses framework.""" + command += " -f dataclasses" + stdout = execute_test(command) + assert "@dataclass" in stdout + assert "id:" in stdout + assert "id_:" not in stdout + + +def test_default_behavior_appends_underscore(): + """Without --allow-words the blacklisted 'id' field should be renamed to 'id_'.""" + command = f"""{executable} -m User "{test_data_path / 'users.json'}" """ + stdout = execute_test(command) + assert "id_:" in stdout, "field 'id' should be renamed to 'id_' by default" + assert "\n id:" not in stdout, "field 'id' should not appear unmodified without --allow-words" + + +def test_allow_words_multiple(): + """Multiple words can be unblacklisted at once with --allow-words.""" + # Create a temporary JSON file whose keys include several blacklisted names + tmp_json = tmp_path / "allow_words_multi.json" + tmp_json.write_text(json.dumps( + {"id": 1, "type": "user", "hash": "abc", "name": "John"})) + command = f"""{executable} -m Model "{tmp_json}" --allow-words id type hash""" + stdout = execute_test(command) + for word in ("id", "type", "hash"): + assert f"{word}:" in stdout, f"field '{word}' should appear unmodified" + assert f"{word}_:" not in stdout, f"field '{word}_' should not appear when allowed"