Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@
- Example: 10.2.1.4 is the 5th version that supports Khiops 10.2.1.
- Internals: Changes in *Internals* sections are unlikely to be of interest for data scientists.

## Unreleased

### Added
- (`sklearn`) `n_feature_parts` parameter to the supervised estimators

## 11.0.0.2 - 2026-01-26

### Fixed
Expand Down
42 changes: 34 additions & 8 deletions khiops/sklearn/estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def _check_pair_parameters(estimator):
if not isinstance(estimator.n_pairs, int):
raise TypeError(type_error_message("n_pairs", estimator.n_pairs, int))
if estimator.n_pairs < 0:
raise ValueError("'n_pairs' must be positive")
raise ValueError("'n_pairs' must be non-negative")
if estimator.specific_pairs is not None:
if not is_list_like(estimator.specific_pairs):
raise TypeError(
Expand Down Expand Up @@ -955,7 +955,7 @@ def _simplify(
type_error_message("'max_part_numbers' values", value, int)
)
elif value < 0:
raise ValueError("'max_part_numbers' values must be positive")
raise ValueError("'max_part_numbers' values must be non-negative")
# Create temporary directory and tables
computation_dir = self._create_computation_dir("simplify")
output_dir = self._get_output_dir(computation_dir)
Expand Down Expand Up @@ -1195,6 +1195,7 @@ def __init__(
specific_pairs=None,
all_possible_pairs=True,
construction_rules=None,
n_feature_parts=0,
verbose=False,
output_dir=None,
auto_sort=True,
Expand All @@ -1211,6 +1212,7 @@ def __init__(
self.specific_pairs = specific_pairs
self.all_possible_pairs = all_possible_pairs
self.construction_rules = construction_rules
self.n_feature_parts = n_feature_parts
self._original_target_dtype = None
self._predicted_target_meta_data_tag = None
self._khiops_baseline_model_prefix = None
Expand Down Expand Up @@ -1270,17 +1272,17 @@ def _fit_check_params(self, ds, **kwargs):
if not isinstance(self.n_features, int):
raise TypeError(type_error_message("n_features", self.n_features, int))
if self.n_features < 0:
raise ValueError("'n_features' must be positive")
raise ValueError("'n_features' must be non-negative")
if not isinstance(self.n_trees, int):
raise TypeError(type_error_message("n_trees", self.n_trees, int))
if self.n_trees < 0:
raise ValueError("'n_trees' must be positive")
raise ValueError("'n_trees' must be non-negative")
if not isinstance(self.n_text_features, int):
raise TypeError(
type_error_message("n_text_features", self.n_text_features, int)
)
if self.n_text_features < 0:
raise ValueError("'n_text_features' must be positive")
raise ValueError("'n_text_features' must be non-negative")
if not isinstance(self.type_text_features, str):
raise TypeError(
type_error_message("type_text_features", self.type_text_features, str)
Expand All @@ -1300,6 +1302,12 @@ def _fit_check_params(self, ds, **kwargs):
for rule in self.construction_rules:
if not isinstance(rule, str):
raise TypeError(type_error_message(rule, rule, str))
if not isinstance(self.n_feature_parts, int):
raise TypeError(
type_error_message("n_feature_parts", self.n_feature_parts, int)
)
if self.n_feature_parts < 0:
raise ValueError("'n_feature_parts' must be non-negative")

def _fit_train_model(self, ds, computation_dir, **kwargs):
# Train the model with Khiops
Expand Down Expand Up @@ -1384,6 +1392,7 @@ def _fit_prepare_training_function_inputs(self, ds, computation_dir):
kwargs["max_trees"] = kwargs.pop("n_trees")
kwargs["max_text_features"] = kwargs.pop("n_text_features")
kwargs["text_features"] = kwargs.pop("type_text_features")
kwargs["max_parts"] = kwargs.pop("n_feature_parts")

# Add the additional_data_tables parameter
kwargs["additional_data_tables"] = additional_data_tables
Expand Down Expand Up @@ -1513,6 +1522,7 @@ def __init__(
specific_pairs=None,
all_possible_pairs=True,
construction_rules=None,
n_feature_parts=0,
verbose=False,
output_dir=None,
auto_sort=True,
Expand All @@ -1525,6 +1535,7 @@ def __init__(
specific_pairs=specific_pairs,
all_possible_pairs=all_possible_pairs,
construction_rules=construction_rules,
n_feature_parts=n_feature_parts,
verbose=verbose,
output_dir=output_dir,
auto_sort=auto_sort,
Expand Down Expand Up @@ -1624,9 +1635,9 @@ def _fit_check_params(self, ds, **kwargs):

# Check estimator parameters
if self.n_evaluated_features < 0:
raise ValueError("'n_evaluated_features' must be positive")
raise ValueError("'n_evaluated_features' must be non-negative")
if self.n_selected_features < 0:
raise ValueError("'n_selected_features' must be positive")
raise ValueError("'n_selected_features' must be non-negative")


# Note: scikit-learn **requires** inherit first the mixins and then other classes
Expand Down Expand Up @@ -1685,7 +1696,10 @@ class KhiopsClassifier(ClassifierMixin, KhiopsPredictor):
construction_rules : list of str, optional
Allowed rules for the automatic feature construction. If not set, Khiops
uses the multi-table construction rules listed in
`kh.DEFAULT_CONSTRUCTION_RULES <khiops.core.api.DEFAULT_CONSTRUCTION_RULES>`
`kh.DEFAULT_CONSTRUCTION_RULES <khiops.core.api.DEFAULT_CONSTRUCTION_RULES>`.
n_feature_parts : int, default 0
Maximum number of variable parts produced by preprocessing methods. If equal
to 0, it is automatically calculated.
group_target_value : bool, default ``False``
Allows grouping of the target values in classification. It can substantially
increase the training time.
Expand Down Expand Up @@ -1744,6 +1758,7 @@ def __init__(
specific_pairs=None,
all_possible_pairs=True,
construction_rules=None,
n_feature_parts=0,
group_target_value=False,
verbose=False,
output_dir=None,
Expand All @@ -1757,6 +1772,7 @@ def __init__(
n_selected_features=n_selected_features,
n_evaluated_features=n_evaluated_features,
construction_rules=construction_rules,
n_feature_parts=n_feature_parts,
verbose=verbose,
output_dir=output_dir,
auto_sort=auto_sort,
Expand Down Expand Up @@ -2086,6 +2102,9 @@ class KhiopsRegressor(RegressorMixin, KhiopsPredictor):
Allowed rules for the automatic feature construction. If not set, Khiops
uses the multi-table construction rules listed in
`kh.DEFAULT_CONSTRUCTION_RULES <khiops.core.api.DEFAULT_CONSTRUCTION_RULES>`.
n_feature_parts : int, default 0
Maximum number of variable parts produced by preprocessing methods. If equal
to 0, it is automatically calculated.
verbose : bool, default ``False``
If ``True`` it prints debug information and it does not erase temporary files
when fitting, predicting or transforming.
Expand Down Expand Up @@ -2129,6 +2148,7 @@ def __init__(
n_selected_features=0,
n_evaluated_features=0,
construction_rules=None,
n_feature_parts=0,
verbose=False,
output_dir=None,
auto_sort=True,
Expand All @@ -2141,6 +2161,7 @@ def __init__(
n_selected_features=n_selected_features,
n_evaluated_features=n_evaluated_features,
construction_rules=construction_rules,
n_feature_parts=n_feature_parts,
verbose=verbose,
output_dir=output_dir,
auto_sort=auto_sort,
Expand Down Expand Up @@ -2296,6 +2317,9 @@ class KhiopsEncoder(TransformerMixin, KhiopsSupervisedEstimator):
Allowed rules for the automatic feature construction. If not set, Khiops
uses the multi-table construction rules listed in
`kh.DEFAULT_CONSTRUCTION_RULES <khiops.core.api.DEFAULT_CONSTRUCTION_RULES>`.
n_feature_parts : int, default 0
Maximum number of variable parts produced by preprocessing methods. If equal
to 0, it is automatically calculated.
informative_features_only : bool, default ``True``
If ``True`` keeps only informative features.
group_target_value : bool, default ``False``
Expand Down Expand Up @@ -2374,6 +2398,7 @@ def __init__(
specific_pairs=None,
all_possible_pairs=True,
construction_rules=None,
n_feature_parts=0,
informative_features_only=True,
group_target_value=False,
keep_initial_variables=False,
Expand All @@ -2390,6 +2415,7 @@ def __init__(
n_text_features=n_text_features,
type_text_features=type_text_features,
construction_rules=construction_rules,
n_feature_parts=n_feature_parts,
verbose=verbose,
output_dir=output_dir,
auto_sort=auto_sort,
Expand Down
15 changes: 15 additions & 0 deletions tests/test_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -762,6 +762,7 @@ def setUpClass(cls):
"specific_pairs": [("age", "race")],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
"max_parts": 3,
"group_target_value": False,
"additional_data_tables": {},
}
Expand Down Expand Up @@ -790,6 +791,7 @@ def setUpClass(cls):
"max_selected_variables": 1,
"max_evaluated_variables": 3,
"construction_rules": ["TableMode", "TableSelection"],
"max_parts": 5,
"additional_data_tables": {},
}
},
Expand Down Expand Up @@ -818,6 +820,7 @@ def setUpClass(cls):
"specific_pairs": [("age", "race")],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
"max_parts": 7,
"informative_variables_only": True,
"group_target_value": False,
"keep_initial_categorical_variables": False,
Expand Down Expand Up @@ -860,6 +863,7 @@ def setUpClass(cls):
"specific_pairs": [],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
"max_parts": 4,
"group_target_value": False,
"additional_data_tables": {"SpliceJunctionDNA"},
}
Expand Down Expand Up @@ -889,6 +893,7 @@ def setUpClass(cls):
"max_selected_variables": 1,
"max_evaluated_variables": 3,
"construction_rules": ["TableMode", "TableSelection"],
"max_parts": 6,
"additional_data_tables": {"SpliceJunctionDNA"},
}
},
Expand Down Expand Up @@ -918,6 +923,7 @@ def setUpClass(cls):
"specific_pairs": [],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
"max_parts": 8,
"informative_variables_only": True,
"group_target_value": False,
"keep_initial_categorical_variables": False,
Expand Down Expand Up @@ -1435,6 +1441,7 @@ def test_parameter_transfer_classifier_fit_from_monotable_dataframe(self):
"specific_pairs": [("age", "race")],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
"n_feature_parts": 3,
"group_target_value": False,
},
)
Expand All @@ -1458,6 +1465,7 @@ def test_parameter_transfer_classifier_fit_from_monotable_dataframe_with_df_y(
"specific_pairs": [("age", "race")],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
"n_feature_parts": 3,
"group_target_value": False,
},
)
Expand All @@ -1480,6 +1488,7 @@ def test_parameter_transfer_classifier_fit_from_multitable_dataframe(self):
"specific_pairs": [],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
"n_feature_parts": 4,
"group_target_value": False,
},
)
Expand Down Expand Up @@ -1517,6 +1526,7 @@ def test_parameter_transfer_encoder_fit_from_monotable_dataframe(self):
"specific_pairs": [("age", "race")],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
"n_feature_parts": 7,
"informative_features_only": True,
"group_target_value": False,
"keep_initial_variables": False,
Expand All @@ -1543,6 +1553,7 @@ def test_parameter_transfer_encoder_fit_from_monotable_dataframe_with_df_y(
"specific_pairs": [("age", "race")],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
"n_feature_parts": 7,
"informative_features_only": True,
"group_target_value": False,
"keep_initial_variables": False,
Expand All @@ -1568,6 +1579,7 @@ def test_parameter_transfer_encoder_fit_from_multitable_dataframe(self):
"specific_pairs": [],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
"n_feature_parts": 8,
"informative_features_only": True,
"group_target_value": False,
"keep_initial_variables": False,
Expand Down Expand Up @@ -1608,6 +1620,7 @@ def test_parameter_transfer_regressor_fit_from_monotable_dataframe(self):
"n_text_features": 300000,
"type_text_features": "ngrams",
"construction_rules": ["TableMode", "TableSelection"],
"n_feature_parts": 5,
},
)

Expand All @@ -1626,6 +1639,7 @@ def test_parameter_transfer_regressor_fit_from_monotable_dataframe_with_df_y(
"n_text_features": 300000,
"type_text_features": "ngrams",
"construction_rules": ["TableMode", "TableSelection"],
"n_feature_parts": 5,
},
)

Expand All @@ -1644,6 +1658,7 @@ def test_parameter_transfer_regressor_fit_from_multitable_dataframe(self):
"n_selected_features": 1,
"n_evaluated_features": 3,
"construction_rules": ["TableMode", "TableSelection"],
"n_feature_parts": 6,
},
)

Expand Down
Loading