diff --git a/CHANGELOG.md b/CHANGELOG.md index d4d940b7..63ce9d0e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,11 @@ - Example: 10.2.1.4 is the 5th version that supports khiops 10.2.1. - Internals: Changes in *Internals* sections are unlikely to be of interest for data scientists. +## Unreleased + +### Added +- (`sklearn`) `keep_selected_variables_only` parameter to the predictors (`KhiopsClassifier` and `KhiopsRegressor`). + ## 11.0.0.3 - 2026-03-06 ### Added diff --git a/khiops/sklearn/estimators.py b/khiops/sklearn/estimators.py index 9567ac1c..71990518 100644 --- a/khiops/sklearn/estimators.py +++ b/khiops/sklearn/estimators.py @@ -1196,6 +1196,7 @@ def __init__( all_possible_pairs=True, construction_rules=None, n_feature_parts=0, + keep_selected_variables_only=True, verbose=False, output_dir=None, auto_sort=True, @@ -1213,6 +1214,7 @@ def __init__( self.all_possible_pairs = all_possible_pairs self.construction_rules = construction_rules self.n_feature_parts = n_feature_parts + self.keep_selected_variables_only = keep_selected_variables_only self._original_target_dtype = None self._predicted_target_meta_data_tag = None self._khiops_baseline_model_prefix = None @@ -1523,6 +1525,7 @@ def __init__( all_possible_pairs=True, construction_rules=None, n_feature_parts=0, + keep_selected_variables_only=True, verbose=False, output_dir=None, auto_sort=True, @@ -1536,6 +1539,7 @@ def __init__( all_possible_pairs=all_possible_pairs, construction_rules=construction_rules, n_feature_parts=n_feature_parts, + keep_selected_variables_only=keep_selected_variables_only, verbose=verbose, output_dir=output_dir, auto_sort=auto_sort, @@ -1703,6 +1707,8 @@ class KhiopsClassifier(ClassifierMixin, KhiopsPredictor): group_target_value : bool, default ``False`` Allows grouping of the target values in classification. It can substantially increase the training time. 
+ keep_selected_variables_only : bool, default ``True`` + Keeps only predictor-selected variables in the supervised analysis report. verbose : bool, default ``False`` If ``True`` it prints debug information and it does not erase temporary files when fitting, predicting or transforming. @@ -1760,6 +1766,7 @@ def __init__( construction_rules=None, n_feature_parts=0, group_target_value=False, + keep_selected_variables_only=True, verbose=False, output_dir=None, auto_sort=True, @@ -1773,6 +1780,7 @@ def __init__( n_evaluated_features=n_evaluated_features, construction_rules=construction_rules, n_feature_parts=n_feature_parts, + keep_selected_variables_only=keep_selected_variables_only, verbose=verbose, output_dir=output_dir, auto_sort=auto_sort, @@ -2105,6 +2113,8 @@ class KhiopsRegressor(RegressorMixin, KhiopsPredictor): n_feature_parts : int, default 0 Maximum number of variable parts produced by preprocessing methods. If equal to 0 it is automatically calculated. + keep_selected_variables_only : bool, default ``True`` + Keeps only predictor-selected variables in the supervised analysis report. verbose : bool, default ``False`` If ``True`` it prints debug information and it does not erase temporary files when fitting, predicting or transforming. 
@@ -2149,6 +2159,7 @@ def __init__( n_evaluated_features=0, construction_rules=None, n_feature_parts=0, + keep_selected_variables_only=True, verbose=False, output_dir=None, auto_sort=True, @@ -2162,6 +2173,7 @@ def __init__( n_evaluated_features=n_evaluated_features, construction_rules=construction_rules, n_feature_parts=n_feature_parts, + keep_selected_variables_only=keep_selected_variables_only, verbose=verbose, output_dir=output_dir, auto_sort=auto_sort, diff --git a/tests/test_sklearn.py b/tests/test_sklearn.py index 7aa9f8d0..9eb2f117 100644 --- a/tests/test_sklearn.py +++ b/tests/test_sklearn.py @@ -766,6 +766,7 @@ def setUpClass(cls): "max_parts": 3, "group_target_value": False, "additional_data_tables": {}, + "keep_selected_variables_only": False, } }, "predict": { @@ -794,6 +795,7 @@ def setUpClass(cls): "max_evaluated_variables": 3, "construction_rules": ["TableMode", "TableSelection"], "max_parts": 5, + "keep_selected_variables_only": False, "additional_data_tables": {}, } }, @@ -1449,6 +1451,7 @@ def test_parameter_transfer_classifier_fit_from_monotable_dataframe(self): "construction_rules": ["TableMode", "TableSelection"], "n_feature_parts": 3, "group_target_value": False, + "keep_selected_variables_only": False, }, ) @@ -1474,6 +1477,7 @@ def test_parameter_transfer_classifier_fit_from_monotable_dataframe_with_df_y( "construction_rules": ["TableMode", "TableSelection"], "n_feature_parts": 3, "group_target_value": False, + "keep_selected_variables_only": False, }, ) @@ -1631,6 +1635,7 @@ def test_parameter_transfer_regressor_fit_from_monotable_dataframe(self): "type_text_features": "ngrams", "construction_rules": ["TableMode", "TableSelection"], "n_feature_parts": 5, + "keep_selected_variables_only": False, }, ) @@ -1651,6 +1656,7 @@ def test_parameter_transfer_regressor_fit_from_monotable_dataframe_with_df_y( "type_text_features": "ngrams", "construction_rules": ["TableMode", "TableSelection"], "n_feature_parts": 5, + "keep_selected_variables_only": 
False, }, )