From 8230bd46990752b7ce3f4684428e0f2689e2b4ea Mon Sep 17 00:00:00 2001
From: Thierry RAMORASOAVINA <thierry.ramorasoavina@orange.com>
Date: Thu, 22 Jan 2026 16:54:02 +0100
Subject: [PATCH 1/2] Add the `n_feature_parts` parameter to the supervised
 estimators

- KhiopsClassifier, KhiopsRegressor and KhiopsEncoder
---
 CHANGELOG.md                 |  5 +++++
 khiops/sklearn/estimators.py | 28 +++++++++++++++++++++++++++-
 tests/test_sklearn.py        | 15 +++++++++++++++
 3 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index bc440647..58ed6a29 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,11 @@
   - Example: 10.2.1.4 is the 5th version that supports khiops 10.2.1.
 - Internals: Changes in *Internals* sections are unlikely to be of interest for data scientists.
 
+## Unreleased
+
+### Added
+- (`sklearn`) `n_feature_parts` parameter to the supervised estimators
+
 ## 11.0.0.2 - 2026-01-26
 
 ## Fixed
diff --git a/khiops/sklearn/estimators.py b/khiops/sklearn/estimators.py
index 3ad6776b..f1605dfc 100644
--- a/khiops/sklearn/estimators.py
+++ b/khiops/sklearn/estimators.py
@@ -1195,6 +1195,7 @@ def __init__(
         specific_pairs=None,
         all_possible_pairs=True,
         construction_rules=None,
+        n_feature_parts=0,
         verbose=False,
         output_dir=None,
         auto_sort=True,
@@ -1211,6 +1212,7 @@ def __init__(
         self.specific_pairs = specific_pairs
         self.all_possible_pairs = all_possible_pairs
         self.construction_rules = construction_rules
+        self.n_feature_parts = n_feature_parts
         self._original_target_dtype = None
         self._predicted_target_meta_data_tag = None
         self._khiops_baseline_model_prefix = None
@@ -1300,6 +1302,12 @@ def _fit_check_params(self, ds, **kwargs):
                 for rule in self.construction_rules:
                     if not isinstance(rule, str):
                         raise TypeError(type_error_message(rule, rule, str))
+        if not isinstance(self.n_feature_parts, int):
+            raise TypeError(
+                type_error_message("n_feature_parts", self.n_feature_parts, int)
+            )
+        if self.n_feature_parts < 0:
+            raise ValueError("'n_feature_parts' must be positive")
 
     def _fit_train_model(self, ds, computation_dir, **kwargs):
         # Train the model with Khiops
@@ -1384,6 +1392,7 @@ def _fit_prepare_training_function_inputs(self, ds, computation_dir):
         kwargs["max_trees"] = kwargs.pop("n_trees")
         kwargs["max_text_features"] = kwargs.pop("n_text_features")
         kwargs["text_features"] = kwargs.pop("type_text_features")
+        kwargs["max_parts"] = kwargs.pop("n_feature_parts")
 
         # Add the additional_data_tables parameter
         kwargs["additional_data_tables"] = additional_data_tables
@@ -1513,6 +1522,7 @@ def __init__(
         specific_pairs=None,
         all_possible_pairs=True,
         construction_rules=None,
+        n_feature_parts=0,
         verbose=False,
         output_dir=None,
         auto_sort=True,
@@ -1525,6 +1535,7 @@ def __init__(
             specific_pairs=specific_pairs,
             all_possible_pairs=all_possible_pairs,
             construction_rules=construction_rules,
+            n_feature_parts=n_feature_parts,
             verbose=verbose,
             output_dir=output_dir,
             auto_sort=auto_sort,
@@ -1685,7 +1696,10 @@ class KhiopsClassifier(ClassifierMixin, KhiopsPredictor):
     construction_rules : list of str, optional
         Allowed rules for the automatic feature construction. If not set, Khiops
         uses the multi-table construction rules listed in
-        `kh.DEFAULT_CONSTRUCTION_RULES <khiops.core.api.DEFAULT_CONSTRUCTION_RULES>`
+        `kh.DEFAULT_CONSTRUCTION_RULES <khiops.core.api.DEFAULT_CONSTRUCTION_RULES>`.
+    n_feature_parts : int, default 0
+        Maximum number of variable parts produced by preprocessing methods. If equal
+        to 0 it is automatically calculated.
     group_target_value : bool, default ``False``
         Allows grouping of the target values in classification. It can substantially
         increase the training time.
@@ -1744,6 +1758,7 @@ def __init__(
         specific_pairs=None,
         all_possible_pairs=True,
         construction_rules=None,
+        n_feature_parts=0,
         group_target_value=False,
         verbose=False,
         output_dir=None,
@@ -1757,6 +1772,7 @@ def __init__(
             n_selected_features=n_selected_features,
             n_evaluated_features=n_evaluated_features,
             construction_rules=construction_rules,
+            n_feature_parts=n_feature_parts,
             verbose=verbose,
             output_dir=output_dir,
             auto_sort=auto_sort,
@@ -2086,6 +2102,9 @@ class KhiopsRegressor(RegressorMixin, KhiopsPredictor):
         Allowed rules for the automatic feature construction. If not set, Khiops
         uses the multi-table construction rules listed in
         `kh.DEFAULT_CONSTRUCTION_RULES <khiops.core.api.DEFAULT_CONSTRUCTION_RULES>`.
+    n_feature_parts : int, default 0
+        Maximum number of variable parts produced by preprocessing methods. If equal
+        to 0 it is automatically calculated.
     verbose : bool, default ``False``
         If ``True`` it prints debug information and it does not erase temporary files
         when fitting, predicting or transforming.
@@ -2129,6 +2148,7 @@ def __init__(
         n_selected_features=0,
         n_evaluated_features=0,
         construction_rules=None,
+        n_feature_parts=0,
         verbose=False,
         output_dir=None,
         auto_sort=True,
@@ -2141,6 +2161,7 @@ def __init__(
             n_selected_features=n_selected_features,
             n_evaluated_features=n_evaluated_features,
             construction_rules=construction_rules,
+            n_feature_parts=n_feature_parts,
             verbose=verbose,
             output_dir=output_dir,
             auto_sort=auto_sort,
@@ -2296,6 +2317,9 @@ class KhiopsEncoder(TransformerMixin, KhiopsSupervisedEstimator):
         Allowed rules for the automatic feature construction. If not set, Khiops
         uses the multi-table construction rules listed in
         `kh.DEFAULT_CONSTRUCTION_RULES <khiops.core.api.DEFAULT_CONSTRUCTION_RULES>`.
+    n_feature_parts : int, default 0
+        Maximum number of variable parts produced by preprocessing methods. If equal
+        to 0 it is automatically calculated.
     informative_features_only : bool, default ``True``
         If ``True`` keeps only informative features.
     group_target_value : bool, default ``False``
@@ -2374,6 +2398,7 @@ def __init__(
         specific_pairs=None,
         all_possible_pairs=True,
         construction_rules=None,
+        n_feature_parts=0,
         informative_features_only=True,
         group_target_value=False,
         keep_initial_variables=False,
@@ -2390,6 +2415,7 @@ def __init__(
             n_text_features=n_text_features,
             type_text_features=type_text_features,
             construction_rules=construction_rules,
+            n_feature_parts=n_feature_parts,
             verbose=verbose,
             output_dir=output_dir,
             auto_sort=auto_sort,
diff --git a/tests/test_sklearn.py b/tests/test_sklearn.py
index 1c515652..c8312fca 100644
--- a/tests/test_sklearn.py
+++ b/tests/test_sklearn.py
@@ -762,6 +762,7 @@ def setUpClass(cls):
                                 "specific_pairs": [("age", "race")],
                                 "all_possible_pairs": False,
                                 "construction_rules": ["TableMode", "TableSelection"],
+                                "max_parts": 3,
                                 "group_target_value": False,
                                 "additional_data_tables": {},
                             }
@@ -790,6 +791,7 @@ def setUpClass(cls):
                                 "max_selected_variables": 1,
                                 "max_evaluated_variables": 3,
                                 "construction_rules": ["TableMode", "TableSelection"],
+                                "max_parts": 5,
                                 "additional_data_tables": {},
                             }
                         },
@@ -818,6 +820,7 @@ def setUpClass(cls):
                                 "specific_pairs": [("age", "race")],
                                 "all_possible_pairs": False,
                                 "construction_rules": ["TableMode", "TableSelection"],
+                                "max_parts": 7,
                                 "informative_variables_only": True,
                                 "group_target_value": False,
                                 "keep_initial_categorical_variables": False,
@@ -860,6 +863,7 @@ def setUpClass(cls):
                                 "specific_pairs": [],
                                 "all_possible_pairs": False,
                                 "construction_rules": ["TableMode", "TableSelection"],
+                                "max_parts": 4,
                                 "group_target_value": False,
                                 "additional_data_tables": {"SpliceJunctionDNA"},
                             }
@@ -889,6 +893,7 @@ def setUpClass(cls):
                                 "max_selected_variables": 1,
                                 "max_evaluated_variables": 3,
                                 "construction_rules": ["TableMode", "TableSelection"],
+                                "max_parts": 6,
                                 "additional_data_tables": {"SpliceJunctionDNA"},
                             }
                         },
@@ -918,6 +923,7 @@ def setUpClass(cls):
                                 "specific_pairs": [],
                                 "all_possible_pairs": False,
                                 "construction_rules": ["TableMode", "TableSelection"],
+                                "max_parts": 8,
                                 "informative_variables_only": True,
                                 "group_target_value": False,
                                 "keep_initial_categorical_variables": False,
@@ -1435,6 +1441,7 @@ def test_parameter_transfer_classifier_fit_from_monotable_dataframe(self):
                 "specific_pairs": [("age", "race")],
                 "all_possible_pairs": False,
                 "construction_rules": ["TableMode", "TableSelection"],
+                "n_feature_parts": 3,
                 "group_target_value": False,
             },
         )
@@ -1458,6 +1465,7 @@ def test_parameter_transfer_classifier_fit_from_monotable_dataframe_with_df_y(
                 "specific_pairs": [("age", "race")],
                 "all_possible_pairs": False,
                 "construction_rules": ["TableMode", "TableSelection"],
+                "n_feature_parts": 3,
                 "group_target_value": False,
             },
         )
@@ -1480,6 +1488,7 @@ def test_parameter_transfer_classifier_fit_from_multitable_dataframe(self):
                 "specific_pairs": [],
                 "all_possible_pairs": False,
                 "construction_rules": ["TableMode", "TableSelection"],
+                "n_feature_parts": 4,
                 "group_target_value": False,
             },
         )
@@ -1517,6 +1526,7 @@ def test_parameter_transfer_encoder_fit_from_monotable_dataframe(self):
                 "specific_pairs": [("age", "race")],
                 "all_possible_pairs": False,
                 "construction_rules": ["TableMode", "TableSelection"],
+                "n_feature_parts": 7,
                 "informative_features_only": True,
                 "group_target_value": False,
                 "keep_initial_variables": False,
@@ -1543,6 +1553,7 @@ def test_parameter_transfer_encoder_fit_from_monotable_dataframe_with_df_y(
                 "specific_pairs": [("age", "race")],
                 "all_possible_pairs": False,
                 "construction_rules": ["TableMode", "TableSelection"],
+                "n_feature_parts": 7,
                 "informative_features_only": True,
                 "group_target_value": False,
                 "keep_initial_variables": False,
@@ -1568,6 +1579,7 @@ def test_parameter_transfer_encoder_fit_from_multitable_dataframe(self):
                 "specific_pairs": [],
                 "all_possible_pairs": False,
                 "construction_rules": ["TableMode", "TableSelection"],
+                "n_feature_parts": 8,
                 "informative_features_only": True,
                 "group_target_value": False,
                 "keep_initial_variables": False,
@@ -1608,6 +1620,7 @@ def test_parameter_transfer_regressor_fit_from_monotable_dataframe(self):
                 "n_text_features": 300000,
                 "type_text_features": "ngrams",
                 "construction_rules": ["TableMode", "TableSelection"],
+                "n_feature_parts": 5,
             },
         )
 
@@ -1626,6 +1639,7 @@ def test_parameter_transfer_regressor_fit_from_monotable_dataframe_with_df_y(
                 "n_text_features": 300000,
                 "type_text_features": "ngrams",
                 "construction_rules": ["TableMode", "TableSelection"],
+                "n_feature_parts": 5,
             },
         )
 
@@ -1644,6 +1658,7 @@ def test_parameter_transfer_regressor_fit_from_multitable_dataframe(self):
                 "n_selected_features": 1,
                 "n_evaluated_features": 3,
                 "construction_rules": ["TableMode", "TableSelection"],
+                "n_feature_parts": 6,
             },
         )
 

From 127535658c1ce4294c15b1b2e81517d25051d97d Mon Sep 17 00:00:00 2001
From: Thierry RAMORASOAVINA <thierry.ramorasoavina@orange.com>
Date: Thu, 12 Feb 2026 18:21:12 +0100
Subject: [PATCH 2/2] Sklearn: rephrase error messages to say "non-negative" instead of "positive"

---
 khiops/sklearn/estimators.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/khiops/sklearn/estimators.py b/khiops/sklearn/estimators.py
index f1605dfc..1af5d06f 100644
--- a/khiops/sklearn/estimators.py
+++ b/khiops/sklearn/estimators.py
@@ -180,7 +180,7 @@ def _check_pair_parameters(estimator):
     if not isinstance(estimator.n_pairs, int):
         raise TypeError(type_error_message("n_pairs", estimator.n_pairs, int))
     if estimator.n_pairs < 0:
-        raise ValueError("'n_pairs' must be positive")
+        raise ValueError("'n_pairs' must be non-negative")
     if estimator.specific_pairs is not None:
         if not is_list_like(estimator.specific_pairs):
             raise TypeError(
@@ -955,7 +955,7 @@ def _simplify(
                         type_error_message("'max_part_numbers' values", value, int)
                     )
                 elif value < 0:
-                    raise ValueError("'max_part_numbers' values must be positive")
+                    raise ValueError("'max_part_numbers' values must be non-negative")
         # Create temporary directory and tables
         computation_dir = self._create_computation_dir("simplify")
         output_dir = self._get_output_dir(computation_dir)
@@ -1272,17 +1272,17 @@ def _fit_check_params(self, ds, **kwargs):
         if not isinstance(self.n_features, int):
             raise TypeError(type_error_message("n_features", self.n_features, int))
         if self.n_features < 0:
-            raise ValueError("'n_features' must be positive")
+            raise ValueError("'n_features' must be non-negative")
         if not isinstance(self.n_trees, int):
             raise TypeError(type_error_message("n_trees", self.n_trees, int))
         if self.n_trees < 0:
-            raise ValueError("'n_trees' must be positive")
+            raise ValueError("'n_trees' must be non-negative")
         if not isinstance(self.n_text_features, int):
             raise TypeError(
                 type_error_message("n_text_features", self.n_text_features, int)
             )
         if self.n_text_features < 0:
-            raise ValueError("'n_text_features' must be positive")
+            raise ValueError("'n_text_features' must be non-negative")
         if not isinstance(self.type_text_features, str):
             raise TypeError(
                 type_error_message("type_text_features", self.type_text_features, str)
@@ -1307,7 +1307,7 @@ def _fit_check_params(self, ds, **kwargs):
                 type_error_message("n_feature_parts", self.n_feature_parts, int)
             )
         if self.n_feature_parts < 0:
-            raise ValueError("'n_feature_parts' must be positive")
+            raise ValueError("'n_feature_parts' must be non-negative")
 
     def _fit_train_model(self, ds, computation_dir, **kwargs):
         # Train the model with Khiops
@@ -1635,9 +1635,9 @@ def _fit_check_params(self, ds, **kwargs):
 
         # Check estimator parameters
         if self.n_evaluated_features < 0:
-            raise ValueError("'n_evaluated_features' must be positive")
+            raise ValueError("'n_evaluated_features' must be non-negative")
         if self.n_selected_features < 0:
-            raise ValueError("'n_selected_features' must be positive")
+            raise ValueError("'n_selected_features' must be non-negative")
 
 
 # Note: scikit-learn **requires** inherit first the mixins and then other classes