From 2724133e4e4995f0bd100ee1fe87e05430d27501 Mon Sep 17 00:00:00 2001
From: coder-jayp <jayp222001@gmail.com>
Date: Mon, 2 Feb 2026 23:21:54 +0530
Subject: [PATCH] Add CatBoostCvExperiment integration, tests, and __init__
 update

---
 .gitignore                                    | Bin 1893 -> 2166 bytes
 .../experiment/integrations/__init__.py       |   3 +
 .../experiment/integrations/catboost_cv.py    | 120 ++++++++++++++++++
 .../test_integrations/test_catboost_cv.py     |  70 ++++++++++
 4 files changed, 193 insertions(+)
 create mode 100644 src/hyperactive/experiment/integrations/catboost_cv.py
 create mode 100644 src/hyperactive/tests/test_integrations/test_catboost_cv.py
diff --git a/.gitignore b/.gitignore
index 3e7b58f97f5b376ff559bc70d7db8c51198ca07f..f6093d1e80b2bcb6fe70abcc7b49c266c45e6377 100644
GIT binary patch
literal 2166
zcmah~ZENH<5ccPV{SSf^3QO#TwpXt0r@JIauiT}fx$Bowj4Y3Lm9;ghB=5%k^?gQm
z_L5R43oFfwW=5mu8DFTkQ=-!F;B8N>+A8+R4)*W7zy9z+b<oH4s;>QHwAoW#Z`Z3$
z<)=Rkw|`>$m7glp>j<ay>Ox&9dQ9Y^We3fsiahJDZA`&7V}b!`yfz2Dvu?LuRd-X`
zvs<s4(e^E(w{)bQeM!4r6cHz7;gjoGx1w4s)w1@sffE1Ue}~oUki;o22ah^HI_-&i
zQG4slER+K!-Rh*pcdr9&eJ*6Sit9!al-6|&p|UO}-J?sD=}>kyu2&!5+`fNzbN^|+
z<=ymt@e0xm^*N3>GO4tuNOLb0GO1Ir30+dnq_mpTs%V1sNp*n-C=(67*NJkNhC0Tn
zXVFz|R+PYKI9;m9N<(C2-2<7Va5364%mJE?BmTvhDye+X&UO@2ejra~F8*A)>UoF9
zlTKk*rAIi5&{9u8OWNC5wl*LQhiNuTnb&kgz|FSC1J?6xG6Vn=Do-2^5wDI5iSK7;
z6YdxjuweK|LGMTf@{G4zul8x^p_Q&Dew4+{%abcKPuprEf<SY<avV`$OtE?#`t|DU
zlBLajMDFaT%WT8R{p6)f(aM9vAjj(*4zg7GDQFk*5>J30I7eXgBwzYLyB({T#;*HO
zEa(`p$VT@yib;TYL|EXrsp4O~P4w;aefhv+a2mx*uy^3m;T$Ui&KZ)zPSk*6lw!GD
zRow4Q(0)4mSLiw9In=%1Tlc71Hse4p<@$w0CT{C9(L{P7L{x)>+S@u%M}Z{09uwC)
za(JXfLQu(lH@y|Q5^$b$*pX0F^uLVK!=P?CQNw&d?bc7OsqsMO;2AK)1B>VTyM^~Y
zuuK`irqg3Dfp9`bLT%V}c9+{HC;UVX5R+GewFGKp%*@e%&uj$g)Ei?swCKF>!B1Xq
zXY0kQA6{RA1Ik8V4MPu|X))<CYfL}3pqTYi3trJ2KpC~<07hL3+=D%m;AcKzMg_!j
z%4jf~J9zZHZWuj6O|W!oua8oL&=~X`$_S>}&KlJcYHiV5OHpxnA<qexqmGpc9Ao(u
zL&ebxM=sTTBQ@v=MdZQsA`AwD#t>llby7mTOUdH|v{L`m3Ct$th@=P4Ym!?&Z^ZKA
zH+SmQ>u-KiV}w5@7gf7T?QGhQ4X8D2uW{eD{Pl{R<A7n>^?aJY9P_lNm_Hs;!WAY-
zSda^zW~m{a$_SXWV-GHWQC8lza|!pD5+8Ni<MR=uh~?viuM7-lnB2Id4eSK=fLb+U
z=9m}!0ba7la&!AT{;{tbeOT1XR%*QW6X30Y&OA_`$c%_FTNcACp9MMj8+-b%a{>An
zj>AC+`EQbLBc{ZB;b9U!SF)UkQq_uBJw$>2^Sz|LoA8}T6Y^)i2|dvleB02_LKmrM
z3dS#DR~NQ$MF(kHtcw-oy70wRh@UC;1!c`_+Z5Ge6VG{{;4#Pfa$@%WzfAuCCwiw`

literal 1893
zcmZuyO^@6*487}D2uOhfi?t23O_8>zWOsvZk&i_;>7^(PS*AVOu|-vqXV(4oeUxW{
zNzub-D3T&6>OJ~}ezRt(EYHDB)ayob$ZmH36#4euJ3T<3RNIEt8oQ&mZPZmA);}%x
zf2pf)!dlzOq@>gfeWU3)lTVI)f>UBq-nx{d>y`{H!UtpbW^{g3^~0KX>{Yk8sc%$I
z2bwrkG>!>7oWjaSKe6ed*wpp*H@U7+SbqGWQNhFvd)iJMOl;I~M>IhfT&Rj%&6G`V
zvVq^uL~24wy&2(nC2=d~2X0jDe9C4*Gip1Gn@PQY{o(%Y-Qy>@(8GFv@`%{!&uM}8
zs`HK#ZQasHp^wpJG(&f*HhRmalZ`H9Jw%?N%ru9|WRfTy4U9t<v32Q7lDKFqUF*c!
zV9VP1C$d@NKGZPG5t@M$Rx#!pHP6Pofl`)N2xO(~FGcH#$5eU)q0Y~_$URL2Amp7(
zRqrAaa9oQOXK(4CuCAIEZ`5vgkoN@ws5)}oCp;=ij?dm@Tq|WfwB-XuGm?%J7*C{j
zd7hv|hciE8^<_P~DTLb&)K`!NYRYp$O)01Pd7ji4yKE#o-0>gQ>rJe8Ymjy%hZluf
zPPQdbBei{s#wWD<1kRb|EN3y@J{do<PI(!IpP@!y)Wp_IE%IkzH6fhpyE^?nxlG?*
zh}9D>(VI!ayaf;T=O_h5-V#bVQU`)iPStiTxIWrw!g}_t(Nk<C#$(ty|Eznq%S=9(
zazO)|n)b{S*<8vA6(ElZu8lNMB*|))%<V`XPml@hLg<I}jW7*O%O;KldO`WyB8`gr
z183@(_ke#`eb=J>b{`m!D$Krr*ti-Z%RB%O2D3~E0dFC3YuOKOlr|w~SjivC6TnpA
znPegqg9~B--t}v1IriwSAUdqUG+--?FJAui>KZ84E&(~1OK3xnVN`H$r=<r;T#zF0
zOm+{@>r(<C&gaNeUE=@BM+}sRP+nUDX7T`sX)+z7C8!6MLGR2#N)Q@?e{&^daf7qE
zClu48m5!3;_|?9|Kb`cfZRC`ya|oT5Ab7vl+kNzGR@6{N%2!c9Yy%Rr|2AtO(Y2)U
z0Hk#1d;pV0dm?GTWktB+WfRKl-aY7-ufF?5FA2^VI#iFf9$nro9k?<#x43FweY|1s
zIbr|~6CcZ1-zG98ne0e@gFz5a&{Vc;Ix@LRfJDDcbfK$*>*dHJX#U>h9fo_9?As>;
zqX+sEB<}pdMfQT}r0N2XgRJ;9N~A{JefR@o-XH4D>^I#eNrJEfw;Jdxlkdz*h2bWK
zlfG=vGEV)@f&S-Ag!;m3-V43{!?-4a=W?}q1*B_P&!eR7Wmi8XK>9+2@571-o>wF-
M>;chmd3<ctzqRX$R{#J2

diff --git a/src/hyperactive/experiment/integrations/__init__.py b/src/hyperactive/experiment/integrations/__init__.py
index c302e25a..9c43557f 100644
--- a/src/hyperactive/experiment/integrations/__init__.py
+++ b/src/hyperactive/experiment/integrations/__init__.py
@@ -15,10 +15,13 @@
     TorchExperiment,
 )
 
+from .catboost_cv import CatBoostCvExperiment
+
 __all__ = [
     "SklearnCvExperiment",
     "SkproProbaRegExperiment",
     "SktimeClassificationExperiment",
     "SktimeForecastingExperiment",
     "TorchExperiment",
+    "CatBoostCvExperiment",
 ]
diff --git a/src/hyperactive/experiment/integrations/catboost_cv.py b/src/hyperactive/experiment/integrations/catboost_cv.py
new file mode 100644
index 00000000..a39975fe
--- /dev/null
+++ b/src/hyperactive/experiment/integrations/catboost_cv.py
@@ -0,0 +1,175 @@
+"""Experiment adapter for CatBoost cross-validation via ``catboost.cv``."""
+
+from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple
+
+import pandas as pd
+
+from hyperactive.base._experiment import BaseExperiment
+
+if TYPE_CHECKING:
+    # Needed for annotations only; keeping it out of the runtime import path
+    # lets this module be imported when the optional ``catboost`` package is
+    # not installed.
+    from catboost import Pool
+
+
+class CatBoostCvExperiment(BaseExperiment):
+    """Cross-validation experiment for CatBoost using `catboost.cv()`.
+
+    Wraps CatBoost cross-validation so it can be used directly with
+    Hyperactive optimizers. The score is the value of the
+    ``test-<metric>-mean`` column in the last row of the table returned by
+    ``catboost.cv`` (e.g. ``test-Logloss-mean``, ``test-AUC-mean``).
+
+    Parameters
+    ----------
+    pool : catboost.Pool
+        Data pool with features, labels, and optional weights/groups.
+    iterations : int, default=100
+        Maximum boosting iterations. Used unless the evaluated parameters
+        already contain an ``"iterations"`` entry.
+    fold_count : int, default=5
+        Number of cross-validation folds.
+    early_stopping_rounds : int or None, default=None
+        Forwarded to ``catboost.cv`` as ``early_stopping_rounds``.
+    partition_random_seed : int, default=0
+        Forwarded to ``catboost.cv`` as ``partition_random_seed``.
+    type : str, default="Classical"
+        CV scheme ('Classical', 'TimeSeries', 'Inverted', etc.).
+    metric : str, default="Logloss"
+        Metric to extract as the optimization score.
+    loss_function : str, default="Logloss"
+        Training objective. Used unless the evaluated parameters already
+        contain a ``"loss_function"`` entry. If the effective objective
+        differs from `metric`, `metric` is added as a ``custom_metric``.
+    """
+
+    _tags = {
+        "object_type": "experiment",
+        "python_dependencies": "catboost",
+        "property:randomness": "random",
+        "property:higher_or_lower_is_better": "lower",
+    }
+
+    def __init__(
+        self,
+        pool: "Pool",
+        iterations: int = 100,
+        fold_count: int = 5,
+        early_stopping_rounds: Optional[int] = None,
+        partition_random_seed: int = 0,
+        type: str = "Classical",
+        metric: str = "Logloss",
+        loss_function: str = "Logloss",
+    ):
+        super().__init__()
+        self.pool = pool
+        self.iterations = iterations
+        self.fold_count = fold_count
+        self.early_stopping_rounds = early_stopping_rounds
+        self.partition_random_seed = partition_random_seed
+        self.type = type
+        self.metric = metric
+        self.loss_function = loss_function
+
+        # NOTE(review): the direction is fixed to "lower" regardless of
+        # ``metric``. That suits loss-like metrics such as Logloss; for a
+        # metric where larger is better (e.g. AUC) an optimizer that follows
+        # this tag would push the metric down. Confirm this is intended.
+        self.set_tags(**{"property:higher_or_lower_is_better": "lower"})
+
+    def _paramnames(self) -> Optional[list]:
+        """Return ``None``: no fixed list of parameter names is declared."""
+        return None
+
+    def _evaluate(self, params: Dict[str, Any]) -> Tuple[float, Dict[str, Any]]:
+        """Run CatBoost CV and return mean test metric + metadata.
+
+        Parameters
+        ----------
+        params : dict
+            Training parameters handed to ``catboost.cv`` as ``params``.
+            The dict and any list stored under ``"custom_metric"`` are
+            left unmodified.
+
+        Returns
+        -------
+        float
+            Last value of the ``test-<metric>-mean`` column.
+        dict
+            Metadata: the CV table as a list of records, the last iteration
+            index, the column used, the effective loss function and the
+            custom metrics passed to CatBoost.
+
+        Raises
+        ------
+        RuntimeError
+            If ``catboost.cv`` raises; the original error is chained.
+        ValueError
+            If the expected metric column is missing from the CV results.
+        """
+        # Imported here rather than at module level so that the optional
+        # ``catboost`` dependency is only required when a CV is actually run.
+        from catboost import cv
+
+        cv_params = params.copy()
+        cv_params.setdefault("loss_function", self.loss_function)
+        cv_params.setdefault("iterations", self.iterations)
+
+        # ``params`` may override ``self.loss_function``, so the metric has
+        # to be requested as a custom metric whenever it differs from the
+        # objective that is actually used for this run.
+        if self.metric != cv_params["loss_function"]:
+            requested = cv_params.get("custom_metric") or []
+            if isinstance(requested, str):
+                custom_metrics = [requested]
+            else:
+                # ``params.copy()`` is shallow; build a new list so the
+                # caller's own list is never appended to.
+                custom_metrics = list(requested)
+            if self.metric not in custom_metrics:
+                custom_metrics.append(self.metric)
+            cv_params["custom_metric"] = custom_metrics
+
+        try:
+            cv_results: pd.DataFrame = cv(
+                params=cv_params,
+                pool=self.pool,
+                fold_count=self.fold_count,
+                early_stopping_rounds=self.early_stopping_rounds,
+                partition_random_seed=self.partition_random_seed,
+                type=self.type,
+                verbose=False,
+                plot=False,
+                return_models=False,
+                as_pandas=True,
+            )
+        except Exception as e:
+            raise RuntimeError(
+                f"CatBoost CV failed with params: {cv_params}\n"
+                f"Error: {str(e)}"
+            ) from e
+
+        target_col = f"test-{self.metric}-mean"
+
+        if target_col not in cv_results.columns:
+            available = ", ".join(cv_results.columns)
+            raise ValueError(
+                f"Expected column '{target_col}' not found in cv_results.\n"
+                f"Available columns: {available}\n"
+                f"Check that metric='{self.metric}' is computed. "
+                f"Current loss_function: '{cv_params.get('loss_function')}'"
+            )
+
+        # The score is taken from the final row of the CV table.
+        mean_score = float(cv_results[target_col].iloc[-1])
+
+        metadata = {
+            "cv_results": cv_results.to_dict(orient="records"),
+            "final_iteration": int(cv_results["iterations"].iloc[-1]),
+            "target_column": target_col,
+            "used_loss_function": cv_params["loss_function"],
+            "custom_metrics_used": cv_params.get("custom_metric", None),
+        }
+
+        return mean_score, metadata
diff --git a/src/hyperactive/tests/test_integrations/test_catboost_cv.py b/src/hyperactive/tests/test_integrations/test_catboost_cv.py
new file mode 100644
index 00000000..6448ee18
--- /dev/null
+++ b/src/hyperactive/tests/test_integrations/test_catboost_cv.py
@@ -0,0 +1,87 @@
+"""Tests for the CatBoostCvExperiment integration."""
+
+import numpy as np
+import pytest
+
+# catboost is an optional dependency: skip the whole module, rather than
+# erroring during collection, when it is not installed. This has to run
+# before the imports below, which need catboost themselves.
+catboost = pytest.importorskip("catboost")
+
+from hyperactive.experiment.integrations import (  # noqa: E402
+    CatBoostCvExperiment,
+)
+
+
+@pytest.fixture
+def dummy_binary_pool():
+    """Create a small random binary classification dataset as CatBoost Pool."""
+    # A local generator keeps the data reproducible without reseeding
+    # NumPy's global random state for other tests.
+    rng = np.random.default_rng(42)
+    X = rng.random((400, 8))
+    y = rng.integers(0, 2, 400)
+    return catboost.Pool(data=X, label=y)
+
+
+def test_catboost_cv_runs_and_returns_valid_score(dummy_binary_pool):
+    """Basic sanity test: ensure CatBoostCvExperiment runs cv() correctly."""
+    exp = CatBoostCvExperiment(
+        pool=dummy_binary_pool,
+        metric="Logloss",
+        fold_count=4,
+        iterations=50,
+        early_stopping_rounds=10,
+    )
+
+    params = {
+        "learning_rate": 0.03,
+        "depth": 5,
+        "l2_leaf_reg": 3.0,
+    }
+
+    raw_score, metadata = exp.evaluate(params)
+    signed_score, _ = exp.score(params)
+
+    assert isinstance(raw_score, float)
+    assert 0 < raw_score < 1, f"Logloss out of expected range: {raw_score:.4f}"
+    assert signed_score < 0, "Signed score should be negative (lower is better)"
+    assert exp.get_tag("property:higher_or_lower_is_better") == "lower"
+
+    assert isinstance(metadata, dict)
+    assert "cv_results" in metadata
+    assert "target_column" in metadata
+    assert metadata["target_column"] == "test-Logloss-mean"
+    assert "final_iteration" in metadata
+    assert metadata["final_iteration"] > 0
+    assert "used_loss_function" in metadata
+    assert metadata["used_loss_function"] == "Logloss"
+
+
+def test_catboost_cv_with_auc(dummy_binary_pool):
+    """Test AUC metric with Logloss objective (shows metric vs loss separation)."""
+    exp = CatBoostCvExperiment(
+        pool=dummy_binary_pool,
+        loss_function="Logloss",
+        metric="AUC",
+        fold_count=3,
+        iterations=30,
+    )
+
+    params = {"learning_rate": 0.05, "depth": 4}
+
+    raw_score, metadata = exp.evaluate(params)
+    signed_score, _ = exp.score(params)
+
+    # AUC on random data is noisy, but typically around 0.5 ± 0.2–0.3
+    assert 0.25 < raw_score < 0.75, f"Unexpected AUC on random data: {raw_score:.4f}"
+
+    # NOTE(review): the experiment tags every metric as "lower is better",
+    # so the signed score of AUC comes out negative. The assertions below pin
+    # that current behaviour; revisit them if the direction is ever derived
+    # from the metric.
+    assert signed_score < 0, f"Signed score should be negative, got {signed_score:.4f}"
+
+    assert exp.get_tag("property:higher_or_lower_is_better") == "lower"
+    assert "used_loss_function" in metadata
+    assert metadata["used_loss_function"] == "Logloss"