From ca2c03673a928a4bff5d280199a38953d54776a2 Mon Sep 17 00:00:00 2001 From: harsh-1o Date: Thu, 29 Jan 2026 20:35:48 +0530 Subject: [PATCH 1/2] Fix broken doctests in xgboost_classifier.py --- machine_learning/xgboost_classifier.py | 52 +++++++++++++------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/machine_learning/xgboost_classifier.py b/machine_learning/xgboost_classifier.py index e845480074b9..96236d680b9c 100644 --- a/machine_learning/xgboost_classifier.py +++ b/machine_learning/xgboost_classifier.py @@ -8,32 +8,33 @@ def data_handling(data: dict) -> tuple: - # Split dataset into features and target - # data is features """ - >>> data_handling(({'data':'[5.1, 3.5, 1.4, 0.2]','target':([0])})) - ('[5.1, 3.5, 1.4, 0.2]', [0]) - >>> data_handling( - ... {'data': '[4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]', 'target': ([0, 0])} - ... ) - ('[4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]', [0, 0]) + Split dataset into features and target. + + >>> from sklearn.datasets import load_iris + >>> iris = load_iris() + >>> features, targets = data_handling(iris) + >>> features.shape + (150, 4) + >>> targets.shape + (150,) """ return (data["data"], data["target"]) def xgboost(features: np.ndarray, target: np.ndarray) -> XGBClassifier: """ - # THIS TEST IS BROKEN!! >>> xgboost(np.array([[5.1, 3.6, 1.4, 0.2]]), np.array([0])) - XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None, - colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1, - early_stopping_rounds=None, enable_categorical=False, - eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise', - importance_type=None, interaction_constraints='', - learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4, - max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1, - missing=nan, monotone_constraints='()', n_estimators=100, - n_jobs=0, num_parallel_tree=1, predictor='auto', random_state=0, - reg_alpha=0, reg_lambda=1, ...) + Train an XGBoost classifier. + + >>> from sklearn.datasets import load_iris + >>> iris = load_iris() + >>> X_train, y_train = iris.data[:100], iris.target[:100] + >>> classifier = xgboost(X_train, y_train) + >>> predictions = classifier.predict(iris.data[:5]) + >>> len(predictions) + 5 + >>> all(pred in [0, 1, 2] for pred in predictions) + True """ classifier = XGBClassifier() classifier.fit(features, target) @@ -46,20 +47,18 @@ def main() -> None: https://xgboost.readthedocs.io/en/stable/ Iris type dataset is used to demonstrate algorithm. """ - # Load Iris dataset iris = load_iris() features, targets = data_handling(iris) x_train, x_test, y_train, y_test = train_test_split( - features, targets, test_size=0.25 + features, targets, test_size=0.25, random_state=42 ) - names = iris["target_names"] - + # Create an XGBoost Classifier from the training data xgboost_classifier = xgboost(x_train, y_train) - - # Display the confusion matrix of the classifier with both training and test sets + + # Display the confusion matrix of the classifier with test set ConfusionMatrixDisplay.from_estimator( xgboost_classifier, x_test, @@ -74,6 +73,5 @@ def main() -> None: if __name__ == "__main__": import doctest - doctest.testmod(verbose=True) - main() + main() \ No newline at end of file From 7d369f88fc1935bee4525509b74f4fc96404e697 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 29 Jan 2026 15:09:35 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/xgboost_classifier.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/machine_learning/xgboost_classifier.py b/machine_learning/xgboost_classifier.py index 96236d680b9c..cfb83bf5118a 100644 --- a/machine_learning/xgboost_classifier.py +++ b/machine_learning/xgboost_classifier.py @@ -10,7 +10,7 @@ def data_handling(data: dict) -> tuple: """ Split dataset into features and target. - + >>> from sklearn.datasets import load_iris >>> iris = load_iris() >>> features, targets = data_handling(iris) @@ -25,7 +25,7 @@ def data_handling(data: dict) -> tuple: def xgboost(features: np.ndarray, target: np.ndarray) -> XGBClassifier: """ Train an XGBoost classifier. - + >>> from sklearn.datasets import load_iris >>> iris = load_iris() >>> X_train, y_train = iris.data[:100], iris.target[:100] @@ -54,10 +54,10 @@ def main() -> None: features, targets, test_size=0.25, random_state=42 ) names = iris["target_names"] - + # Create an XGBoost Classifier from the training data xgboost_classifier = xgboost(x_train, y_train) - + # Display the confusion matrix of the classifier with test set ConfusionMatrixDisplay.from_estimator( xgboost_classifier, @@ -73,5 +73,6 @@ def main() -> None: if __name__ == "__main__": import doctest + doctest.testmod(verbose=True) - main() \ No newline at end of file + main()