WikarNotAvailable
diff --git a/‎scripts/models/svm/svm_tuning.ipynb‎
Lines changed: 215 additions & 0 deletions b/‎scripts/models/svm/svm_tuning.ipynb‎
Lines changed: 215 additions & 0 deletions
@@ -0,0 +1,215 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "25921d5f-e3b7-4960-ad21-95cad70e6e53",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from sklearn.linear_model import SGDOneClassSVM\n",
+    "from sklearn.metrics import (\n",
+    "    classification_report,\n",
+    "    precision_score,\n",
+    "    recall_score,\n",
+    "    f1_score,\n",
+    "    accuracy_score,\n",
+    "    roc_auc_score,\n",
+    "    fbeta_score,\n",
+    ")\n",
+    "from sklearn.kernel_approximation import RBFSampler\n",
+    "from itertools import product\n",
+    "import csv"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "1a264aca-9ea2-4ff8-b1fc-ca619e83bb59",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Read both parquet inputs up front. \"benchmark\" stays a DataFrame because it still\n",
+    "# carries the 'label' column that is split off in the next cell; \"data\" is converted\n",
+    "# directly to a float matrix.\n",
+    "validation_data = pd.read_parquet(\"benchmark\")\n",
+    "X_train = pd.read_parquet(\"data\").to_numpy(dtype=float)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "0caae7d5-143d-4ca1-a06f-b766af2a9c7d",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Ground-truth column of the validation frame (re-encoded to -1/+1 in the next cell).\n",
+    "y_val = validation_data['label'].values\n",
+    "# Every remaining column is used as a numeric feature.\n",
+    "X_val = validation_data.drop('label', axis=1).to_numpy(dtype=float)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "9fc0412f-0c32-4b8c-a90b-52b4c1409527",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Re-encode the labels to the -1/+1 convention used by SGDOneClassSVM.predict:\n",
+    "# truthy labels become -1 (outlier) and falsy labels become +1 (inlier).\n",
+    "# The labels are read from the DataFrame rather than from y_val itself so that\n",
+    "# re-running this cell is idempotent; feeding already-encoded values back through\n",
+    "# np.where would map everything to -1 because both -1 and 1 are truthy.\n",
+    "y_val = np.where(validation_data['label'].values, -1, 1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "4dbf3113-58cb-4c28-ac90-418025a6f84e",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Hyperparameter grid for the RBF feature map (gamma) and the one-class SVM\n",
+    "# (nu, tol, eta0). The previously computed `gamma_values` sweep was never used by\n",
+    "# the search, so it has been removed; extend the \"gamma\" list here to widen it.\n",
+    "param_values = {\n",
+    "    \"gamma\": [0.1, 1.0, 5.0, 10.0],\n",
+    "    \"nu\": [0.01, 0.05, 0.1, 0.2],\n",
+    "    \"tol\": [1e-7],\n",
+    "    \"eta0\": [1e-6],\n",
+    "}\n",
+    "\n",
+    "# Cartesian product of all value lists, one dict per combination (4 * 4 * 1 * 1 = 16).\n",
+    "keys = param_values.keys()\n",
+    "values = param_values.values()\n",
+    "combinations = [dict(zip(keys, combo)) for combo in product(*values)]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "d852cff0-d44f-4b52-8803-e109cb22af58",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1\n",
+      "2\n",
+      "3\n",
+      "4\n",
+      "5\n",
+      "6\n",
+      "7\n",
+      "8\n",
+      "9\n",
+      "10\n",
+      "11\n",
+      "12\n",
+      "13\n",
+      "14\n",
+      "15\n",
+      "16\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Grid search: for every hyperparameter combination, fit a one-class SVM on the\n",
+    "# training data, score it on the labelled validation data, and append the metrics\n",
+    "# as one row of the CSV file.\n",
+    "csv_file = \"svm_results_tuning.csv\"\n",
+    "\n",
+    "# One entry per combination: its validation AUC and its validation predictions.\n",
+    "models = []\n",
+    "\n",
+    "with open(csv_file, mode=\"w\", newline=\"\") as file:\n",
+    "    writer = csv.writer(file)\n",
+    "    writer.writerow([\"gamma\", \"nu\", \"tol\", \"eta0\", \"Precision\", \"Recall Normal\", \"Recall Anomaly\", \"F1-Score\", \"Accuracy\", \"AUC\", \"F2-Score\"])\n",
+    "\n",
+    "    for i, combo in enumerate(combinations, start=1):\n",
+    "        # Random Fourier feature map approximating an RBF kernel (this is an\n",
+    "        # RBFSampler, not a Nystroem approximation).\n",
+    "        rbf_sampler = RBFSampler(\n",
+    "            gamma=combo[\"gamma\"],\n",
+    "            n_components=1000,\n",
+    "            random_state=42\n",
+    "        )\n",
+    "\n",
+    "        sgd_ocsvm = SGDOneClassSVM(\n",
+    "            nu=combo[\"nu\"],\n",
+    "            shuffle=True,\n",
+    "            learning_rate=\"constant\",\n",
+    "            tol=combo[\"tol\"],\n",
+    "            random_state=42,\n",
+    "            eta0=combo[\"eta0\"],\n",
+    "            max_iter=10000\n",
+    "        )\n",
+    "\n",
+    "        # Fit on the training split. Fitting on X_val (as before) trained the\n",
+    "        # detector on the very samples, anomalies included, that it is scored on.\n",
+    "        X_train_transformed = rbf_sampler.fit_transform(X_train)\n",
+    "        sgd_ocsvm.fit(X_train_transformed)\n",
+    "\n",
+    "        # Evaluate on the held-out validation split with the same feature map.\n",
+    "        X_val_transformed = rbf_sampler.transform(X_val)\n",
+    "        y_pred = sgd_ocsvm.predict(X_val_transformed)\n",
+    "        # Continuous scores (positive for inliers) so that ROC AUC is computed over\n",
+    "        # all thresholds instead of the single operating point given by y_pred.\n",
+    "        val_scores = sgd_ocsvm.decision_function(X_val_transformed)\n",
+    "\n",
+    "        # Label 1 is the normal class and -1 the anomalous class.\n",
+    "        accuracy = accuracy_score(y_val, y_pred)\n",
+    "        precision = precision_score(y_val, y_pred, pos_label=1)\n",
+    "        recall_normal = recall_score(y_val, y_pred, pos_label=1)\n",
+    "        recall_anomaly = recall_score(y_val, y_pred, pos_label=-1)\n",
+    "        f1 = f1_score(y_val, y_pred, pos_label=1)\n",
+    "        auc = roc_auc_score(y_val, val_scores)\n",
+    "        f2 = fbeta_score(y_val, y_pred, beta=2, pos_label=1)\n",
+    "\n",
+    "        # Progress indicator: index of the combination that just finished.\n",
+    "        print(i)\n",
+    "        models.append({\"auc\": auc, \"y_pred\": y_pred})\n",
+    "\n",
+    "        writer.writerow([\n",
+    "                combo[\"gamma\"], combo[\"nu\"], combo[\"tol\"], combo[\"eta0\"],\n",
+    "                precision, recall_normal, recall_anomaly, f1, accuracy, auc, f2\n",
+    "        ])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "5b454121",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "   Anomalous       0.22      0.29      0.25     14034\n",
+      "      Normal       0.79      0.72      0.75     50702\n",
+      "\n",
+      "    accuracy                           0.62     64736\n",
+      "   macro avg       0.50      0.50      0.50     64736\n",
+      "weighted avg       0.66      0.62      0.64     64736\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Report the combination with the highest validation AUC, using the predictions\n",
+    "# stored for that run rather than the y_pred left over from the last loop iteration.\n",
+    "highest_auc_item = max(models, key=lambda x: x[\"auc\"])\n",
+    "# `labels` pins the row order so that -1 maps to \"Anomalous\" and 1 to \"Normal\".\n",
+    "print(\n",
+    "    classification_report(\n",
+    "        y_val,\n",
+    "        highest_auc_item[\"y_pred\"],\n",
+    "        labels=[-1, 1],\n",
+    "        target_names=[\"Anomalous\", \"Normal\"],\n",
+    "    )\n",
+    ")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}