|
| 1 | +from decouple import config |
| 2 | +from sklearn.model_selection import train_test_split |
| 3 | +import pandas as pd |
| 4 | +from services import Search |
| 5 | +from sklearn.linear_model import LogisticRegression |
| 6 | +from sklearn.metrics import ( |
| 7 | + accuracy_score, average_precision_score, f1_score, |
| 8 | + precision_score, recall_score |
| 9 | +) |
| 10 | +from sklearn.preprocessing import LabelEncoder |
| 11 | + |
# Module-level classifier reused by get_predictions: logistic regression with
# no penalty term, one-vs-rest handling of multiclass targets, fitted with the
# Newton-CG solver and capped at 250 iterations.
estimator = LogisticRegression(
    penalty=None,
    solver="newton-cg",
    max_iter=250,
    multi_class="ovr",
)

# Module-level encoder used to turn the target labels into integer codes.
encoder = LabelEncoder()
| 17 | + |
def get_predictions(tasks, data_model):
    """Fit the shared classifier on ``tasks`` and return them with predicted species.

    The ``"id"`` column is discarded, ``"species"`` is used as the target and
    every remaining column as a feature. 20% of the rows are held out to
    compute evaluation metrics, which are indexed into the search backend
    configured by ``ELASTICSEARCH_HOST``. The fitted model then predicts a
    species for every input row.

    Args:
        tasks: Sequence of mappings, each containing an ``"id"`` key, a
            ``"species"`` key and the feature values.
            NOTE(review): features are presumably numeric -- confirm with caller.
        data_model: Callable invoked as ``data_model(**fields)`` for each task.

    Returns:
        A list with one ``data_model`` result per task, where ``"species"``
        holds the predicted (decoded) label. The input mappings are not
        modified. An empty ``tasks`` yields an empty list and nothing is
        trained or indexed.

    Side effects:
        Refits the module-level ``estimator`` and ``encoder`` and writes one
        metrics document to the search index.
    """
    if not tasks:
        # Nothing to train on or predict for.
        return []

    resp = "species"
    df = pd.DataFrame(tasks).drop(columns=["id"])
    X = df.drop(columns=resp)
    y = encoder.fit_transform(df[resp])
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    estimator.fit(X_train, y_train)
    test_preds = estimator.predict(X_test)

    # Average precision is a ranking metric: it needs continuous scores, not
    # hard label predictions, and it does not accept a multiclass label vector
    # paired with a single score column.
    test_proba = estimator.predict_proba(X_test)
    if len(encoder.classes_) == 2:
        # Binary target: score the probability of the positive class (code 1).
        avg_precision = average_precision_score(y_test, test_proba[:, 1])
    else:
        # Multiclass target: one-hot the true labels against the fitted class
        # order so each class is scored one-vs-rest and macro-averaged.
        one_hot = (y_test.reshape(-1, 1) == estimator.classes_).astype(int)
        avg_precision = average_precision_score(one_hot, test_proba)

    # Cast to plain floats so the document contains only JSON-native numbers.
    document = {
        "accuracy": float(accuracy_score(y_test, test_preds)),
        "average_precision": float(avg_precision),
        "f1": float(f1_score(y_test, test_preds, average="weighted")),
        "precision": float(
            precision_score(y_test, test_preds, average="weighted")
        ),
        "recall": float(recall_score(y_test, test_preds, average="weighted")),
    }

    search = Search(config("ELASTICSEARCH_HOST"))
    try:
        search.index(index=search.indx, document=document)
    finally:
        # Release the client even when indexing fails.
        search.close()

    # Decode the integer class codes back to the original species labels and
    # convert them to native Python scalars before handing them to data_model.
    labels = encoder.inverse_transform(estimator.predict(X)).tolist()

    # Build merged copies rather than overwriting the caller's task mappings.
    return [
        data_model(**{**task_item, resp: label})
        for task_item, label in zip(tasks, labels)
    ]
0 commit comments