Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DIRECTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,7 @@
* [Geometry](geometry/geometry.py)
* [Graham Scan](geometry/graham_scan.py)
* [Jarvis March](geometry/jarvis_march.py)
* [Segment Intersection](geometry/segment_intersection.py)
* Tests
Comment thread
poyea marked this conversation as resolved.
* [Test Graham Scan](geometry/tests/test_graham_scan.py)
* [Test Jarvis March](geometry/tests/test_jarvis_march.py)
Expand Down
44 changes: 44 additions & 0 deletions machine_learning/forecasting/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@

from warnings import simplefilter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import Normalizer
from sklearn.svm import SVR
from statsmodels.tsa.statespace.sarimax import SARIMAX
Expand Down Expand Up @@ -78,6 +80,29 @@ def support_vector_regressor(x_train: list, x_test: list, train_user: list) -> f
return float(y_pred[0])


def random_forest_regressor(x_train: list, x_test: list, train_user: list) -> float:
    """
    Fourth method: Random Forest Regressor
    A random forest trains many decision trees and answers with the mean of
    their individual predictions, which makes it steadier than a single tree
    and able to follow nonlinear relationships in the data.

    The forest is built with 100 trees and a fixed random_state of 42, so
    repeated calls on the same data give the same result.

    input  : x_train    = training rows, each row a [date, total_event] pair
             x_test     = rows to predict for (only the first one is used)
             train_user = target value (total user) for each training row
    output : predicted total user for the first test row, as a float

    >>> random_forest_regressor([[5,2],[1,5],[6,2]], [[3,2]], [2,1,4])
    1.95
    """
    # fit() returns the fitted estimator, so construction and training chain.
    forest = RandomForestRegressor(n_estimators=100, random_state=42).fit(
        x_train, train_user
    )
    first_forecast = forest.predict(x_test)[0]
    return float(first_forecast)
Comment thread
poyea marked this conversation as resolved.


def interquartile_range_checker(train_user: list) -> float:
"""
Optional method: interquartile range
Expand Down Expand Up @@ -120,6 +145,22 @@ def data_safety_checker(list_vote: list, actual_result: float) -> bool:
return safe > not_safe


def plot_forecast(actual: list, predictions: list) -> None:
    """
    Plot the observed series together with each model's forecast.

    The observed values are drawn as a line over x = 0 .. len(actual) - 1.
    Every forecast is drawn as a single coloured marker at x = len(actual),
    i.e. the position right after the last observed value.

    input  : actual      = observed values, one per day
             predictions = forecasts, read by position as
                           [linear regression, SARIMAX, SVR, random forest]
    output : None, the figure is shown with plt.show()

    Raises IndexError if predictions holds fewer than four values.
    """
    # (matplotlib format string, legend label), in the order of `predictions`.
    forecast_styles = (
        ("ro", "Linear Reg"),
        ("go", "SARIMAX"),
        ("bo", "SVR"),
        ("yo", "RF"),
    )
    forecast_day = len(actual)

    plt.figure(figsize=(10, 5))
    plt.plot(range(forecast_day), actual, label="Actual")
    for position, (fmt, label) in enumerate(forecast_styles):
        plt.plot(forecast_day, predictions[position], fmt, label=label)
    plt.legend()
    plt.title("Data Safety Forecast")
    plt.xlabel("Days")
    plt.ylabel("Normalized User Count")
    plt.grid(True)
    plt.tight_layout()
    plt.show()


if __name__ == "__main__":
"""
data column = total user in a day, how much online event held in one day,
Expand Down Expand Up @@ -155,8 +196,11 @@ def data_safety_checker(list_vote: list, actual_result: float) -> bool:
),
sarimax_predictor(train_user, train_match, test_match),
support_vector_regressor(x_train, x_test, train_user),
random_forest_regressor(x_train, x_test, train_user),
]

# check the safety of today's data
not_str = "" if data_safety_checker(res_vote, test_user[0]) else "not "
print(f"Today's data is {not_str}safe.")

plot_forecast(train_user, res_vote)