Machine_Learning_Models/Linear_Regression.py at main · JacobGH2/Machine_Learning_Models · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# -*- coding: utf-8 -*-
"""Assignment1

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1XfK4PZ4pvohS_qKhnzTCWi_svGCDfMMr
"""

import numpy as np
import matplotlib.pyplot as plt
import statistics

# Price series used by the "Minimum Price" runs; entry i belongs to years[i].
prices = [
    8730, 8781, 9449, 10224, 10575, 11070, 11485, 11845, 11580, 11960,
    12565, 13645, 14575, 14610, 14450, 13970, 14490, 16395, 17820, 18160,
    24000, 24110, 24820, 25980, 27400, 32720,
]

# Price series used by the "Maximum Price" runs; entry i belongs to years[i].
prices_max = [
    14840, 16535, 18328, 19571, 20295, 20325, 19695, 19435, 19785, 24335,
    25010, 25450, 26015, 26795, 26670, 24425, 24350, 25805, 25800, 26070,
    38565, 38675, 39035, 39730, 40945, 55620,
]

# Years that have observed prices: 1992-2011 and 2019-2024 (2012-2018 absent).
years = [*range(1992, 2012), *range(2019, 2025)]

# Every year from 1992 through 2025; predictions are generated for each one,
# including the gap years and 2025.
years_train = list(range(1992, 2026))

# model function
def f_theta(input_x):
    """Evaluate the linear model at input_x with the current weights.

    Reads the module-level weights theta_0 (intercept) and theta_1 (slope)
    at call time, so the result reflects whatever the training loop last
    assigned to them.

    Returns:
        theta_0 + input_x * theta_1
    """
    intercept = theta_0
    return intercept + input_x * theta_1

# cost function
def cost(theta_0, theta_1, inputs, outputs):
    """Return the halved mean squared error of the line theta_0 + theta_1*x.

    The model is evaluated with the weights passed in, so the cost of any
    candidate (theta_0, theta_1) pair can be computed without touching the
    module-level weights.

    Args:
        theta_0: Intercept of the line.
        theta_1: Slope of the line.
        inputs: Sequence of feature values.
        outputs: Sequence of target values; outputs[i] pairs with inputs[i].

    Returns:
        sum((theta_0 + theta_1*x_i - y_i)**2) / (2 * len(inputs)).

    Raises:
        ZeroDivisionError: If inputs is empty.
        IndexError: If outputs is shorter than inputs.
    """
    squared_error_sum = 0
    for idx, x in enumerate(inputs):
        residual = (theta_0 + x * theta_1) - outputs[idx]
        squared_error_sum += residual ** 2
    return squared_error_sum / (2 * len(inputs))

# partial derivative calculations
def theta_0_partial(inputs, outputs):
    """Return the partial derivative of the cost with respect to theta_0.

    This is the mean residual, (1/n) * sum(f_theta(x_i) - y_i), evaluated
    at the current module-level weights through f_theta.

    Args:
        inputs: Sequence of feature values.
        outputs: Sequence of target values; outputs[i] pairs with inputs[i].
    """
    residual_total = 0
    for position, feature in enumerate(inputs):
        residual = f_theta(feature) - outputs[position]
        residual_total += residual
    sample_count = len(inputs)
    return residual_total / sample_count
def theta_1_partial(inputs, outputs):
    """Return the partial derivative of the cost with respect to theta_1.

    This is the mean of residual times feature,
    (1/n) * sum((f_theta(x_i) - y_i) * x_i), evaluated at the current
    module-level weights through f_theta.

    Args:
        inputs: Sequence of feature values.
        outputs: Sequence of target values; outputs[i] pairs with inputs[i].
    """
    weighted_total = 0
    for position, feature in enumerate(inputs):
        residual = f_theta(feature) - outputs[position]
        weighted_total += residual * feature
    sample_count = len(inputs)
    return weighted_total / sample_count

# update rule
def update_weights(inputs, outputs):
    """Apply one batch gradient-descent step to the module-level weights.

    Both partial derivatives are evaluated at the current (theta_0, theta_1)
    before either weight is reassigned. The partials read the weights through
    f_theta, so reassigning theta_0 first would make the theta_1 gradient be
    taken at a half-updated point instead of the point the step starts from.

    Args:
        inputs: Sequence of feature values.
        outputs: Sequence of target values; outputs[i] pairs with inputs[i].

    Side effects:
        Rebinds the globals theta_0 and theta_1, scaled by the global l_rate.
    """
    global theta_0, theta_1

    # Evaluate the full gradient first, then move both weights together.
    grad_0 = theta_0_partial(inputs, outputs)
    grad_1 = theta_1_partial(inputs, outputs)

    theta_0 = theta_0 - l_rate * grad_0
    theta_1 = theta_1 - l_rate * grad_1

# plot curves and report final weights/predictions
def report(loss_in, theta_0_in, theta_1_in, prices_pd_in, prices_in):
    """Plot the loss curve and fitted line, and print weights and predictions.

    Args:
        loss_in: Cost value recorded at each training step.
        theta_0_in: Final intercept, printed as-is.
        theta_1_in: Final slope, printed as-is.
        prices_pd_in: Predicted prices, one per entry of the module-level
            years_train. Indices 20-26 and 33 are printed as the predictions
            for 2012-2018 and 2025, so at least 34 entries are required.
        prices_in: Observed prices, scattered against the module-level years.
    """
    def labelled(pairs):
        # Interleave each label with the prediction stored at its index.
        fields = []
        for label, position in pairs:
            fields += [label, prices_pd_in[position]]
        return fields

    # First figure: cost per training step.
    plt.figure()
    plt.title("Loss Curve")
    plt.plot(loss_in)

    print("Final weights: theta_0: ", theta_0_in, " theta_1: ", theta_1_in)
    print(*labelled([
        ("Prediction: 2012-$", 20),
        (" 2013-$", 21),
        (" 2014-$", 22),
        (" 2015-$", 23),
    ]))
    print(*labelled([
        (" 2016-$", 24),
        (" 2017-$", 25),
        (" 2018-$", 26),
        (" 2025-$", 33),
    ]))

    # Second figure: observed points with the prediction line over all years.
    plt.figure()
    plt.title("Year/Price Curve with Prediction Line")
    plt.scatter(years, prices_in)
    plt.plot(years_train, prices_pd_in)

# Baseline configuration: raw years as the feature and a fixed learning rate
# (no feature scaling, no dynamic learning rate).
l_rate = 1e-7
theta_0 = 8700
theta_1 = 0

# Minimum price: 100 gradient steps, recording the cost before each step.
loss = []
for _ in range(100):
    loss.append(cost(theta_0, theta_1, years, prices))
    update_weights(years, prices)

# Predict a price for every year in years_train with the fitted weights.
prices_pd = [f_theta(year) for year in years_train]

print("No FS/DLR: Minimum Price")
report(loss, theta_0, theta_1, prices_pd, prices)

# Maximum price, still without feature scaling or a dynamic learning rate.
# The weights are re-initialised so this run does not continue from the
# previous fit.
l_rate = 1e-7
theta_0 = 1000
theta_1 = 1000

# 100 gradient steps, recording the cost before each step.
loss = []
for _ in range(100):
    loss.append(cost(theta_0, theta_1, years, prices_max))
    update_weights(years, prices_max)

# Predict a price for every year in years_train with the fitted weights.
prices_pd = [f_theta(year) for year in years_train]

print("No FS/DLR: Maximum Price")
report(loss, theta_0, theta_1, prices_pd, prices_max)

# with feature scaling
theta_0 = .5
theta_1 = .5
l_rate = .1

# Standardise the years: subtract the mean, divide by the sample stdev.
years_mean = statistics.mean(years)
years_stdev = statistics.stdev(years)
years_sc = [(x - years_mean) / years_stdev for x in years]

# Standardise the minimum prices the same way. The statistics are computed
# once here rather than once per element, and reused when predictions are
# mapped back to the original price scale below.
prices_mean = statistics.mean(prices)
prices_stdev = statistics.stdev(prices)
prices_sc = [(x - prices_mean) / prices_stdev for x in prices]

# training: 100 gradient steps, recording the cost before each step
loss2 = []
c = 500
for it in range(100):
    loss2.append(cost(theta_0, theta_1, years_sc, prices_sc))
    update_weights(years_sc, prices_sc)
    # Dynamic learning rate. NOTE: the factor c/(c+it) is applied to the
    # already-decayed rate, so the decay compounds from step to step rather
    # than being c/(c+it) times the initial rate.
    l_rate = (l_rate*c)/(c+it)

# generating predictions
# f_theta(normalized_year) is the prediction in normalized price units;
# price = stdev(prices) * normalized_prediction + mean(prices)
prices_pd = [
    prices_stdev * f_theta((x - years_mean) / years_stdev) + prices_mean
    for x in years_train
]

print("With FS/DLR: Minimum Price")
report(loss2, theta_0, theta_1, prices_pd, prices)

# maximum price (with feature scaling and dynamic learning rate)
l_rate = .1
theta_0 = .5
theta_1 = .5

# Standardise the maximum prices: subtract the mean, divide by the sample
# stdev. The statistics are computed once here rather than once per element,
# and reused when predictions are mapped back to the original price scale.
prices_m_mean = statistics.mean(prices_max)
prices_m_stdev = statistics.stdev(prices_max)
prices_m_sc = [(x - prices_m_mean) / prices_m_stdev for x in prices_max]

# training: 100 gradient steps, recording the cost before each step
loss3 = []
c = 500
for it in range(100):
    loss3.append(cost(theta_0, theta_1, years_sc, prices_m_sc))
    update_weights(years_sc, prices_m_sc)
    # Dynamic learning rate. NOTE: the factor c/(c+it) is applied to the
    # already-decayed rate, so the decay compounds from step to step rather
    # than being c/(c+it) times the initial rate.
    l_rate = (l_rate*c)/(c+it)

# generating predictions
# f_theta(normalized_year) is the prediction in normalized price units;
# price = stdev(prices_max) * normalized_prediction + mean(prices_max)
prices_m_pd = [
    prices_m_stdev * f_theta((x - years_mean) / years_stdev) + prices_m_mean
    for x in years_train
]

print("With FS/DLR: Maximum Price")
report(loss3, theta_0, theta_1, prices_m_pd, prices_max)

"""Summary

My model has found a predicted price range of $27711.87-$43479.08 for the 2025 Ranger. The final weight values have been stated above for all four models. I believe that my last two models (with FS/DLR) accurately model the data, as evidenced by their year-price plots and my model's prediction line. The models with FS/DLR produced much more accurate results than the models without FS/DLR. My model could be improved by selecting different starting weights, a different normalization method, or a different value of the constant c for DLR. I would not buy a Ford Ranger at the predicted price range.
"""