# MachineLearning.py
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_validate


class ModelRegressionEvaluate:
    """
    A class that takes a list of regression models and evaluates their
    performance using cross-validation.

    Parameters
    ----------
    models : list
        A list of regression models to evaluate.

    Methods
    -------
    fit(X_train, y_train)
        Fits the regression models on the training data using cross-validation
        and stores the evaluation results in the ``results`` attribute.
    get_results()
        Returns the evaluation results as a pandas DataFrame.
    """

    def __init__(self, models):
        self.models = models
    def fit(self, X_train, y_train):
        self.results = []
        for model in self.models:
            # Time the full cross-validation run for this model.
            start = time.time()
            scores = cross_validate(
                model,
                X_train,
                y_train,
                cv=5,
                scoring=['neg_mean_absolute_error', 'neg_mean_squared_error', 'r2'],
                return_train_score=False,
            )
            end = time.time()

            # scikit-learn reports error metrics as negated scores, so flip the
            # sign before averaging; RMSE is the square root of the mean MSE.
            results_dict = {
                'model': model.__class__.__name__,
                'mean_mae': -np.mean(scores['test_neg_mean_absolute_error']),
                'mean_rmse': np.sqrt(-np.mean(scores['test_neg_mean_squared_error'])),
                'mean_r2': np.mean(scores['test_r2']),
                'time': end - start,
            }
            self.results.append(results_dict)

    def get_results(self):
        return pd.DataFrame(self.results)
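
# Example usage of ModelRegressionEvaluate (a minimal sketch; the estimators and
# the X_train/y_train data below are illustrative assumptions, not defined in
# this module):
#
#     from sklearn.linear_model import LinearRegression, Ridge
#
#     evaluator = ModelRegressionEvaluate([LinearRegression(), Ridge()])
#     evaluator.fit(X_train, y_train)
#     print(evaluator.get_results())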


class RegressionModelPlot:
    """A class for creating a set of plots to visualize the performance of a
    regression model.

    Parameters
    ----------
    y_test : pandas.DataFrame
        The actual target values for the test set (the first column is used).
    y_pred : array-like
        The predicted target values for the test set.
    color : str, optional
        The color to use for the plot markers and histogram bars.

    Methods
    -------
    plot()
        Creates a set of three plots to visualize the performance of the
        regression model.
    """

    def __init__(self, y_test, y_pred, color='b'):
        self.y_test = y_test
        self.y_pred = y_pred
        self.color = color
    def plot(self):
        """Creates a set of three plots to visualize the performance of the
        regression model.

        The three plots are: a scatter plot of actual vs. predicted values with
        the identity line, a histogram of errors, and a residual plot.
        """
        actual = self.y_test[self.y_test.columns[0]]

        # Create subplots
        fig, axs = plt.subplots(ncols=3, figsize=(15, 5))

        # Scatter plot of actual vs. predicted values with the y = x identity line
        axs[0].scatter(actual, self.y_pred, color=self.color)
        axs[0].plot([actual.min(), actual.max()], [actual.min(), actual.max()], 'k--', lw=3)
        axs[0].set_xlabel('Actual Values')
        axs[0].set_ylabel('Predicted Values')
        axs[0].set_title('Actual vs. Predicted Values')

        # Histogram of errors with a dashed line at the median error
        errors = actual - self.y_pred
        axs[1].hist(errors, bins=50, color=self.color)
        axs[1].axvline(x=errors.median(), color='k', linestyle='--', lw=3)
        axs[1].set_xlabel('Errors')
        axs[1].set_ylabel('Frequency')
        axs[1].set_title('Histogram of Errors')

        # Residual plot: errors against predicted values
        axs[2].scatter(self.y_pred, errors, color=self.color)
        axs[2].axhline(y=0, color='k', linestyle='-', lw=3)
        axs[2].set_xlabel('Predicted Values')
        axs[2].set_ylabel('Errors')
        axs[2].set_title('Residual Plot')

        # Show the plots
        plt.tight_layout()
        plt.show()
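

if __name__ == "__main__":
    # Illustrative demo on synthetic data (an assumption for demonstration, not
    # part of the original module): fit one regressor and visualize its
    # held-out predictions with RegressionModelPlot.
    from sklearn.datasets import make_regression
    from sklearn.linear_model import LinearRegression
    from sklearn.model_selection import train_test_split

    X, y = make_regression(n_samples=500, n_features=5, noise=15.0, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

    model = LinearRegression().fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # RegressionModelPlot indexes y_test by its first column, so wrap the test
    # targets in a single-column DataFrame.
    RegressionModelPlot(pd.DataFrame(y_test, columns=['target']), y_pred, color='steelblue').plot()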